commit
9cac379655
|
@ -416,6 +416,29 @@ endif ()
|
|||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "VORTEX")
|
||||
# VORTEX target: append the ARMV8 marker plus L1/L2 cache and DTB macros to
# ${TARGET_CONF_TEMP}, then set the S/D/C/ZGEMM unroll factors and SYMV_P.
# NOTE(review): L2_SIZE is written as 5262144 and L2_ASSOCIATIVE as 8 here,
# while the FORCE_VORTEX ARCHCONFIG string in getarch.c passes
# -DL2_SIZE=262144 and -DL2_ASSOCIATIVE=32. 5262144 is also not a whole
# number of MiB (5 MiB = 5242880). TODO confirm which values are intended.
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define ARMV8\n"
|
||||
"#define L1_CODE_SIZE\t32768\n"
|
||||
"#define L1_CODE_LINESIZE\t64\n"
|
||||
"#define L1_CODE_ASSOCIATIVE\t4\n"
|
||||
"#define L1_DATA_SIZE\t32768\n"
|
||||
"#define L1_DATA_LINESIZE\t64\n"
|
||||
"#define L1_DATA_ASSOCIATIVE\t4\n"
|
||||
"#define L2_SIZE\t5262144\n"
|
||||
"#define L2_LINESIZE\t64\n"
|
||||
"#define L2_ASSOCIATIVE\t8\n"
|
||||
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||
"#define DTB_SIZE\t4096\n")
|
||||
set(SGEMM_UNROLL_M 16)
|
||||
set(SGEMM_UNROLL_N 4)
|
||||
|
||||
set(DGEMM_UNROLL_M 8)
|
||||
set(DGEMM_UNROLL_N 4)
|
||||
set(CGEMM_UNROLL_M 8)
|
||||
set(CGEMM_UNROLL_N 4)
|
||||
set(ZGEMM_UNROLL_M 4)
|
||||
set(ZGEMM_UNROLL_N 4)
|
||||
set(SYMV_P 16)
|
||||
elseif ("${TCORE}" STREQUAL "POWER6")
|
||||
file(APPEND ${TARGET_CONF_TEMP}
|
||||
"#define L1_DATA_SIZE 32768\n"
|
||||
|
|
|
@ -424,7 +424,7 @@ void get_cpuconfig(void)
|
|||
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
|
||||
printf("#define L1_DATA_SIZE %d \n",value);
|
||||
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
|
||||
printf("#define L2_DATA_SIZE %d \n",value);
|
||||
printf("#define L2_SIZE %d \n",value);
|
||||
break;
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -120,10 +120,10 @@ dll : ../$(LIBDLLNAME)
|
|||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
|
||||
|
||||
$(LIBPREFIX).def : gensymbol
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
|
||||
libgoto_hpl.def : gensymbol
|
||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
|
@ -258,16 +258,16 @@ static : ../$(LIBNAME)
|
|||
rm -f goto.$(SUFFIX)
|
||||
|
||||
osx.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
|
||||
aix.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
|
||||
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
||||
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
|
||||
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
||||
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
||||
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||
|
||||
test : linktest.c
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||
|
|
|
@ -50,7 +50,7 @@
|
|||
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
|
||||
zgeadd, dzsum);
|
||||
|
||||
@cblasobjs = (lsame, xerbla);
|
||||
@blasobjs = (lsame, xerbla);
|
||||
@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
|
||||
@cblasobjsc = (
|
||||
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
||||
|
@ -3600,6 +3600,7 @@ if ($ARGV[13] == 1) {
|
|||
@lapack2objs = (@lapack2objs, @lapack2objss);
|
||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
|
||||
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
|
||||
@lapackobjs2 = (@lapackobjs2, @lapackobjs2s);
|
||||
}
|
||||
if ($ARGV[14] == 1) {
|
||||
@blasobjs = (@blasobjs, @blasobjsd);
|
||||
|
@ -3608,6 +3609,7 @@ if ($ARGV[14] == 1) {
|
|||
@lapack2objs = (@lapack2objs, @lapack2objsd);
|
||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
|
||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
|
||||
@lapackobjs2 = (@lapackobjs2, @lapackobjs2d);
|
||||
}
|
||||
if ($ARGV[15] == 1) {
|
||||
@blasobjs = (@blasobjs, @blasobjsc);
|
||||
|
@ -3618,6 +3620,7 @@ if ($ARGV[15] == 1) {
|
|||
# Single-precision complex build: add the complex-only list and the list shared
# with double complex (spelled @lapack2objszc, as in the $ARGV[16] branch).
@lapack2objs = (@lapack2objs, @lapack2objsc, @lapack2objszc);
|
||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
|
||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
|
||||
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc, @lapackobjs2c);
|
||||
}
|
||||
if ($ARGV[16] == 1) {
|
||||
@blasobjs = (@blasobjs, @blasobjsz);
|
||||
|
@ -3628,6 +3631,7 @@ if ($ARGV[16] == 1) {
|
|||
@lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc);
|
||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
|
||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsz);
|
||||
@lapackobjs2 = (@lapackobjs2, @lapackobjs2dz, @lapackobjs2z);
|
||||
}
|
||||
if ($ARGV[8] == 1) {
|
||||
#ONLY_CBLAS=1
|
||||
|
|
14
getarch.c
14
getarch.c
|
@ -1222,6 +1222,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#else
|
||||
#endif
|
||||
|
||||
/*
 * Forced VORTEX target: when FORCE_VORTEX is defined, set FORCE and describe
 * the core as ARCHITECTURE "ARM64" / SUBARCHITECTURE "VORTEX" (SUBDIRNAME
 * "arm64", LIBNAME "vortex", CORENAME "VORTEX"). ARCHCONFIG carries the
 * -D flags for cache/TLB geometry and the VFP/NEON/ARMV8 feature macros.
 * NOTE(review): ARCHCONFIG uses -DL2_SIZE=262144 and -DL2_ASSOCIATIVE=32,
 * whereas the CMake VORTEX branch in the same commit writes L2_SIZE 5262144
 * and L2_ASSOCIATIVE 8. TODO confirm which values are intended.
 */
#ifdef FORCE_VORTEX
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "VORTEX"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DVORTEX " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "vortex"
|
||||
#define CORENAME "VORTEX"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ZARCH_GENERIC
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ZARCH"
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
#if defined(SKYLAKEX) || defined (COOPERLAKE)
|
||||
/* the direct sgemm code written by Arjan van der Ven */
|
||||
#include <immintrin.h>
|
||||
#include "common.h"
|
||||
|
||||
#if defined(SKYLAKEX) || defined (COOPERLAKE)
|
||||
/*
|
||||
* "Direct sgemm" code. This code operates directly on the inputs and outputs
|
||||
* of the sgemm call, avoiding the copies, memory realignments and threading,
|
||||
|
|
Loading…
Reference in New Issue