commit
9cac379655
|
@ -416,6 +416,29 @@ endif ()
|
||||||
set(ZGEMM_UNROLL_M 4)
|
set(ZGEMM_UNROLL_M 4)
|
||||||
set(ZGEMM_UNROLL_N 4)
|
set(ZGEMM_UNROLL_N 4)
|
||||||
set(SYMV_P 16)
|
set(SYMV_P 16)
|
||||||
|
elseif ("${TCORE}" STREQUAL "VORTEX")
|
||||||
|
file(APPEND ${TARGET_CONF_TEMP}
|
||||||
|
"#define ARMV8\n"
|
||||||
|
"#define L1_CODE_SIZE\t32768\n"
|
||||||
|
"#define L1_CODE_LINESIZE\t64\n"
|
||||||
|
"#define L1_CODE_ASSOCIATIVE\t4\n"
|
||||||
|
"#define L1_DATA_SIZE\t32768\n"
|
||||||
|
"#define L1_DATA_LINESIZE\t64\n"
|
||||||
|
"#define L1_DATA_ASSOCIATIVE\t4\n"
|
||||||
|
"#define L2_SIZE\t5262144\n"
|
||||||
|
"#define L2_LINESIZE\t64\n"
|
||||||
|
"#define L2_ASSOCIATIVE\t8\n"
|
||||||
|
"#define DTB_DEFAULT_ENTRIES\t64\n"
|
||||||
|
"#define DTB_SIZE\t4096\n")
|
||||||
|
set(SGEMM_UNROLL_M 16)
|
||||||
|
set(SGEMM_UNROLL_N 4)
|
||||||
|
set(DGEMM_UNROLL_M 8)
|
||||||
|
set(DGEMM_UNROLL_N 4)
|
||||||
|
set(CGEMM_UNROLL_M 8)
|
||||||
|
set(CGEMM_UNROLL_N 4)
|
||||||
|
set(ZGEMM_UNROLL_M 4)
|
||||||
|
set(ZGEMM_UNROLL_N 4)
|
||||||
|
set(SYMV_P 16)
|
||||||
elseif ("${TCORE}" STREQUAL "POWER6")
|
elseif ("${TCORE}" STREQUAL "POWER6")
|
||||||
file(APPEND ${TARGET_CONF_TEMP}
|
file(APPEND ${TARGET_CONF_TEMP}
|
||||||
"#define L1_DATA_SIZE 32768\n"
|
"#define L1_DATA_SIZE 32768\n"
|
||||||
|
|
|
@ -424,7 +424,7 @@ void get_cpuconfig(void)
|
||||||
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
|
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
|
||||||
printf("#define L1_DATA_SIZE %d \n",value);
|
printf("#define L1_DATA_SIZE %d \n",value);
|
||||||
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
|
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
|
||||||
printf("#define L2_DATA_SIZE %d \n",value);
|
printf("#define L2_SIZE %d \n",value);
|
||||||
break;
|
break;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -120,10 +120,10 @@ dll : ../$(LIBDLLNAME)
|
||||||
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
|
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
|
||||||
|
|
||||||
$(LIBPREFIX).def : gensymbol
|
$(LIBPREFIX).def : gensymbol
|
||||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
libgoto_hpl.def : gensymbol
|
libgoto_hpl.def : gensymbol
|
||||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
ifeq ($(OSNAME), Darwin)
|
ifeq ($(OSNAME), Darwin)
|
||||||
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||||
|
@ -258,16 +258,16 @@ static : ../$(LIBNAME)
|
||||||
rm -f goto.$(SUFFIX)
|
rm -f goto.$(SUFFIX)
|
||||||
|
|
||||||
osx.def : gensymbol ../Makefile.system ../getarch.c
|
osx.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
aix.def : gensymbol ../Makefile.system ../getarch.c
|
aix.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
objcopy.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
objconv.def : gensymbol ../Makefile.system ../getarch.c
|
||||||
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
|
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
|
||||||
|
|
||||||
test : linktest.c
|
test : linktest.c
|
||||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
|
||||||
|
|
|
@ -50,7 +50,7 @@
|
||||||
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
|
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
|
||||||
zgeadd, dzsum);
|
zgeadd, dzsum);
|
||||||
|
|
||||||
@cblasobjs = (lsame, xerbla);
|
@blasobjs = (lsame, xerbla);
|
||||||
@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
|
@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
|
||||||
@cblasobjsc = (
|
@cblasobjsc = (
|
||||||
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
||||||
|
@ -92,7 +92,7 @@
|
||||||
cblas_izamax, cblas_izamin, cblas_izmin, cblas_izmax, cblas_dzsum,cblas_zimatcopy,cblas_zomatcopy
|
cblas_izamax, cblas_izamin, cblas_izmin, cblas_izmax, cblas_dzsum,cblas_zimatcopy,cblas_zomatcopy
|
||||||
);
|
);
|
||||||
|
|
||||||
@cblasobjs = ( cblas_xerbla );
|
@cblasobjs = ( cblas_xerbla );
|
||||||
|
|
||||||
@halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod);
|
@halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod);
|
||||||
|
|
||||||
|
@ -3600,6 +3600,7 @@ if ($ARGV[13] == 1) {
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objss);
|
@lapack2objs = (@lapack2objs, @lapack2objss);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2s);
|
||||||
}
|
}
|
||||||
if ($ARGV[14] == 1) {
|
if ($ARGV[14] == 1) {
|
||||||
@blasobjs = (@blasobjs, @blasobjsd);
|
@blasobjs = (@blasobjs, @blasobjsd);
|
||||||
|
@ -3608,6 +3609,7 @@ if ($ARGV[14] == 1) {
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objsd);
|
@lapack2objs = (@lapack2objs, @lapack2objsd);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2d);
|
||||||
}
|
}
|
||||||
if ($ARGV[15] == 1) {
|
if ($ARGV[15] == 1) {
|
||||||
@blasobjs = (@blasobjs, @blasobjsc);
|
@blasobjs = (@blasobjs, @blasobjsc);
|
||||||
|
@ -3618,6 +3620,7 @@ if ($ARGV[15] == 1) {
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objsc, @lapac2objszc);
|
@lapack2objs = (@lapack2objs, @lapack2objsc, @lapac2objszc);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc, @lapackobjs2c);
|
||||||
}
|
}
|
||||||
if ($ARGV[16] == 1) {
|
if ($ARGV[16] == 1) {
|
||||||
@blasobjs = (@blasobjs, @blasobjsz);
|
@blasobjs = (@blasobjs, @blasobjsz);
|
||||||
|
@ -3628,6 +3631,7 @@ if ($ARGV[16] == 1) {
|
||||||
@lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc);
|
@lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc);
|
||||||
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
|
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
|
||||||
@lapackeobjs = (@lapackeobjs, @lapackeobjsz);
|
@lapackeobjs = (@lapackeobjs, @lapackeobjsz);
|
||||||
|
@lapackobjs2 = (@lapackobjs2, @lapackobjs2dz, @lapackobjs2z);
|
||||||
}
|
}
|
||||||
if ($ARGV[8] == 1) {
|
if ($ARGV[8] == 1) {
|
||||||
#ONLY_CBLAS=1
|
#ONLY_CBLAS=1
|
||||||
|
|
14
getarch.c
14
getarch.c
|
@ -1222,6 +1222,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_VORTEX
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ARM64"
|
||||||
|
#define SUBARCHITECTURE "VORTEX"
|
||||||
|
#define SUBDIRNAME "arm64"
|
||||||
|
#define ARCHCONFIG "-DVORTEX " \
|
||||||
|
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||||
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||||
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
|
#define LIBNAME "vortex"
|
||||||
|
#define CORENAME "VORTEX"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_ZARCH_GENERIC
|
#ifdef FORCE_ZARCH_GENERIC
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define ARCHITECTURE "ZARCH"
|
#define ARCHITECTURE "ZARCH"
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
#if defined(SKYLAKEX) || defined (COOPERLAKE)
|
|
||||||
/* the direct sgemm code written by Arjan van der Ven */
|
/* the direct sgemm code written by Arjan van der Ven */
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
#if defined(SKYLAKEX) || defined (COOPERLAKE)
|
||||||
/*
|
/*
|
||||||
* "Direct sgemm" code. This code operates directly on the inputs and outputs
|
* "Direct sgemm" code. This code operates directly on the inputs and outputs
|
||||||
* of the sgemm call, avoiding the copies, memory realignments and threading,
|
* of the sgemm call, avoiding the copies, memory realignments and threading,
|
||||||
|
|
Loading…
Reference in New Issue