Merge pull request #3586 from martin-frbg/arm64cpus

Initial support for M1 on Linux, Phytium FT2000 series, ARMV9 Cortex X1,X2,A510,A710
This commit is contained in:
Martin Kroeker 2022-03-27 18:12:21 +02:00 committed by GitHub
commit 5d2f451d09
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 1084 additions and 25 deletions

View File

@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
endif
endif
# FT2000 target: compile for baseline ARMv8-A and tune as for Cortex-A72.
ifeq ($(CORE), FT2000)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
# The same flags are passed to the Fortran compiler, except when it is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
# Use a72 tunings because Neoverse-N1 is only available
# in GCC>=9
ifeq ($(CORE), NEOVERSEN1)
@ -229,6 +236,43 @@ endif
endif
endif
# CORTEXX1 target, only configured for GCC >= 11 or clang.
# GCC and clang spell these options "armv9-a" and "cortex-x1"; the
# unhyphenated forms "armv9" / "cortexx1" are rejected as unknown values.
# NOTE(review): -march=armv9-a appears to need GCC 12 / clang 14 or newer,
# so the GCC >= 11 guard may be too loose - confirm against the compilers
# that are actually supported.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXX1)
CCOMMON_OPT += -march=armv9-a -mtune=cortex-x1
# The same flags are passed to the Fortran compiler, except when it is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv9-a -mtune=cortex-x1
endif
endif
endif
# CORTEXX2 target, only configured for GCC >= 11 or clang.
# GCC and clang spell these options "armv9-a" and "cortex-x2"; the
# unhyphenated forms "armv9" / "cortexx2" are rejected as unknown values.
# NOTE(review): both values appear to need GCC 12 / clang 14 or newer, so
# the GCC >= 11 guard may be too loose - confirm against the compilers that
# are actually supported.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXX2)
CCOMMON_OPT += -march=armv9-a -mtune=cortex-x2
# The same flags are passed to the Fortran compiler, except when it is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv9-a -mtune=cortex-x2
endif
endif
endif
# CORTEXA510 target: ARMv8.4-A with SVE, no -mtune value is set.
# The line below is an earlier, clang-only form of the guard that has been
# left commented out; the active guard accepts GCC >= 11 as well as clang.
#ifeq (1, $(filter 1,$(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXA510)
CCOMMON_OPT += -march=armv8.4-a+sve
# The same flags are passed to the Fortran compiler, except when it is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
endif
endif
# CORTEXA710 target, only configured for GCC >= 11 or clang: ARMv8.2-A with
# SVE, tuned for Cortex-A710.
# The tuning value is spelled "cortex-a710"; the unhyphenated "cortexa710"
# is rejected as an unknown value.
# NOTE(review): -mtune=cortex-a710 appears to need GCC 12 / clang 14 or
# newer, so the GCC >= 11 guard may be too loose - confirm.
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXA710)
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a710
# The same flags are passed to the Fortran compiler, except when it is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a710
endif
endif
endif
endif
endif

View File

@ -92,6 +92,10 @@ CORTEXA53
CORTEXA57
CORTEXA72
CORTEXA73
CORTEXA510
CORTEXA710
CORTEXX1
CORTEXX2
NEOVERSEN1
NEOVERSEV1
NEOVERSEN2
@ -103,6 +107,9 @@ THUNDERX2T99
TSV110
THUNDERX3T110
VORTEX
A64FX
ARMV8SVE
FT2000
9.System Z:
ZARCH_GENERIC

View File

@ -316,6 +316,7 @@ if ($architecture ne $hostarch) {
}
# A target OS that differs from the host OS marks the build as a cross build.
$cross = 1 if ($os ne $hostos);
# Exception: an Android target on a Linux host with TERMUX_APP_PID set is not
# treated as a cross build (presumably a native build inside Termux - confirm).
# Note that != is a numeric comparison, so an unset variable compares as 0.
$cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && ($ENV{TERMUX_APP_PID} != ""));
# Clear $openmp unless the USE_OPENMP environment variable is numerically 1.
$openmp = "" if $ENV{USE_OPENMP} != 1;

View File

@ -161,6 +161,30 @@ if (${CORE} STREQUAL ARMV8SVE)
endif ()
endif ()
# CORTEXA510, CORTEXA710, CORTEXX1 and CORTEXX2 all receive the same
# -march=armv8-a+sve C flag, and only when DYNAMIC_ARCH is not enabled.
if (${CORE} STREQUAL CORTEXA510 OR
    ${CORE} STREQUAL CORTEXA710 OR
    ${CORE} STREQUAL CORTEXX1 OR
    ${CORE} STREQUAL CORTEXX2)
  if (NOT DYNAMIC_ARCH)
    set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
  endif ()
endif ()
if (${CORE} STREQUAL POWER10)
if (NOT DYNAMIC_ARCH)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)

View File

@ -45,6 +45,10 @@ size_t length64=sizeof(value64);
#define CPU_NEOVERSEN1 11
#define CPU_NEOVERSEV1 16
#define CPU_NEOVERSEN2 17
#define CPU_CORTEXX1 18
#define CPU_CORTEXX2 19
#define CPU_CORTEXA510 20
#define CPU_CORTEXA710 21
// Qualcomm
#define CPU_FALKOR 6
// Cavium
@ -59,6 +63,8 @@ size_t length64=sizeof(value64);
#define CPU_VORTEX 13
// Fujitsu
#define CPU_A64FX 15
// Phytium
#define CPU_FT2000 22
static char *cpuname[] = {
"UNKNOWN",
@ -73,12 +79,17 @@ static char *cpuname[] = {
"TSV110",
"EMAG8180",
"NEOVERSEN1",
"NEOVERSEV1"
"NEOVERSEN2"
"THUNDERX3T110",
"VORTEX",
"CORTEXA55",
"A64FX"
"A64FX",
"NEOVERSEV1",
"NEOVERSEN2",
"CORTEXX1",
"CORTEXX2",
"CORTEXA510",
"CORTEXA710",
"FT2000"
};
static char *cpuname_lower[] = {
@ -94,12 +105,17 @@ static char *cpuname_lower[] = {
"tsv110",
"emag8180",
"neoversen1",
"neoversev1",
"neoversen2",
"thunderx3t110",
"vortex",
"cortexa55",
"a64fx"
"a64fx",
"neoversev1",
"neoversen2",
"cortexx1",
"cortexx2",
"cortexa510",
"cortexa710",
"ft2000"
};
int get_feature(char *search)
@ -182,6 +198,14 @@ int detect(void)
return CPU_NEOVERSEN2;
else if (strstr(cpu_part, "0xd05"))
return CPU_CORTEXA55;
else if (strstr(cpu_part, "0xd46"))
return CPU_CORTEXA510;
else if (strstr(cpu_part, "0xd47"))
return CPU_CORTEXA710;
else if (strstr(cpu_part, "0xd44"))
return CPU_CORTEXX1;
else if (strstr(cpu_part, "0xd4c"))
return CPU_CORTEXX2;
}
// Qualcomm
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
@ -202,6 +226,13 @@ int detect(void)
// Fujitsu
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
return CPU_A64FX;
// Apple
else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022"))
return CPU_VORTEX;
// Phytium
else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661")
|| strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663")))
return CPU_FT2000;
}
p = (char *) NULL ;
@ -382,7 +413,24 @@ void get_cpuconfig(void)
printf("#define DTB_DEFAULT_ENTRIES 48\n");
printf("#define DTB_SIZE 4096\n");
break;
/* Cortex-A510, Cortex-A710, Cortex-X1 and Cortex-X2 share one set of emitted
 * parameters: an ARMV9 marker, the core's own name taken from cpuname[d],
 * and fixed L1/L2/DTB values. */
case CPU_CORTEXA510:
case CPU_CORTEXA710:
case CPU_CORTEXX1:
case CPU_CORTEXX2:
  printf("#define ARMV9\n"
         "#define %s\n", cpuname[d]);
  printf("#define L1_CODE_SIZE 65536\n"
         "#define L1_CODE_LINESIZE 64\n"
         "#define L1_CODE_ASSOCIATIVE 4\n"
         "#define L1_DATA_SIZE 65536\n"
         "#define L1_DATA_LINESIZE 64\n"
         "#define L1_DATA_ASSOCIATIVE 4\n"
         "#define L2_SIZE 1048576\n"
         "#define L2_LINESIZE 64\n"
         "#define L2_ASSOCIATIVE 8\n"
         "#define DTB_DEFAULT_ENTRIES 64\n"
         "#define DTB_SIZE 4096\n");
  break;
case CPU_FALKOR:
printf("#define FALKOR\n");
printf("#define L1_CODE_SIZE 65536\n");
@ -469,9 +517,9 @@ void get_cpuconfig(void)
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
break;
#ifdef __APPLE__
case CPU_VORTEX:
printf("#define VORTEX \n");
#ifdef __APPLE__
sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
printf("#define L1_CODE_SIZE %lld \n",value64);
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
@ -480,10 +528,10 @@ void get_cpuconfig(void)
printf("#define L1_DATA_SIZE %lld \n",value64);
sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0);
printf("#define L2_SIZE %lld \n",value64);
#endif
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n");
break;
#endif
case CPU_A64FX:
printf("#define A64FX\n");
printf("#define L1_CODE_SIZE 65535\n");
@ -494,6 +542,16 @@ void get_cpuconfig(void)
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
break;
/* FT2000: emit the core marker followed by fixed L1/L2/DTB parameters
 * (32 KiB L1 code and data, 32 MiB L2, 64-byte lines). */
case CPU_FT2000:
  printf("#define FT2000\n"
         "#define L1_CODE_SIZE 32768\n"
         "#define L1_DATA_SIZE 32768\n"
         "#define L1_DATA_LINESIZE 64\n"
         "#define L2_SIZE 33554432\n"
         "#define L2_LINESIZE 64\n"
         "#define DTB_DEFAULT_ENTRIES 64\n"
         "#define DTB_SIZE 4096\n");
  break;
}
get_cpucount();
}

View File

@ -1232,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa53"
#define CORENAME "CORTEXA53"
#else
#endif
#ifdef FORCE_CORTEXA57
@ -1248,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa57"
#define CORENAME "CORTEXA57"
#else
#endif
#ifdef FORCE_CORTEXA72
@ -1264,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa72"
#define CORENAME "CORTEXA72"
#else
#endif
#ifdef FORCE_CORTEXA73
@ -1280,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa73"
#define CORENAME "CORTEXA73"
#else
#endif
/* Forced-target settings used when FORCE_CORTEXX1 is defined: an ARM64 build
 * with sub-architecture/core name CORTEXX1 and library name "cortexx1".
 * ARCHCONFIG carries the cache parameters (32 KiB L1 data, 256 KiB L2,
 * 64-byte lines) and the feature macros, including HAVE_SVE, ARMV8 and ARMV9.
 * No L1_CODE_* values are supplied by this block. */
#ifdef FORCE_CORTEXX1
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXX1"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXX1 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
#define LIBNAME "cortexx1"
#define CORENAME "CORTEXX1"
#endif
/* Forced-target settings used when FORCE_CORTEXX2 is defined: an ARM64 build
 * with sub-architecture/core name CORTEXX2 and library name "cortexx2".
 * ARCHCONFIG carries the cache parameters (32 KiB L1 data, 256 KiB L2,
 * 64-byte lines) and the feature macros, including HAVE_SVE, ARMV8 and ARMV9.
 * No L1_CODE_* values are supplied by this block. */
#ifdef FORCE_CORTEXX2
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXX2"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXX2 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
#define LIBNAME "cortexx2"
#define CORENAME "CORTEXX2"
#endif
/* Forced-target settings used when FORCE_CORTEXA510 is defined: an ARM64
 * build with sub-architecture/core name CORTEXA510 and library name
 * "cortexa510". ARCHCONFIG carries the cache parameters (32 KiB L1 data,
 * 256 KiB L2, 64-byte lines) and the feature macros, including HAVE_SVE,
 * ARMV8 and ARMV9. No L1_CODE_* values are supplied by this block. */
#ifdef FORCE_CORTEXA510
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXA510"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXA510 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
#define LIBNAME "cortexa510"
#define CORENAME "CORTEXA510"
#endif
/* Forced-target settings used when FORCE_CORTEXA710 is defined: an ARM64
 * build with sub-architecture/core name CORTEXA710 and library name
 * "cortexa710". ARCHCONFIG carries the cache parameters (32 KiB L1 data,
 * 256 KiB L2, 64-byte lines) and the feature macros, including HAVE_SVE,
 * ARMV8 and ARMV9. No L1_CODE_* values are supplied by this block. */
#ifdef FORCE_CORTEXA710
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "CORTEXA710"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DCORTEXA710 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
#define LIBNAME "cortexa710"
#define CORENAME "CORTEXA710"
#endif
#ifdef FORCE_NEOVERSEN1
@ -1297,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-march=armv8.2-a -mtune=neoverse-n1"
#define LIBNAME "neoversen1"
#define CORENAME "NEOVERSEN1"
#else
#endif
#ifdef FORCE_NEOVERSEV1
@ -1314,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-march=armv8.4-a -mtune=neoverse-v1"
#define LIBNAME "neoversev1"
#define CORENAME "NEOVERSEV1"
#else
#endif
@ -1332,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-march=armv8.5-a -mtune=neoverse-n2"
#define LIBNAME "neoversen2"
#define CORENAME "NEOVERSEN2"
#else
#endif
#ifdef FORCE_CORTEXA55
@ -1348,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "cortexa55"
#define CORENAME "CORTEXA55"
#else
#endif
#ifdef FORCE_FALKOR
@ -1364,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "falkor"
#define CORENAME "FALKOR"
#else
#endif
#ifdef FORCE_THUNDERX
@ -1379,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx"
#define CORENAME "THUNDERX"
#else
#endif
#ifdef FORCE_THUNDERX2T99
@ -1397,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx2t99"
#define CORENAME "THUNDERX2T99"
#else
#endif
#ifdef FORCE_TSV110
@ -1413,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "tsv110"
#define CORENAME "TSV110"
#else
#endif
#ifdef FORCE_EMAG8180
@ -1448,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "thunderx3t110"
#define CORENAME "THUNDERX3T110"
#else
#endif
#ifdef FORCE_VORTEX
@ -1480,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
#define LIBNAME "a64fx"
#define CORENAME "A64FX"
#else
#endif
/* Forced-target settings used when FORCE_FT2000 is defined: an ARM64 build
 * with sub-architecture/core name FT2000 and library name "ft2000".
 * ARCHCONFIG carries the cache parameters (32 KiB 8-way L1 code and data,
 * 32 MiB 8-way L2, 64-byte lines) and the ARMv8 feature macros.
 * Each -D option must be separated from the next by a space, because the
 * string is split into individual compiler flags; L2_SIZE is 33554432
 * (32 MiB), the same value the runtime CPU detection emits for FT2000. */
#ifdef FORCE_FT2000
#define ARMV8
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "FT2000"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DFT2000 " \
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
"-DL2_SIZE=33554432 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "ft2000"
#define CORENAME "FT2000"
#endif
#ifdef FORCE_ZARCH_GENERIC

View File

@ -0,0 +1,216 @@
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
TRSMCOPYLN_M = trsm_lncopy_sve.c
TRSMCOPYLT_M = trsm_ltcopy_sve.c
TRSMCOPYUN_M = trsm_uncopy_sve.c
TRSMCOPYUT_M = trsm_utcopy_sve.c
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
# Dot-product kernel selection.
# DDOT always uses dot.S.
DDOTKERNEL = dot.S
# SDOT uses the generic C implementation, except with the PGI C compiler,
# where dot.S is used instead.
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
# CDOT/ZDOT go the other way: zdot.S normally, and the C implementation from
# ../arm when the C compiler is PGI.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
# DSDOT always uses dot.S.
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
# SGEMM / STRMM compute kernels; the file name is selected by SGEMM_UNROLL_N.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
# INCOPY / ITCOPY use the SVE C copy routines.
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
# NOTE(review): ONCOPY / OTCOPY are keyed on DGEMM_UNROLL_N, not
# SGEMM_UNROLL_N. This picks the intended file only while both unroll
# factors are equal - confirm against param.h.
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
SSYMMUCOPY_M = symm_ucopy_sve.c
SSYMMLCOPY_M = symm_lcopy_sve.c
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
DSYMMUCOPY_M = symm_ucopy_sve.c
DSYMMLCOPY_M = symm_lcopy_sve.c
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
CSYMMUCOPY_M = zsymm_ucopy_sve.c
CSYMMLCOPY_M = zsymm_lcopy_sve.c
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c

View File

@ -0,0 +1,216 @@
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
TRSMCOPYLN_M = trsm_lncopy_sve.c
TRSMCOPYLT_M = trsm_ltcopy_sve.c
TRSMCOPYUN_M = trsm_uncopy_sve.c
TRSMCOPYUT_M = trsm_utcopy_sve.c
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
# Dot-product kernel selection.
# DDOT always uses dot.S.
DDOTKERNEL = dot.S
# SDOT uses the generic C implementation, except with the PGI C compiler,
# where dot.S is used instead.
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
# CDOT/ZDOT go the other way: zdot.S normally, and the C implementation from
# ../arm when the C compiler is PGI.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
# DSDOT always uses dot.S.
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
# SGEMM / STRMM compute kernels; the file name is selected by SGEMM_UNROLL_N.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
# INCOPY / ITCOPY use the SVE C copy routines.
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
# NOTE(review): ONCOPY / OTCOPY are keyed on DGEMM_UNROLL_N, not
# SGEMM_UNROLL_N. This picks the intended file only while both unroll
# factors are equal - confirm against param.h.
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
SSYMMUCOPY_M = symm_ucopy_sve.c
SSYMMLCOPY_M = symm_lcopy_sve.c
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
DSYMMUCOPY_M = symm_ucopy_sve.c
DSYMMLCOPY_M = symm_lcopy_sve.c
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
CSYMMUCOPY_M = zsymm_ucopy_sve.c
CSYMMLCOPY_M = zsymm_lcopy_sve.c
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c

View File

@ -0,0 +1,216 @@
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
TRSMCOPYLN_M = trsm_lncopy_sve.c
TRSMCOPYLT_M = trsm_ltcopy_sve.c
TRSMCOPYUN_M = trsm_uncopy_sve.c
TRSMCOPYUT_M = trsm_utcopy_sve.c
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
# Dot-product kernel selection.
# DDOT always uses dot.S.
DDOTKERNEL = dot.S
# SDOT uses the generic C implementation, except with the PGI C compiler,
# where dot.S is used instead.
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
# CDOT/ZDOT go the other way: zdot.S normally, and the C implementation from
# ../arm when the C compiler is PGI.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
# DSDOT always uses dot.S.
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
# SGEMM / STRMM compute kernels; the file name is selected by SGEMM_UNROLL_N.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
# INCOPY / ITCOPY use the SVE C copy routines.
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
# NOTE(review): ONCOPY / OTCOPY are keyed on DGEMM_UNROLL_N, not
# SGEMM_UNROLL_N. This picks the intended file only while both unroll
# factors are equal - confirm against param.h.
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
SSYMMUCOPY_M = symm_ucopy_sve.c
SSYMMLCOPY_M = symm_lcopy_sve.c
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
DSYMMUCOPY_M = symm_ucopy_sve.c
DSYMMLCOPY_M = symm_lcopy_sve.c
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
CSYMMUCOPY_M = zsymm_ucopy_sve.c
CSYMMLCOPY_M = zsymm_lcopy_sve.c
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c

View File

@ -0,0 +1,216 @@
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
TRSMCOPYLN_M = trsm_lncopy_sve.c
TRSMCOPYLT_M = trsm_ltcopy_sve.c
TRSMCOPYUN_M = trsm_uncopy_sve.c
TRSMCOPYUT_M = trsm_utcopy_sve.c
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
SAMAXKERNEL = amax.S
DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
SAXPYKERNEL = axpy.S
DAXPYKERNEL = axpy.S
CAXPYKERNEL = zaxpy.S
ZAXPYKERNEL = zaxpy.S
SROTKERNEL = rot.S
DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S
SGEMVNKERNEL = gemv_n.S
DGEMVNKERNEL = gemv_n.S
CGEMVNKERNEL = zgemv_n.S
ZGEMVNKERNEL = zgemv_n.S
SGEMVTKERNEL = gemv_t.S
DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
SASUMKERNEL = asum.S
DASUMKERNEL = asum.S
CASUMKERNEL = casum.S
ZASUMKERNEL = zasum.S
SCOPYKERNEL = copy.S
DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
SSWAPKERNEL = swap.S
DSWAPKERNEL = swap.S
CSWAPKERNEL = swap.S
ZSWAPKERNEL = swap.S
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
# Dot-product kernel selection.
# DDOT always uses dot.S.
DDOTKERNEL = dot.S
# SDOT uses the generic C implementation, except with the PGI C compiler,
# where dot.S is used instead.
ifneq ($(C_COMPILER), PGI)
SDOTKERNEL = ../generic/dot.c
else
SDOTKERNEL = dot.S
endif
# CDOT/ZDOT go the other way: zdot.S normally, and the C implementation from
# ../arm when the C compiler is PGI.
ifneq ($(C_COMPILER), PGI)
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
else
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
endif
# DSDOT always uses dot.S.
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
# SGEMM / STRMM compute kernels; the file name is selected by SGEMM_UNROLL_N.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
# INCOPY / ITCOPY use the SVE C copy routines.
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
# NOTE(review): ONCOPY / OTCOPY are keyed on DGEMM_UNROLL_N, not
# SGEMM_UNROLL_N. This picks the intended file only while both unroll
# factors are equal - confirm against param.h.
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
SSYMMUCOPY_M = symm_ucopy_sve.c
SSYMMLCOPY_M = symm_lcopy_sve.c
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
DSYMMUCOPY_M = symm_ucopy_sve.c
DSYMMLCOPY_M = symm_lcopy_sve.c
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
CSYMMUCOPY_M = zsymm_ucopy_sve.c
CSYMMLCOPY_M = zsymm_lcopy_sve.c
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
ZSYMMLCOPY_M = zsymm_lcopy_sve.c

View File

@ -0,0 +1,3 @@
# This target defines no kernels of its own; it reuses the Cortex-A57 kernel
# selection from KERNELDIR unchanged.
include $(KERNELDIR)/KERNEL.CORTEXA57

View File

@ -3130,7 +3130,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(CORTEXA57) || \
defined(CORTEXA72) || defined(CORTEXA73) || \
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096
#elif defined(ARMV8SVE) || defined(A64FX)
#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
Until then, just keep it different from DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */