Merge pull request #3586 from martin-frbg/arm64cpus
Initial support for M1 on Linux, Phytium FT2000 series, ARMV9 Cortex X1,X2,A510,A710
This commit is contained in:
commit
5d2f451d09
|
@ -55,6 +55,13 @@ FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
|||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), FT2000)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-N1 is only available
|
||||
# in GCC>=9
|
||||
ifeq ($(CORE), NEOVERSEN1)
|
||||
|
@ -229,6 +236,43 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXX1)
|
||||
CCOMMON_OPT += -march=armv9 -mtune=cortexx1
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv9 -mtune=cortexx1
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXX2)
|
||||
CCOMMON_OPT += -march=armv9 -mtune=cortexx2
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv9 -mtune=cortexx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#ifeq (1, $(filter 1,$(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXA510)
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXA710)
|
||||
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortexa710
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortexa710
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
endif
|
||||
|
|
|
@ -92,6 +92,10 @@ CORTEXA53
|
|||
CORTEXA57
|
||||
CORTEXA72
|
||||
CORTEXA73
|
||||
CORTEXA510
|
||||
CORTEXA710
|
||||
CORTEXX1
|
||||
CORTEXX2
|
||||
NEOVERSEN1
|
||||
NEOVERSEV1
|
||||
NEOVERSEN2
|
||||
|
@ -103,6 +107,9 @@ THUNDERX2T99
|
|||
TSV110
|
||||
THUNDERX3T110
|
||||
VORTEX
|
||||
A64FX
|
||||
ARMV8SVE
|
||||
FT2000
|
||||
|
||||
9.System Z:
|
||||
ZARCH_GENERIC
|
||||
|
|
1
c_check
1
c_check
|
@ -316,6 +316,7 @@ if ($architecture ne $hostarch) {
|
|||
}
|
||||
|
||||
$cross = 1 if ($os ne $hostos);
|
||||
$cross = 0 if (($os eq "Android") && ($hostos eq "Linux") && ($ENV{TERMUX_APP_PID} != ""));
|
||||
|
||||
$openmp = "" if $ENV{USE_OPENMP} != 1;
|
||||
|
||||
|
|
|
@ -161,6 +161,30 @@ if (${CORE} STREQUAL ARMV8SVE)
|
|||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL CORTEXA510)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL CORTEXA710)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL CORTEXX1)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL CORTEXX2)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (${CORE} STREQUAL POWER10)
|
||||
if (NOT DYNAMIC_ARCH)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
|
||||
|
|
|
@ -45,6 +45,10 @@ size_t length64=sizeof(value64);
|
|||
#define CPU_NEOVERSEN1 11
|
||||
#define CPU_NEOVERSEV1 16
|
||||
#define CPU_NEOVERSEN2 17
|
||||
#define CPU_CORTEXX1 18
|
||||
#define CPU_CORTEXX2 19
|
||||
#define CPU_CORTEXA510 20
|
||||
#define CPU_CORTEXA710 21
|
||||
// Qualcomm
|
||||
#define CPU_FALKOR 6
|
||||
// Cavium
|
||||
|
@ -59,6 +63,8 @@ size_t length64=sizeof(value64);
|
|||
#define CPU_VORTEX 13
|
||||
// Fujitsu
|
||||
#define CPU_A64FX 15
|
||||
// Phytium
|
||||
#define CPU_FT2000 22
|
||||
|
||||
static char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
|
@ -73,12 +79,17 @@ static char *cpuname[] = {
|
|||
"TSV110",
|
||||
"EMAG8180",
|
||||
"NEOVERSEN1",
|
||||
"NEOVERSEV1"
|
||||
"NEOVERSEN2"
|
||||
"THUNDERX3T110",
|
||||
"VORTEX",
|
||||
"CORTEXA55",
|
||||
"A64FX"
|
||||
"A64FX",
|
||||
"NEOVERSEV1",
|
||||
"NEOVERSEN2",
|
||||
"CORTEXX1",
|
||||
"CORTEXX2",
|
||||
"CORTEXA510",
|
||||
"CORTEXA710",
|
||||
"FT2000"
|
||||
};
|
||||
|
||||
static char *cpuname_lower[] = {
|
||||
|
@ -94,12 +105,17 @@ static char *cpuname_lower[] = {
|
|||
"tsv110",
|
||||
"emag8180",
|
||||
"neoversen1",
|
||||
"neoversev1",
|
||||
"neoversen2",
|
||||
"thunderx3t110",
|
||||
"vortex",
|
||||
"cortexa55",
|
||||
"a64fx"
|
||||
"a64fx",
|
||||
"neoversev1",
|
||||
"neoversen2",
|
||||
"cortexx1",
|
||||
"cortexx2",
|
||||
"cortexa510",
|
||||
"cortexa710",
|
||||
"ft2000"
|
||||
};
|
||||
|
||||
int get_feature(char *search)
|
||||
|
@ -182,6 +198,14 @@ int detect(void)
|
|||
return CPU_NEOVERSEN2;
|
||||
else if (strstr(cpu_part, "0xd05"))
|
||||
return CPU_CORTEXA55;
|
||||
else if (strstr(cpu_part, "0xd46"))
|
||||
return CPU_CORTEXA510;
|
||||
else if (strstr(cpu_part, "0xd47"))
|
||||
return CPU_CORTEXA710;
|
||||
else if (strstr(cpu_part, "0xd44"))
|
||||
return CPU_CORTEXX1;
|
||||
else if (strstr(cpu_part, "0xd4c"))
|
||||
return CPU_CORTEXX2;
|
||||
}
|
||||
// Qualcomm
|
||||
else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00"))
|
||||
|
@ -202,6 +226,13 @@ int detect(void)
|
|||
// Fujitsu
|
||||
else if (strstr(cpu_implementer, "0x46") && strstr(cpu_part, "0x001"))
|
||||
return CPU_A64FX;
|
||||
// Apple
|
||||
else if (strstr(cpu_implementer, "0x61") && strstr(cpu_part, "0x022"))
|
||||
return CPU_VORTEX;
|
||||
// Phytium
|
||||
else if (strstr(cpu_implementer, "0x70") && (strstr(cpu_part, "0x660") || strstr(cpu_part, "0x661")
|
||||
|| strstr(cpu_part, "0x662") || strstr(cpu_part, "0x663")))
|
||||
return CPU_FT2000;
|
||||
}
|
||||
|
||||
p = (char *) NULL ;
|
||||
|
@ -382,7 +413,24 @@ void get_cpuconfig(void)
|
|||
printf("#define DTB_DEFAULT_ENTRIES 48\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
|
||||
case CPU_CORTEXA510:
|
||||
case CPU_CORTEXA710:
|
||||
case CPU_CORTEXX1:
|
||||
case CPU_CORTEXX2:
|
||||
printf("#define ARMV9\n");
|
||||
printf("#define %s\n", cpuname[d]);
|
||||
printf("#define L1_CODE_SIZE 65536\n");
|
||||
printf("#define L1_CODE_LINESIZE 64\n");
|
||||
printf("#define L1_CODE_ASSOCIATIVE 4\n");
|
||||
printf("#define L1_DATA_SIZE 65536\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L1_DATA_ASSOCIATIVE 4\n");
|
||||
printf("#define L2_SIZE 1048576\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define L2_ASSOCIATIVE 8\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
case CPU_FALKOR:
|
||||
printf("#define FALKOR\n");
|
||||
printf("#define L1_CODE_SIZE 65536\n");
|
||||
|
@ -469,9 +517,9 @@ void get_cpuconfig(void)
|
|||
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||
printf("#define DTB_SIZE 4096 \n");
|
||||
break;
|
||||
#ifdef __APPLE__
|
||||
case CPU_VORTEX:
|
||||
printf("#define VORTEX \n");
|
||||
#ifdef __APPLE__
|
||||
sysctlbyname("hw.l1icachesize",&value64,&length64,NULL,0);
|
||||
printf("#define L1_CODE_SIZE %lld \n",value64);
|
||||
sysctlbyname("hw.cachelinesize",&value64,&length64,NULL,0);
|
||||
|
@ -480,10 +528,10 @@ void get_cpuconfig(void)
|
|||
printf("#define L1_DATA_SIZE %lld \n",value64);
|
||||
sysctlbyname("hw.l2cachesize",&value64,&length64,NULL,0);
|
||||
printf("#define L2_SIZE %lld \n",value64);
|
||||
#endif
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
|
||||
printf("#define DTB_SIZE 4096 \n");
|
||||
break;
|
||||
#endif
|
||||
case CPU_A64FX:
|
||||
printf("#define A64FX\n");
|
||||
printf("#define L1_CODE_SIZE 65535\n");
|
||||
|
@ -494,6 +542,16 @@ void get_cpuconfig(void)
|
|||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
case CPU_FT2000:
|
||||
printf("#define FT2000\n");
|
||||
printf("#define L1_CODE_SIZE 32768\n");
|
||||
printf("#define L1_DATA_SIZE 32768\n");
|
||||
printf("#define L1_DATA_LINESIZE 64\n");
|
||||
printf("#define L2_SIZE 33554432\n");
|
||||
printf("#define L2_LINESIZE 64\n");
|
||||
printf("#define DTB_DEFAULT_ENTRIES 64\n");
|
||||
printf("#define DTB_SIZE 4096\n");
|
||||
break;
|
||||
}
|
||||
get_cpucount();
|
||||
}
|
||||
|
|
86
getarch.c
86
getarch.c
|
@ -1232,7 +1232,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexa53"
|
||||
#define CORENAME "CORTEXA53"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA57
|
||||
|
@ -1248,7 +1247,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexa57"
|
||||
#define CORENAME "CORTEXA57"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA72
|
||||
|
@ -1264,7 +1262,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexa72"
|
||||
#define CORENAME "CORTEXA72"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA73
|
||||
|
@ -1280,7 +1277,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexa73"
|
||||
#define CORENAME "CORTEXA73"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXX1
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "CORTEXX1"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DCORTEXX1 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
|
||||
#define LIBNAME "cortexx1"
|
||||
#define CORENAME "CORTEXX1"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXX2
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "CORTEXX2"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DCORTEXX2 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
|
||||
#define LIBNAME "cortexx2"
|
||||
#define CORENAME "CORTEXX2"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA510
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "CORTEXA510"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DCORTEXA510 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
|
||||
#define LIBNAME "cortexa510"
|
||||
#define CORENAME "CORTEXA510"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA710
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "CORTEXA710"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DCORTEXA710 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
|
||||
#define LIBNAME "cortexa710"
|
||||
#define CORENAME "CORTEXA710"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_NEOVERSEN1
|
||||
|
@ -1297,7 +1349,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-march=armv8.2-a -mtune=neoverse-n1"
|
||||
#define LIBNAME "neoversen1"
|
||||
#define CORENAME "NEOVERSEN1"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_NEOVERSEV1
|
||||
|
@ -1314,7 +1365,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-march=armv8.4-a -mtune=neoverse-v1"
|
||||
#define LIBNAME "neoversev1"
|
||||
#define CORENAME "NEOVERSEV1"
|
||||
#else
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -1332,7 +1382,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-march=armv8.5-a -mtune=neoverse-n2"
|
||||
#define LIBNAME "neoversen2"
|
||||
#define CORENAME "NEOVERSEN2"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_CORTEXA55
|
||||
|
@ -1348,7 +1397,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "cortexa55"
|
||||
#define CORENAME "CORTEXA55"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_FALKOR
|
||||
|
@ -1364,7 +1412,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "falkor"
|
||||
#define CORENAME "FALKOR"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_THUNDERX
|
||||
|
@ -1379,7 +1426,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "thunderx"
|
||||
#define CORENAME "THUNDERX"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_THUNDERX2T99
|
||||
|
@ -1397,7 +1443,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "thunderx2t99"
|
||||
#define CORENAME "THUNDERX2T99"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_TSV110
|
||||
|
@ -1413,7 +1458,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "tsv110"
|
||||
#define CORENAME "TSV110"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_EMAG8180
|
||||
|
@ -1448,7 +1492,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "thunderx3t110"
|
||||
#define CORENAME "THUNDERX3T110"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_VORTEX
|
||||
|
@ -1480,7 +1523,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
|
||||
#define LIBNAME "a64fx"
|
||||
#define CORENAME "A64FX"
|
||||
#else
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_FT2000
|
||||
#define ARMV8
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
#define SUBARCHITECTURE "FT2000"
|
||||
#define SUBDIRNAME "arm64"
|
||||
#define ARCHCONFIG "-DFT2000 " \
|
||||
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
|
||||
"-DL2_SIZE=33554426-DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||
#define LIBNAME "ft2000"
|
||||
#define CORENAME "FT2000"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ZARCH_GENERIC
|
||||
|
|
|
@ -0,0 +1,216 @@
|
|||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||
|
||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||
|
||||
|
||||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot.S
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
@ -0,0 +1,216 @@
|
|||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||
|
||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||
|
||||
|
||||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot.S
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
@ -0,0 +1,216 @@
|
|||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||
|
||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||
|
||||
|
||||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot.S
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
@ -0,0 +1,216 @@
|
|||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
||||
|
||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
||||
|
||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
||||
|
||||
|
||||
SAMAXKERNEL = amax.S
|
||||
DAMAXKERNEL = amax.S
|
||||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
SAXPYKERNEL = axpy.S
|
||||
DAXPYKERNEL = axpy.S
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
ZAXPYKERNEL = zaxpy.S
|
||||
|
||||
SROTKERNEL = rot.S
|
||||
DROTKERNEL = rot.S
|
||||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n.S
|
||||
DGEMVNKERNEL = gemv_n.S
|
||||
CGEMVNKERNEL = zgemv_n.S
|
||||
ZGEMVNKERNEL = zgemv_n.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t.S
|
||||
DGEMVTKERNEL = gemv_t.S
|
||||
CGEMVTKERNEL = zgemv_t.S
|
||||
ZGEMVTKERNEL = zgemv_t.S
|
||||
|
||||
|
||||
SASUMKERNEL = asum.S
|
||||
DASUMKERNEL = asum.S
|
||||
CASUMKERNEL = casum.S
|
||||
ZASUMKERNEL = zasum.S
|
||||
|
||||
SCOPYKERNEL = copy.S
|
||||
DCOPYKERNEL = copy.S
|
||||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
SSWAPKERNEL = swap.S
|
||||
DSWAPKERNEL = swap.S
|
||||
CSWAPKERNEL = swap.S
|
||||
ZSWAPKERNEL = swap.S
|
||||
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
DDOTKERNEL = dot.S
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
SDOTKERNEL = ../generic/dot.c
|
||||
else
|
||||
SDOTKERNEL = dot.S
|
||||
endif
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
else
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
endif
|
||||
DSDOTKERNEL = dot.S
|
||||
|
||||
DGEMM_BETA = dgemm_beta.S
|
||||
SGEMM_BETA = sgemm_beta.S
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
||||
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
||||
|
||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
||||
|
||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
||||
|
||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
||||
|
||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
@ -0,0 +1,3 @@
|
|||
include $(KERNELDIR)/KERNEL.CORTEXA57
|
||||
|
||||
|
4
param.h
4
param.h
|
@ -3130,7 +3130,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(CORTEXA57) || \
|
||||
defined(CORTEXA72) || defined(CORTEXA73) || \
|
||||
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
|
||||
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
|||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
#elif defined(ARMV8SVE) || defined(A64FX)
|
||||
#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)
|
||||
|
||||
/* When all BLAS3 routines are implemeted with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
||||
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions seperated. */
|
||||
|
|
Loading…
Reference in New Issue