diff --git a/KERNEL.CORTEXA55 b/KERNEL.CORTEXA55 new file mode 100644 index 000000000..db322dd0d --- /dev/null +++ b/KERNEL.CORTEXA55 @@ -0,0 +1,196 @@ +SAMINKERNEL = ../arm/amin.c +DAMINKERNEL = ../arm/amin.c +CAMINKERNEL = ../arm/zamin.c +ZAMINKERNEL = ../arm/zamin.c + +SMAXKERNEL = ../arm/max.c +DMAXKERNEL = ../arm/max.c + +SMINKERNEL = ../arm/min.c +DMINKERNEL = ../arm/min.c + +ISAMINKERNEL = ../arm/iamin.c +IDAMINKERNEL = ../arm/iamin.c +ICAMINKERNEL = ../arm/izamin.c +IZAMINKERNEL = ../arm/izamin.c + +ISMAXKERNEL = ../arm/imax.c +IDMAXKERNEL = ../arm/imax.c + +ISMINKERNEL = ../arm/imin.c +IDMINKERNEL = ../arm/imin.c + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +SAMAXKERNEL = amax.S +DAMAXKERNEL = amax.S +CAMAXKERNEL = zamax.S +ZAMAXKERNEL = zamax.S + +SAXPYKERNEL = axpy.S +DAXPYKERNEL = axpy.S +CAXPYKERNEL = zaxpy.S +ZAXPYKERNEL = zaxpy.S + +SROTKERNEL = rot.S +DROTKERNEL = rot.S +CROTKERNEL = zrot.S +ZROTKERNEL = zrot.S + +SSCALKERNEL = scal.S +DSCALKERNEL = scal.S +CSCALKERNEL = zscal.S +ZSCALKERNEL = zscal.S + +SGEMVNKERNEL = gemv_n.S +DGEMVNKERNEL = gemv_n.S +CGEMVNKERNEL = zgemv_n.S +ZGEMVNKERNEL = zgemv_n.S + +SGEMVTKERNEL = gemv_t.S +DGEMVTKERNEL = gemv_t.S +CGEMVTKERNEL = zgemv_t.S +ZGEMVTKERNEL = zgemv_t.S + + +SASUMKERNEL = asum.S +DASUMKERNEL = asum.S +CASUMKERNEL = casum.S +ZASUMKERNEL = zasum.S + +SCOPYKERNEL = copy.S +DCOPYKERNEL = copy.S +CCOPYKERNEL = copy.S +ZCOPYKERNEL = copy.S + +SSWAPKERNEL = swap.S +DSWAPKERNEL = swap.S +CSWAPKERNEL = swap.S +ZSWAPKERNEL = swap.S + +ISAMAXKERNEL = iamax.S +IDAMAXKERNEL = iamax.S +ICAMAXKERNEL = izamax.S +IZAMAXKERNEL = izamax.S + +SNRM2KERNEL = nrm2.S +DNRM2KERNEL = nrm2.S +CNRM2KERNEL = znrm2.S +ZNRM2KERNEL = znrm2.S + +ifneq ($(C_COMPILER), PGI) +SDOTKERNEL = ../generic/dot.c +else +SDOTKERNEL = dot.S +endif +DDOTKERNEL = dot.S +ifneq ($(C_COMPILER), PGI) +CDOTKERNEL = zdot.S +ZDOTKERNEL = zdot.S +else +CDOTKERNEL = ../arm/zdot.c +ZDOTKERNEL = ../arm/zdot.c +endif +DSDOTKERNEL = dot.S + +DGEMM_BETA = dgemm_beta.S +SGEMM_BETA = sgemm_beta.S + +ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 8x8) +SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_cortexa53.S +STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_cortexa53.S +else +SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S +STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S +endif +ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) +ifeq ($(SGEMM_UNROLL_M), 16) +SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S +else +SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c +endif +ifeq ($(SGEMM_UNROLL_M), 4) +SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S +else +SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c +endif +SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) +SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif + +SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S +SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) + +DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S +DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S + +ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) + +ifeq ($(DGEMM_UNROLL_M), 8) +DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S +DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S +else +DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c +DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c +endif + +DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) +DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif + +ifeq ($(DGEMM_UNROLL_N), 4) +DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S +DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S +else +DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c +DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c +endif + +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) + +CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S +CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S +ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) +CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c +CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c +CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) +CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif +CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c +CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) + +ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S +ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S +ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) +ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c +ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif +ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) diff --git a/Makefile.arm64 b/Makefile.arm64 index 3858d7e3f..c23a0876e 100644 --- a/Makefile.arm64 +++ b/Makefile.arm64 @@ -57,6 +57,28 @@ endif endif endif +# Use a53 tunings because a55 is only available in GCC>=8.1 +ifeq ($(CORE), CORTEXA55) +ifeq ($(GCCVERSIONGTEQ7), 1) +ifeq ($(GCCVERSIONGTEQ8), 1) +CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55 +ifneq ($(F_COMPILER), NAG) +FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55 +endif +else +CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53 +ifneq ($(F_COMPILER), NAG) +FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53 +endif +endif +else +CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 +ifneq ($(F_COMPILER), NAG) +FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 +endif +endif +endif + ifeq ($(CORE), THUNDERX) CCOMMON_OPT += -march=armv8-a -mtune=thunderx ifneq ($(F_COMPILER), NAG) diff --git a/Makefile.system b/Makefile.system index 2264b143b..0cd3e3a7c 100644 --- a/Makefile.system +++ b/Makefile.system @@ -625,6 +625,7 @@ DYNAMIC_CORE += CORTEXA57 DYNAMIC_CORE += CORTEXA72 DYNAMIC_CORE += CORTEXA73 DYNAMIC_CORE += NEOVERSEN1 +DYNAMIC_CORE += CORTEXA55 DYNAMIC_CORE += FALKOR DYNAMIC_CORE += THUNDERX DYNAMIC_CORE += THUNDERX2T99 diff --git a/TargetList.txt b/TargetList.txt index d19964916..f93a629d8 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -92,6 +92,7 @@ CORTEXA57 CORTEXA72 CORTEXA73 NEOVERSEN1 +CORTEXA55 EMAG8180 FALKOR THUNDERX diff --git a/cpuid_arm64.c b/cpuid_arm64.c index 5f5d7771b..a150301d1 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -36,6 +36,7 @@ size_t length=sizeof(value); #define CPU_ARMV8 1 // Arm #define CPU_CORTEXA53 2 +#define CPU_CORTEXA55 14 #define CPU_CORTEXA57 3 #define CPU_CORTEXA72 4 #define CPU_CORTEXA73 5 @@ -67,7 +68,8 @@ static char *cpuname[] = { "EMAG8180", "NEOVERSEN1", "THUNDERX3T110", - "VORTEX" + "VORTEX", + "CORTEXA55" }; static char *cpuname_lower[] = { @@ -84,7 +86,8 @@ static char *cpuname_lower[] = { "emag8180", "neoversen1", "thunderx3t110", - "vortex" + "vortex", + "cortexa55" }; int get_feature(char *search) @@ -161,6 +164,8 @@ int detect(void) return CPU_CORTEXA73; else if (strstr(cpu_part, "0xd0c")) return CPU_NEOVERSEN1; + else if (strstr(cpu_part, "0xd05")) + return CPU_CORTEXA55; } // Qualcomm else if (strstr(cpu_implementer, "0x51") && strstr(cpu_part, "0xc00")) @@ -284,6 +289,8 @@ void get_cpuconfig(void) printf("#define %s\n", cpuname[d]); // Fall-through case CPU_ARMV8: + // case CPU_CORTEXA53; + // case CPU_CORTEXA55; // Minimum parameters for ARMv8 (based on A53) printf("#define L1_DATA_SIZE 32768\n"); printf("#define L1_DATA_LINESIZE 64\n"); diff --git a/getarch.c b/getarch.c index f48944f36..3bc8a0c3d 100644 --- a/getarch.c +++ b/getarch.c @@ -1159,6 +1159,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif +#ifdef FORCE_CORTEXA55 +#define FORCE +#define ARCHITECTURE "ARM64" +#define SUBARCHITECTURE "CORTEXA55" +#define SUBDIRNAME "arm64" +#define ARCHCONFIG "-DCORTEXA55 " \ + "-DL1_CODE_SIZE=16384 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=3 " \ + "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=2 " \ + "-DL2_SIZE=65536 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=16 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" +#define LIBNAME "cortexa55" +#define CORENAME "CORTEXA55" +#else +#endif #ifdef FORCE_FALKOR #define FORCE diff --git a/param.h b/param.h index ddad2fb36..01048023f 100644 --- a/param.h +++ b/param.h @@ -2959,7 +2959,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d #define CGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 2048 -#elif defined(CORTEXA53) +#elif defined(CORTEXA53) || defined(CORTEXA55) #define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_N 8