Merge pull request #3588 from martin-frbg/fix3586
Fix mistaken declaration of CortexX1 as ArmV9 in PR#3586
This commit is contained in:
commit
d5ed695632
|
@ -238,18 +238,18 @@ endif
|
||||||
|
|
||||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||||
ifeq ($(CORE), CORTEXX1)
|
ifeq ($(CORE), CORTEXX1)
|
||||||
CCOMMON_OPT += -march=armv9 -mtune=cortexx1
|
CCOMMON_OPT += -march=armv8.2-a -mtune=cortexa72
|
||||||
ifneq ($(F_COMPILER), NAG)
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv9 -mtune=cortexx1
|
FCOMMON_OPT += -march=armv8.2-a -mtune=cortexa72
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||||
ifeq ($(CORE), CORTEXX2)
|
ifeq ($(CORE), CORTEXX2)
|
||||||
CCOMMON_OPT += -march=armv9 -mtune=cortexx2
|
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||||
ifneq ($(F_COMPILER), NAG)
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv9 -mtune=cortexx2
|
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
@ -266,9 +266,9 @@ endif
|
||||||
|
|
||||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||||
ifeq ($(CORE), CORTEXA710)
|
ifeq ($(CORE), CORTEXA710)
|
||||||
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortexa710
|
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||||
ifneq ($(F_COMPILER), NAG)
|
ifneq ($(F_COMPILER), NAG)
|
||||||
FCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortexa710
|
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -175,7 +175,7 @@ endif ()
|
||||||
|
|
||||||
if (${CORE} STREQUAL CORTEXX1)
|
if (${CORE} STREQUAL CORTEXX1)
|
||||||
if (NOT DYNAMIC_ARCH)
|
if (NOT DYNAMIC_ARCH)
|
||||||
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
|
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a")
|
||||||
endif ()
|
endif ()
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
|
@ -1288,7 +1288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
|
||||||
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8 -DARMV9"
|
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
|
||||||
#define LIBNAME "cortexx1"
|
#define LIBNAME "cortexx1"
|
||||||
#define CORENAME "CORTEXX1"
|
#define CORENAME "CORTEXX1"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1,216 +1 @@
|
||||||
SAMINKERNEL = ../arm/amin.c
|
include $(KERNELDIR)/KERNEL.CORTEXA57
|
||||||
DAMINKERNEL = ../arm/amin.c
|
|
||||||
CAMINKERNEL = ../arm/zamin.c
|
|
||||||
ZAMINKERNEL = ../arm/zamin.c
|
|
||||||
|
|
||||||
SMAXKERNEL = ../arm/max.c
|
|
||||||
DMAXKERNEL = ../arm/max.c
|
|
||||||
|
|
||||||
SMINKERNEL = ../arm/min.c
|
|
||||||
DMINKERNEL = ../arm/min.c
|
|
||||||
|
|
||||||
ISAMINKERNEL = ../arm/iamin.c
|
|
||||||
IDAMINKERNEL = ../arm/iamin.c
|
|
||||||
ICAMINKERNEL = ../arm/izamin.c
|
|
||||||
IZAMINKERNEL = ../arm/izamin.c
|
|
||||||
|
|
||||||
ISMAXKERNEL = ../arm/imax.c
|
|
||||||
IDMAXKERNEL = ../arm/imax.c
|
|
||||||
|
|
||||||
ISMINKERNEL = ../arm/imin.c
|
|
||||||
IDMINKERNEL = ../arm/imin.c
|
|
||||||
|
|
||||||
STRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
|
||||||
STRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
|
||||||
STRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
|
||||||
STRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
|
||||||
|
|
||||||
DTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
|
||||||
DTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
|
||||||
DTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
|
||||||
DTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
|
||||||
|
|
||||||
TRSMCOPYLN_M = trsm_lncopy_sve.c
|
|
||||||
TRSMCOPYLT_M = trsm_ltcopy_sve.c
|
|
||||||
TRSMCOPYUN_M = trsm_uncopy_sve.c
|
|
||||||
TRSMCOPYUT_M = trsm_utcopy_sve.c
|
|
||||||
|
|
||||||
CTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
|
||||||
CTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
|
||||||
CTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
|
||||||
CTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
|
||||||
|
|
||||||
ZTRSMKERNEL_LN = trsm_kernel_LN_sve.c
|
|
||||||
ZTRSMKERNEL_LT = trsm_kernel_LT_sve.c
|
|
||||||
ZTRSMKERNEL_RN = trsm_kernel_RN_sve.c
|
|
||||||
ZTRSMKERNEL_RT = trsm_kernel_RT_sve.c
|
|
||||||
|
|
||||||
ZTRSMCOPYLN_M = ztrsm_lncopy_sve.c
|
|
||||||
ZTRSMCOPYLT_M = ztrsm_ltcopy_sve.c
|
|
||||||
ZTRSMCOPYUN_M = ztrsm_uncopy_sve.c
|
|
||||||
ZTRSMCOPYUT_M = ztrsm_utcopy_sve.c
|
|
||||||
|
|
||||||
|
|
||||||
SAMAXKERNEL = amax.S
|
|
||||||
DAMAXKERNEL = amax.S
|
|
||||||
CAMAXKERNEL = zamax.S
|
|
||||||
ZAMAXKERNEL = zamax.S
|
|
||||||
|
|
||||||
SAXPYKERNEL = axpy.S
|
|
||||||
DAXPYKERNEL = axpy.S
|
|
||||||
CAXPYKERNEL = zaxpy.S
|
|
||||||
ZAXPYKERNEL = zaxpy.S
|
|
||||||
|
|
||||||
SROTKERNEL = rot.S
|
|
||||||
DROTKERNEL = rot.S
|
|
||||||
CROTKERNEL = zrot.S
|
|
||||||
ZROTKERNEL = zrot.S
|
|
||||||
|
|
||||||
SSCALKERNEL = scal.S
|
|
||||||
DSCALKERNEL = scal.S
|
|
||||||
CSCALKERNEL = zscal.S
|
|
||||||
ZSCALKERNEL = zscal.S
|
|
||||||
|
|
||||||
SGEMVNKERNEL = gemv_n.S
|
|
||||||
DGEMVNKERNEL = gemv_n.S
|
|
||||||
CGEMVNKERNEL = zgemv_n.S
|
|
||||||
ZGEMVNKERNEL = zgemv_n.S
|
|
||||||
|
|
||||||
SGEMVTKERNEL = gemv_t.S
|
|
||||||
DGEMVTKERNEL = gemv_t.S
|
|
||||||
CGEMVTKERNEL = zgemv_t.S
|
|
||||||
ZGEMVTKERNEL = zgemv_t.S
|
|
||||||
|
|
||||||
|
|
||||||
SASUMKERNEL = asum.S
|
|
||||||
DASUMKERNEL = asum.S
|
|
||||||
CASUMKERNEL = casum.S
|
|
||||||
ZASUMKERNEL = zasum.S
|
|
||||||
|
|
||||||
SCOPYKERNEL = copy.S
|
|
||||||
DCOPYKERNEL = copy.S
|
|
||||||
CCOPYKERNEL = copy.S
|
|
||||||
ZCOPYKERNEL = copy.S
|
|
||||||
|
|
||||||
SSWAPKERNEL = swap.S
|
|
||||||
DSWAPKERNEL = swap.S
|
|
||||||
CSWAPKERNEL = swap.S
|
|
||||||
ZSWAPKERNEL = swap.S
|
|
||||||
|
|
||||||
ISAMAXKERNEL = iamax.S
|
|
||||||
IDAMAXKERNEL = iamax.S
|
|
||||||
ICAMAXKERNEL = izamax.S
|
|
||||||
IZAMAXKERNEL = izamax.S
|
|
||||||
|
|
||||||
SNRM2KERNEL = nrm2.S
|
|
||||||
DNRM2KERNEL = nrm2.S
|
|
||||||
CNRM2KERNEL = znrm2.S
|
|
||||||
ZNRM2KERNEL = znrm2.S
|
|
||||||
|
|
||||||
DDOTKERNEL = dot.S
|
|
||||||
ifneq ($(C_COMPILER), PGI)
|
|
||||||
SDOTKERNEL = ../generic/dot.c
|
|
||||||
else
|
|
||||||
SDOTKERNEL = dot.S
|
|
||||||
endif
|
|
||||||
ifneq ($(C_COMPILER), PGI)
|
|
||||||
CDOTKERNEL = zdot.S
|
|
||||||
ZDOTKERNEL = zdot.S
|
|
||||||
else
|
|
||||||
CDOTKERNEL = ../arm/zdot.c
|
|
||||||
ZDOTKERNEL = ../arm/zdot.c
|
|
||||||
endif
|
|
||||||
DSDOTKERNEL = dot.S
|
|
||||||
|
|
||||||
DGEMM_BETA = dgemm_beta.S
|
|
||||||
SGEMM_BETA = sgemm_beta.S
|
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
|
|
||||||
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
|
|
||||||
|
|
||||||
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
|
|
||||||
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
|
|
||||||
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
|
|
||||||
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
|
|
||||||
|
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
|
|
||||||
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
|
||||||
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
|
||||||
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
|
||||||
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
|
||||||
|
|
||||||
SSYMMUCOPY_M = symm_ucopy_sve.c
|
|
||||||
SSYMMLCOPY_M = symm_lcopy_sve.c
|
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
|
|
||||||
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
|
|
||||||
|
|
||||||
DGEMMINCOPY = dgemm_ncopy_sve_v1.c
|
|
||||||
DGEMMITCOPY = dgemm_tcopy_sve_v1.c
|
|
||||||
DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
|
|
||||||
DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
|
|
||||||
|
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
|
|
||||||
DTRMMUNCOPY_M = trmm_uncopy_sve_v1.c
|
|
||||||
DTRMMLNCOPY_M = trmm_lncopy_sve_v1.c
|
|
||||||
DTRMMUTCOPY_M = trmm_utcopy_sve_v1.c
|
|
||||||
DTRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
|
|
||||||
|
|
||||||
DSYMMUCOPY_M = symm_ucopy_sve.c
|
|
||||||
DSYMMLCOPY_M = symm_lcopy_sve.c
|
|
||||||
|
|
||||||
CGEMMKERNEL = cgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
|
||||||
CTRMMKERNEL = ctrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
|
||||||
|
|
||||||
CGEMMINCOPY = cgemm_ncopy_sve_v1.c
|
|
||||||
CGEMMITCOPY = cgemm_tcopy_sve_v1.c
|
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
|
||||||
|
|
||||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
|
|
||||||
CTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
|
||||||
CTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
|
||||||
CTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
|
||||||
CTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
|
||||||
|
|
||||||
CHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
|
||||||
CHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
|
||||||
|
|
||||||
CSYMMUCOPY_M = zsymm_ucopy_sve.c
|
|
||||||
CSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
||||||
|
|
||||||
ZGEMMKERNEL = zgemm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
|
||||||
ZTRMMKERNEL = ztrmm_kernel_sve_v1x$(ZGEMM_UNROLL_N).S
|
|
||||||
|
|
||||||
ZGEMMINCOPY = zgemm_ncopy_sve_v1.c
|
|
||||||
ZGEMMITCOPY = zgemm_tcopy_sve_v1.c
|
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
|
||||||
|
|
||||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
|
|
||||||
ZTRMMUNCOPY_M = ztrmm_uncopy_sve_v1.c
|
|
||||||
ZTRMMLNCOPY_M = ztrmm_lncopy_sve_v1.c
|
|
||||||
ZTRMMUTCOPY_M = ztrmm_utcopy_sve_v1.c
|
|
||||||
ZTRMMLTCOPY_M = ztrmm_ltcopy_sve_v1.c
|
|
||||||
|
|
||||||
ZHEMMLTCOPY_M = zhemm_ltcopy_sve.c
|
|
||||||
ZHEMMUTCOPY_M = zhemm_utcopy_sve.c
|
|
||||||
|
|
||||||
ZSYMMUCOPY_M = zsymm_ucopy_sve.c
|
|
||||||
ZSYMMLCOPY_M = zsymm_lcopy_sve.c
|
|
||||||
|
|
6
param.h
6
param.h
|
@ -3128,7 +3128,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
|
|
||||||
#if defined(CORTEXA57) || \
|
#if defined(CORTEXA57) || defined(CORTEXX1) || \
|
||||||
defined(CORTEXA72) || defined(CORTEXA73) || \
|
defined(CORTEXA72) || defined(CORTEXA73) || \
|
||||||
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
|
defined(FALKOR) || defined(TSV110) || defined(EMAG8180) || defined(VORTEX) || defined(FT2000)
|
||||||
|
|
||||||
|
@ -3147,7 +3147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/*FIXME: this should be using the cache size, but there is currently no easy way to
|
/*FIXME: this should be using the cache size, but there is currently no easy way to
|
||||||
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
|
query that on ARM. So if getarch counted more than 8 cores we simply assume the host
|
||||||
is a big desktop or server with abundant cache rather than a phone or embedded device */
|
is a big desktop or server with abundant cache rather than a phone or embedded device */
|
||||||
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)
|
#if NUM_CORES > 8 || defined(TSV110) || defined(EMAG8180) || defined(VORTEX)|| defined(CORTEXX1)
|
||||||
#define SGEMM_DEFAULT_P 512
|
#define SGEMM_DEFAULT_P 512
|
||||||
#define DGEMM_DEFAULT_P 256
|
#define DGEMM_DEFAULT_P 256
|
||||||
#define CGEMM_DEFAULT_P 256
|
#define CGEMM_DEFAULT_P 256
|
||||||
|
@ -3377,7 +3377,7 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
||||||
#define CGEMM_DEFAULT_R 4096
|
#define CGEMM_DEFAULT_R 4096
|
||||||
#define ZGEMM_DEFAULT_R 4096
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)
|
#elif defined(ARMV8SVE) || defined(A64FX) || defined(ARMV9) || defined(CORTEXA510)|| defined(CORTEXA710) || defined(CORTEXX2)
|
||||||
|
|
||||||
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
|
||||||
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
|
||||||
|
|
Loading…
Reference in New Issue