Use POWER6 GEMM, TRMM and DTRSM on 32bit POWER8

This commit is contained in:
Martin Kroeker 2020-07-14 18:11:19 +02:00 committed by GitHub
parent b144423f0f
commit f8c2697701
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 68 additions and 16 deletions

View File

@ -1,3 +1,51 @@
ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
$(info baue power6)
SGEMMKERNEL = gemm_kernel_power6.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMONCOPY = gemm_ncopy_4.S
SGEMMOTCOPY = gemm_tcopy_4.S
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = gemm_kernel_power6.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = gemm_ncopy_4.S
DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = zgemm_kernel_power6.S
CGEMMINCOPY = ../generic/zgemm_ncopy_2.c
CGEMMITCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = zgemm_kernel_power6.S
ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRSMKERNEL_LN = trsm_kernel_power6_LN.S
DTRSMKERNEL_LT = trsm_kernel_power6_LT.S
DTRSMKERNEL_RN = trsm_kernel_power6_LT.S
DTRSMKERNEL_RT = trsm_kernel_power6_RT.S
CAXPYKERNEL = zaxpy.S
else
$(info baue power8)
#SGEMM_BETA = ../generic/gemm_beta.c
#DGEMM_BETA = ../generic/gemm_beta.c
#CGEMM_BETA = ../generic/zgemm_beta.c
@ -47,16 +95,21 @@ ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
endif
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
#DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
#DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
#DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
#DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
@ -153,15 +206,15 @@ ZASUMKERNEL = zasum.c
#
SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c
ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
ifneq ($(GCCVERSIONGTEQ9),1)
CAXPYKERNEL = caxpy_power8.S
else
CAXPYKERNEL = caxpy.c
endif
else
CAXPYKERNEL = caxpy.c
endif
#ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
#ifneq ($(GCCVERSIONGTEQ9),1)
#CAXPYKERNEL = caxpy_power8.S
#else
#CAXPYKERNEL = caxpy.c
#endif
#else
#CAXPYKERNEL = caxpy.c
#endif
#
ZAXPYKERNEL = zaxpy.c
#
@ -173,7 +226,7 @@ ZCOPYKERNEL = zcopy.c
SDOTKERNEL = sdot.c
DDOTKERNEL = ddot.c
DSDOTKERNEL = sdot.c
CDOTKERNEL = cdot.c
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = zdot.c
#
SNRM2KERNEL = ../arm/nrm2.c
@ -183,7 +236,7 @@ ZNRM2KERNEL = ../arm/znrm2.c
#
SROTKERNEL = srot.c
DROTKERNEL = drot.c
CROTKERNEL = crot.c
#CROTKERNEL = crot.c
ZROTKERNEL = zrot.c
#
SSCALKERNEL = sscal.c
@ -239,4 +292,3 @@ IDAMINKERNEL = ../arm/iamin.c
IZAMAXKERNEL = ../arm/izamax.c
IZAMINKERNEL = ../arm/izamin.c
endif