Use POWER6 GEMM, TRMM and DTRSM on 32-bit POWER8
This commit is contained in:
parent
b144423f0f
commit
f8c2697701
|
@ -1,3 +1,51 @@
|
|||
ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
|
||||
$(info baue power6)
|
||||
SGEMMKERNEL = gemm_kernel_power6.S
|
||||
SGEMMINCOPY =
|
||||
SGEMMITCOPY =
|
||||
SGEMMONCOPY = gemm_ncopy_4.S
|
||||
SGEMMOTCOPY = gemm_tcopy_4.S
|
||||
SGEMMINCOPYOBJ =
|
||||
SGEMMITCOPYOBJ =
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMKERNEL = gemm_kernel_power6.S
|
||||
DGEMMINCOPY =
|
||||
DGEMMITCOPY =
|
||||
DGEMMONCOPY = gemm_ncopy_4.S
|
||||
DGEMMOTCOPY = gemm_tcopy_4.S
|
||||
DGEMMINCOPYOBJ =
|
||||
DGEMMITCOPYOBJ =
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMKERNEL = zgemm_kernel_power6.S
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMKERNEL = zgemm_kernel_power6.S
|
||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
DTRSMKERNEL_LN = trsm_kernel_power6_LN.S
|
||||
DTRSMKERNEL_LT = trsm_kernel_power6_LT.S
|
||||
DTRSMKERNEL_RN = trsm_kernel_power6_LT.S
|
||||
DTRSMKERNEL_RT = trsm_kernel_power6_RT.S
|
||||
|
||||
CAXPYKERNEL = zaxpy.S
|
||||
|
||||
else
|
||||
|
||||
$(info baue power8)
|
||||
#SGEMM_BETA = ../generic/gemm_beta.c
|
||||
#DGEMM_BETA = ../generic/gemm_beta.c
|
||||
#CGEMM_BETA = ../generic/zgemm_beta.c
|
||||
|
@ -47,16 +95,21 @@ ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
|||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
endif
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
#DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
#DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
|
||||
#DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
#DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
|
@ -153,15 +206,15 @@ ZASUMKERNEL = zasum.c
|
|||
#
|
||||
SAXPYKERNEL = saxpy.c
|
||||
DAXPYKERNEL = daxpy.c
|
||||
ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
|
||||
ifneq ($(GCCVERSIONGTEQ9),1)
|
||||
CAXPYKERNEL = caxpy_power8.S
|
||||
else
|
||||
CAXPYKERNEL = caxpy.c
|
||||
endif
|
||||
else
|
||||
CAXPYKERNEL = caxpy.c
|
||||
endif
|
||||
#ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
|
||||
#ifneq ($(GCCVERSIONGTEQ9),1)
|
||||
#CAXPYKERNEL = caxpy_power8.S
|
||||
#else
|
||||
#CAXPYKERNEL = caxpy.c
|
||||
#endif
|
||||
#else
|
||||
#CAXPYKERNEL = caxpy.c
|
||||
#endif
|
||||
#
|
||||
ZAXPYKERNEL = zaxpy.c
|
||||
#
|
||||
|
@ -173,7 +226,7 @@ ZCOPYKERNEL = zcopy.c
|
|||
SDOTKERNEL = sdot.c
|
||||
DDOTKERNEL = ddot.c
|
||||
DSDOTKERNEL = sdot.c
|
||||
CDOTKERNEL = cdot.c
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = zdot.c
|
||||
#
|
||||
SNRM2KERNEL = ../arm/nrm2.c
|
||||
|
@ -183,7 +236,7 @@ ZNRM2KERNEL = ../arm/znrm2.c
|
|||
#
|
||||
SROTKERNEL = srot.c
|
||||
DROTKERNEL = drot.c
|
||||
CROTKERNEL = crot.c
|
||||
#CROTKERNEL = crot.c
|
||||
ZROTKERNEL = zrot.c
|
||||
#
|
||||
SSCALKERNEL = sscal.c
|
||||
|
@ -239,4 +292,3 @@ IDAMINKERNEL = ../arm/iamin.c
|
|||
IZAMAXKERNEL = ../arm/izamax.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
endif
|
||||
|
||||
|
|
Loading…
Reference in New Issue