From 95d37e15754955f5c73195d2ca09208e99600ab9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 24 Jul 2020 10:13:46 +0000 Subject: [PATCH] Regroup the 32 and 64bit sections and restore 64bit CAXPY --- kernel/power/KERNEL.POWER8 | 49 ++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 23 deletions(-) diff --git a/kernel/power/KERNEL.POWER8 b/kernel/power/KERNEL.POWER8 index bb93a6a23..cbf285913 100644 --- a/kernel/power/KERNEL.POWER8 +++ b/kernel/power/KERNEL.POWER8 @@ -1,3 +1,4 @@ +# Big-endian 32bit (AIX) is supported through the POWER6 GEMM kernels, no separate TRMM ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1) SGEMMKERNEL = gemm_kernel_power6.S SGEMMINCOPY = @@ -35,12 +36,6 @@ ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) -DTRSMKERNEL_LN = trsm_kernel_power6_LN.S -DTRSMKERNEL_LT = trsm_kernel_power6_LT.S -DTRSMKERNEL_RN = trsm_kernel_power6_LT.S -DTRSMKERNEL_RT = trsm_kernel_power6_RT.S - -CAXPYKERNEL = zaxpy.S else @@ -93,10 +88,6 @@ ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) -DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S -DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c endif STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c @@ -104,10 +95,17 @@ STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c -#DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c -#DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S -#DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c -#DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1) +DTRSMKERNEL_LN = trsm_kernel_power6_LN.S +DTRSMKERNEL_LT = trsm_kernel_power6_LT.S +DTRSMKERNEL_RN = trsm_kernel_power6_LT.S +DTRSMKERNEL_RT = trsm_kernel_power6_RT.S +else +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c @@ -204,15 +202,20 @@ ZASUMKERNEL = zasum.c # SAXPYKERNEL = saxpy.c DAXPYKERNEL = daxpy.c -#ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) -#ifneq ($(GCCVERSIONGTEQ9),1) -#CAXPYKERNEL = caxpy_power8.S -#else -#CAXPYKERNEL = caxpy.c -#endif -#else -#CAXPYKERNEL = caxpy.c -#endif +รค +ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1) +CAXPYKERNEL = zaxpy.S +else +ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) +ifneq ($(GCCVERSIONGTEQ9),1) +CAXPYKERNEL = caxpy_power8.S +else +CAXPYKERNEL = caxpy.c +endif +else +CAXPYKERNEL = caxpy.c +endif +endif # ZAXPYKERNEL = zaxpy.c #