diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index dfdaf5cf4..d5de070a5 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -44,8 +44,10 @@ USE_TRMM = 1 endif ifeq ($(CORE), POWER8) +ifeq ($(BINARY64),1) USE_TRMM = 1 endif +endif ifeq ($(CORE), POWER9) USE_TRMM = 1 diff --git a/kernel/power/KERNEL.POWER8 b/kernel/power/KERNEL.POWER8 index 7fba5b4d6..c2f4cd204 100644 --- a/kernel/power/KERNEL.POWER8 +++ b/kernel/power/KERNEL.POWER8 @@ -1,3 +1,44 @@ +# Big-endian 32bit (AIX) is supported through the POWER6 GEMM kernels, no separate TRMM +ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1) +SGEMMKERNEL = gemm_kernel_power6.S +SGEMMINCOPY = +SGEMMITCOPY = +SGEMMONCOPY = gemm_ncopy_4.S +SGEMMOTCOPY = gemm_tcopy_4.S +SGEMMINCOPYOBJ = +SGEMMITCOPYOBJ = +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) +DGEMMKERNEL = gemm_kernel_power6.S +DGEMMINCOPY = +DGEMMITCOPY = +DGEMMONCOPY = gemm_ncopy_4.S +DGEMMOTCOPY = gemm_tcopy_4.S +DGEMMINCOPYOBJ = +DGEMMITCOPYOBJ = +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) +CGEMMKERNEL = zgemm_kernel_power6.S +CGEMMINCOPY = ../generic/zgemm_ncopy_2.c +CGEMMITCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_4.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c +CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) +CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) +ZGEMMKERNEL = zgemm_kernel_power6.S +ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) + +else + #SGEMM_BETA = ../generic/gemm_beta.c #DGEMM_BETA = ../generic/gemm_beta.c #CGEMM_BETA = ../generic/zgemm_beta.c @@ -47,16 +88,24 @@ ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1) +DTRSMKERNEL_LN = trsm_kernel_power6_LN.S +DTRSMKERNEL_LT = trsm_kernel_power6_LT.S +DTRSMKERNEL_RN = trsm_kernel_power6_LT.S +DTRSMKERNEL_RT = trsm_kernel_power6_RT.S +else DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c @@ -153,6 +202,10 @@ ZASUMKERNEL = zasum.c # SAXPYKERNEL = saxpy.c DAXPYKERNEL = daxpy.c +# +ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1) +CAXPYKERNEL = zaxpy.S +else ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) ifneq ($(GCCVERSIONGTEQ9),1) CAXPYKERNEL = caxpy_power8.S @@ -162,6 +215,7 @@ endif else CAXPYKERNEL = caxpy.c endif +endif # ZAXPYKERNEL = zaxpy.c # @@ -239,4 +293,3 @@ IDAMINKERNEL = ../arm/iamin.c IZAMAXKERNEL = ../arm/izamax.c IZAMINKERNEL = ../arm/izamin.c endif - diff --git a/param.h b/param.h index e8cf53f0a..efe0e1096 100644 --- a/param.h +++ b/param.h @@ -2225,7 +2225,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_B 65536 #define GEMM_DEFAULT_ALIGN 0x0ffffUL - +#if defined(__32BIT__) +#warning using BINARY32==POWER6 +#define SGEMM_DEFAULT_UNROLL_M 4 +#define SGEMM_DEFAULT_UNROLL_N 4 +#define DGEMM_DEFAULT_UNROLL_M 4 +#define DGEMM_DEFAULT_UNROLL_N 4 +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 4 +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 4 +#else #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 8 #define DGEMM_DEFAULT_UNROLL_M 16 @@ -2234,7 +2244,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_M 8 #define ZGEMM_DEFAULT_UNROLL_N 2 - +#endif #define SGEMM_DEFAULT_P 1280UL #define DGEMM_DEFAULT_P 640UL #define CGEMM_DEFAULT_P 640UL