Merge pull request #2720 from martin-frbg/issue2694

WIP Further fixes for 32bit POWER8
This commit is contained in:
Martin Kroeker 2020-07-24 23:19:45 +02:00 committed by GitHub
commit 6a2a60038c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 68 additions and 3 deletions

View File

@ -44,8 +44,10 @@ USE_TRMM = 1
endif endif
ifeq ($(CORE), POWER8) ifeq ($(CORE), POWER8)
ifeq ($(BINARY64),1)
USE_TRMM = 1 USE_TRMM = 1
endif endif
endif
ifeq ($(CORE), POWER9) ifeq ($(CORE), POWER9)
USE_TRMM = 1 USE_TRMM = 1

View File

@ -1,3 +1,44 @@
# Big-endian 32bit (AIX) is supported through the POWER6 GEMM kernels, no separate TRMM
ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
SGEMMKERNEL = gemm_kernel_power6.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMONCOPY = gemm_ncopy_4.S
SGEMMOTCOPY = gemm_tcopy_4.S
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = gemm_kernel_power6.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = gemm_ncopy_4.S
DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = zgemm_kernel_power6.S
CGEMMINCOPY = ../generic/zgemm_ncopy_2.c
CGEMMITCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = zgemm_kernel_power6.S
ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
else
#SGEMM_BETA = ../generic/gemm_beta.c #SGEMM_BETA = ../generic/gemm_beta.c
#DGEMM_BETA = ../generic/gemm_beta.c #DGEMM_BETA = ../generic/gemm_beta.c
#CGEMM_BETA = ../generic/zgemm_beta.c #CGEMM_BETA = ../generic/zgemm_beta.c
@ -47,16 +88,24 @@ ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
DTRSMKERNEL_LN = trsm_kernel_power6_LN.S
DTRSMKERNEL_LT = trsm_kernel_power6_LT.S
DTRSMKERNEL_RN = trsm_kernel_power6_LT.S
DTRSMKERNEL_RT = trsm_kernel_power6_RT.S
else
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
endif
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
@ -153,6 +202,10 @@ ZASUMKERNEL = zasum.c
# #
SAXPYKERNEL = saxpy.c SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c DAXPYKERNEL = daxpy.c
#
ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
CAXPYKERNEL = zaxpy.S
else
ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__) ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
ifneq ($(GCCVERSIONGTEQ9),1) ifneq ($(GCCVERSIONGTEQ9),1)
CAXPYKERNEL = caxpy_power8.S CAXPYKERNEL = caxpy_power8.S
@ -162,6 +215,7 @@ endif
else else
CAXPYKERNEL = caxpy.c CAXPYKERNEL = caxpy.c
endif endif
endif
# #
ZAXPYKERNEL = zaxpy.c ZAXPYKERNEL = zaxpy.c
# #
@ -239,4 +293,3 @@ IDAMINKERNEL = ../arm/iamin.c
IZAMAXKERNEL = ../arm/izamax.c IZAMAXKERNEL = ../arm/izamax.c
IZAMINKERNEL = ../arm/izamin.c IZAMINKERNEL = ../arm/izamin.c
endif endif

14
param.h
View File

@ -2225,7 +2225,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A 0 #define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 65536 #define GEMM_DEFAULT_OFFSET_B 65536
#define GEMM_DEFAULT_ALIGN 0x0ffffUL #define GEMM_DEFAULT_ALIGN 0x0ffffUL
#if defined(__32BIT__)
#warning using BINARY32==POWER6
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 4
#else
#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 8 #define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_M 16 #define DGEMM_DEFAULT_UNROLL_M 16
@ -2234,7 +2244,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CGEMM_DEFAULT_UNROLL_N 4 #define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 8 #define ZGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_N 2 #define ZGEMM_DEFAULT_UNROLL_N 2
#endif
#define SGEMM_DEFAULT_P 1280UL #define SGEMM_DEFAULT_P 1280UL
#define DGEMM_DEFAULT_P 640UL #define DGEMM_DEFAULT_P 640UL
#define CGEMM_DEFAULT_P 640UL #define CGEMM_DEFAULT_P 640UL