Merge pull request #2720 from martin-frbg/issue2694
WIP Further fixes for 32bit POWER8
This commit is contained in:
commit
6a2a60038c
|
@ -44,8 +44,10 @@ USE_TRMM = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER8)
|
ifeq ($(CORE), POWER8)
|
||||||
|
ifeq ($(BINARY64),1)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER9)
|
ifeq ($(CORE), POWER9)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
|
|
|
@ -1,3 +1,44 @@
|
||||||
|
# Big-endian 32-bit builds (AIX) are supported through the POWER6 GEMM kernels; no separate TRMM kernels are used for them.
|
||||||
|
ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
|
||||||
|
SGEMMKERNEL = gemm_kernel_power6.S
|
||||||
|
SGEMMINCOPY =
|
||||||
|
SGEMMITCOPY =
|
||||||
|
SGEMMONCOPY = gemm_ncopy_4.S
|
||||||
|
SGEMMOTCOPY = gemm_tcopy_4.S
|
||||||
|
SGEMMINCOPYOBJ =
|
||||||
|
SGEMMITCOPYOBJ =
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMKERNEL = gemm_kernel_power6.S
|
||||||
|
DGEMMINCOPY =
|
||||||
|
DGEMMITCOPY =
|
||||||
|
DGEMMONCOPY = gemm_ncopy_4.S
|
||||||
|
DGEMMOTCOPY = gemm_tcopy_4.S
|
||||||
|
DGEMMINCOPYOBJ =
|
||||||
|
DGEMMITCOPYOBJ =
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMKERNEL = zgemm_kernel_power6.S
|
||||||
|
CGEMMINCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
CGEMMITCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||||
|
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMKERNEL = zgemm_kernel_power6.S
|
||||||
|
ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||||
|
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
#SGEMM_BETA = ../generic/gemm_beta.c
|
#SGEMM_BETA = ../generic/gemm_beta.c
|
||||||
#DGEMM_BETA = ../generic/gemm_beta.c
|
#DGEMM_BETA = ../generic/gemm_beta.c
|
||||||
#CGEMM_BETA = ../generic/zgemm_beta.c
|
#CGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
|
@ -47,16 +88,24 @@ ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
endif
|
||||||
|
|
||||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
|
||||||
|
DTRSMKERNEL_LN = trsm_kernel_power6_LN.S
|
||||||
|
DTRSMKERNEL_LT = trsm_kernel_power6_LT.S
|
||||||
|
DTRSMKERNEL_RN = trsm_kernel_power6_LT.S
|
||||||
|
DTRSMKERNEL_RT = trsm_kernel_power6_RT.S
|
||||||
|
else
|
||||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
|
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
|
||||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
endif
|
||||||
|
|
||||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
@ -153,6 +202,10 @@ ZASUMKERNEL = zasum.c
|
||||||
#
|
#
|
||||||
SAXPYKERNEL = saxpy.c
|
SAXPYKERNEL = saxpy.c
|
||||||
DAXPYKERNEL = daxpy.c
|
DAXPYKERNEL = daxpy.c
|
||||||
|
#
|
||||||
|
ifeq ($(__BYTE_ORDER__)$(BINARY32),__ORDER_BIG_ENDIAN__1)
|
||||||
|
CAXPYKERNEL = zaxpy.S
|
||||||
|
else
|
||||||
ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
|
ifneq ($(__BYTE_ORDER__),__ORDER_BIG_ENDIAN__)
|
||||||
ifneq ($(GCCVERSIONGTEQ9),1)
|
ifneq ($(GCCVERSIONGTEQ9),1)
|
||||||
CAXPYKERNEL = caxpy_power8.S
|
CAXPYKERNEL = caxpy_power8.S
|
||||||
|
@ -162,6 +215,7 @@ endif
|
||||||
else
|
else
|
||||||
CAXPYKERNEL = caxpy.c
|
CAXPYKERNEL = caxpy.c
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
#
|
#
|
||||||
ZAXPYKERNEL = zaxpy.c
|
ZAXPYKERNEL = zaxpy.c
|
||||||
#
|
#
|
||||||
|
@ -239,4 +293,3 @@ IDAMINKERNEL = ../arm/iamin.c
|
||||||
IZAMAXKERNEL = ../arm/izamax.c
|
IZAMAXKERNEL = ../arm/izamax.c
|
||||||
IZAMINKERNEL = ../arm/izamin.c
|
IZAMINKERNEL = ../arm/izamin.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
14
param.h
14
param.h
|
@ -2225,7 +2225,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define GEMM_DEFAULT_OFFSET_A 0
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
#define GEMM_DEFAULT_OFFSET_B 65536
|
#define GEMM_DEFAULT_OFFSET_B 65536
|
||||||
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
||||||
|
#if defined(__32BIT__)
|
||||||
|
#warning using BINARY32==POWER6
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#else
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 16
|
#define DGEMM_DEFAULT_UNROLL_M 16
|
||||||
|
@ -2234,7 +2244,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 8
|
#define ZGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
#endif
|
||||||
#define SGEMM_DEFAULT_P 1280UL
|
#define SGEMM_DEFAULT_P 1280UL
|
||||||
#define DGEMM_DEFAULT_P 640UL
|
#define DGEMM_DEFAULT_P 640UL
|
||||||
#define CGEMM_DEFAULT_P 640UL
|
#define CGEMM_DEFAULT_P 640UL
|
||||||
|
|
Loading…
Reference in New Issue