Use generic kernel on P6 & P7 so that lapack-test passes

This commit is contained in:
Hong Bo Peng 2024-06-24 11:35:55 -04:00
parent 33bb4b98a4
commit f948dfb47f
4 changed files with 60 additions and 52 deletions

View File

@@ -93,6 +93,14 @@ FCOMMON_OPT += -O2 -Mrecursive
endif endif
endif endif
# POWER6 target: extra optimisation flags for the C and Fortran compilers.
# -fno-fast-math is listed after -Ofast; GCC documents -Ofast as turning on
# -ffast-math, so the later flag presumably switches that part back off
# (NOTE(review): confirm this is the intent).
ifeq ($(CORE), POWER6)
CCOMMON_OPT += -Ofast -mcpu=power6 -fno-fast-math
ifeq ($(F_COMPILER), IBM)
# IBM Fortran compiler: q-style options targeting pwr6.
# NOTE(review): -qfloat=nomaf presumably disables fused multiply-add --
# confirm against the XL Fortran option reference.
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr6 -qfloat=nomaf -qzerosize
else
# Every other Fortran compiler gets the same -mcpu=power6 / -fno-fast-math
# pair that is passed to the C compiler above.
FCOMMON_OPT += -O2 -frecursive -mcpu=power6 -fno-fast-math
endif
endif
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
ifneq ($(C_COMPILER), PGI) ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -DUSE_OPENMP -fopenmp CCOMMON_OPT += -DUSE_OPENMP -fopenmp

View File

@@ -80,6 +80,11 @@ M4_AIX := m4 -B16384
endif endif
$(info $$var is [${$(M4_AIX)}]) $(info $$var is [${$(M4_AIX)}])
endif endif
# Set USE_TRMM for POWER6 builds.
# NOTE(review): presumably this makes the build use the separate *TRMMKERNEL
# sources instead of deriving TRMM from the GEMM kernel -- confirm where
# USE_TRMM is consumed.
ifeq ($(CORE), POWER6)
USE_TRMM = 1
endif
ifeq ($(CORE), POWER8) ifeq ($(CORE), POWER8)
ifeq ($(BINARY64),1) ifeq ($(BINARY64),1)
USE_TRMM = 1 USE_TRMM = 1

View File

@@ -1,59 +1,54 @@
SGEMMKERNEL = gemm_kernel_power6.S STRMMKERNEL = ../generic/trmmkernel_2x2.c
SGEMMINCOPY = DTRMMKERNEL = ../generic/trmmkernel_2x2.c
SGEMMITCOPY = CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
SGEMMONCOPY = gemm_ncopy_4.S ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
SGEMMOTCOPY = gemm_tcopy_4.S
SGEMMINCOPYOBJ = SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMITCOPYOBJ = SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = gemm_kernel_power6.S
DGEMMINCOPY = DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMITCOPY = DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMONCOPY = gemm_ncopy_4.S DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMOTCOPY = gemm_tcopy_4.S DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMINCOPYOBJ = DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMKERNEL = zgemm_kernel_power6.S CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMINCOPY = ../generic/zgemm_ncopy_2.c
CGEMMITCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = zgemm_kernel_power6.S
ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRSMKERNEL_LN = trsm_kernel_power6_LN.S STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = trsm_kernel_power6_LT.S STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = trsm_kernel_power6_LT.S STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = trsm_kernel_power6_RT.S STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = trsm_kernel_power6_LN.S DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = trsm_kernel_power6_LT.S DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = trsm_kernel_power6_LT.S DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = trsm_kernel_power6_RT.S DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ztrsm_kernel_power6_LN.S CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ztrsm_kernel_power6_LT.S CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ztrsm_kernel_power6_LT.S CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ztrsm_kernel_power6_LN.S ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ztrsm_kernel_power6_LT.S ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ztrsm_kernel_power6_LT.S ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CROTKERNEL = ../arm/zrot.c CROTKERNEL = ../arm/zrot.c
ZROTKERNEL = ../arm/zrot.c ZROTKERNEL = ../arm/zrot.c
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c

12
param.h
View File

@@ -2476,14 +2476,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 1024 #define GEMM_DEFAULT_OFFSET_B 1024
#define GEMM_DEFAULT_ALIGN 0x03fffUL #define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_UNROLL_M 4 #define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 4 #define DGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_M 2 #define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 4 #define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_M 2 #define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_N 2
#define SGEMM_DEFAULT_P 992 #define SGEMM_DEFAULT_P 992
#define DGEMM_DEFAULT_P 480 #define DGEMM_DEFAULT_P 480