Use generic kernel on P6 & P7 so that lapack-test passes
This commit is contained in:
parent
33bb4b98a4
commit
f948dfb47f
|
@ -93,6 +93,14 @@ FCOMMON_OPT += -O2 -Mrecursive
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), POWER6)
|
||||||
|
CCOMMON_OPT += -Ofast -mcpu=power6 -fno-fast-math
|
||||||
|
ifeq ($(F_COMPILER), IBM)
|
||||||
|
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr6 -qfloat=nomaf -qzerosize
|
||||||
|
else
|
||||||
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power6 -fno-fast-math
|
||||||
|
endif
|
||||||
|
endif
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
ifneq ($(C_COMPILER), PGI)
|
ifneq ($(C_COMPILER), PGI)
|
||||||
CCOMMON_OPT += -DUSE_OPENMP -fopenmp
|
CCOMMON_OPT += -DUSE_OPENMP -fopenmp
|
||||||
|
|
|
@ -80,6 +80,11 @@ M4_AIX := m4 -B16384
|
||||||
endif
|
endif
|
||||||
$(info $$var is [${$(M4_AIX)}])
|
$(info $$var is [${$(M4_AIX)}])
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), POWER6)
|
||||||
|
USE_TRMM = 1
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER8)
|
ifeq ($(CORE), POWER8)
|
||||||
ifeq ($(BINARY64),1)
|
ifeq ($(BINARY64),1)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
|
|
|
@ -1,59 +1,54 @@
|
||||||
SGEMMKERNEL = gemm_kernel_power6.S
|
STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||||
SGEMMINCOPY =
|
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||||
SGEMMITCOPY =
|
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
SGEMMONCOPY = gemm_ncopy_4.S
|
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
SGEMMOTCOPY = gemm_tcopy_4.S
|
|
||||||
SGEMMINCOPYOBJ =
|
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
SGEMMITCOPYOBJ =
|
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMKERNEL = gemm_kernel_power6.S
|
|
||||||
DGEMMINCOPY =
|
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
DGEMMITCOPY =
|
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
DGEMMONCOPY = gemm_ncopy_4.S
|
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
DGEMMOTCOPY = gemm_tcopy_4.S
|
|
||||||
DGEMMINCOPYOBJ =
|
|
||||||
DGEMMITCOPYOBJ =
|
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMKERNEL = zgemm_kernel_power6.S
|
|
||||||
CGEMMINCOPY = ../generic/zgemm_ncopy_2.c
|
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
CGEMMITCOPY = ../generic/zgemm_tcopy_2.c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
|
||||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMKERNEL = zgemm_kernel_power6.S
|
|
||||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c
|
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
|
||||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
STRSMKERNEL_LN = trsm_kernel_power6_LN.S
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
STRSMKERNEL_LT = trsm_kernel_power6_LT.S
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
STRSMKERNEL_RN = trsm_kernel_power6_LT.S
|
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
STRSMKERNEL_RT = trsm_kernel_power6_RT.S
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
DTRSMKERNEL_LN = trsm_kernel_power6_LN.S
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
DTRSMKERNEL_LT = trsm_kernel_power6_LT.S
|
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
DTRSMKERNEL_RN = trsm_kernel_power6_LT.S
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
DTRSMKERNEL_RT = trsm_kernel_power6_RT.S
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
CTRSMKERNEL_LN = ztrsm_kernel_power6_LN.S
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
CTRSMKERNEL_LT = ztrsm_kernel_power6_LT.S
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
CTRSMKERNEL_RN = ztrsm_kernel_power6_LT.S
|
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
CTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S
|
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
ZTRSMKERNEL_LN = ztrsm_kernel_power6_LN.S
|
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
ZTRSMKERNEL_LT = ztrsm_kernel_power6_LT.S
|
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
ZTRSMKERNEL_RN = ztrsm_kernel_power6_LT.S
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
ZTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
CROTKERNEL = ../arm/zrot.c
|
CROTKERNEL = ../arm/zrot.c
|
||||||
ZROTKERNEL = ../arm/zrot.c
|
ZROTKERNEL = ../arm/zrot.c
|
||||||
|
|
||||||
|
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||||
|
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||||
|
|
12
param.h
12
param.h
|
@ -2476,14 +2476,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define GEMM_DEFAULT_OFFSET_B 1024
|
#define GEMM_DEFAULT_OFFSET_B 1024
|
||||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 2
|
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 992
|
#define SGEMM_DEFAULT_P 992
|
||||||
#define DGEMM_DEFAULT_P 480
|
#define DGEMM_DEFAULT_P 480
|
||||||
|
|
Loading…
Reference in New Issue