POWER6: build with the generic C kernels instead of the *_power6.S kernels.

What each hunk does:
  * Makefile.power
      Adds a CORE=POWER6 section: C flags "-Ofast -mcpu=power6 -fno-fast-math";
      Fortran flags "-O2 -qrecur -qnosave -qarch=pwr6 -qfloat=nomaf -qzerosize"
      when F_COMPILER is IBM, otherwise
      "-O2 -frecursive -mcpu=power6 -fno-fast-math".
  * kernel/Makefile.L3
      Sets USE_TRMM = 1 when CORE is POWER6.
  * kernel/power/KERNEL.POWER6
      Points the S/D/C/Z GEMM kernel, ONCOPY/OTCOPY and TRSM variables at
      ../generic/*.c sources (2x2 GEMM kernels, *_2.c copy routines), adds
      S/D/C/Z TRMM kernels from ../generic/, removes the INCOPY/ITCOPY
      variables, and adds SGEMV N/T kernels from ../arm/.
  * param.h
      Lowers every {S,D,C,Z}GEMM_DEFAULT_UNROLL_{M,N} that was 4 to 2 in the
      block at line 2476.
      NOTE(review): presumably the POWER6 parameter block, matching the 2x2
      kernels selected above -- confirm against the full param.h.

NOTE(review): this patch was recovered from a copy whose newlines had been
collapsed. Blank context lines were put back so that every hunk matches the
line counts in its "@@" header; in the Makefile.power hunk the blank line is
assumed to follow the two "endif" lines. Tabs and runs of spaces inside lines
could not be recovered. Verify with "git apply --check --ignore-whitespace"
before applying.

diff --git a/Makefile.power b/Makefile.power
index 3fa6d6faf..0755024b0 100644
--- a/Makefile.power
+++ b/Makefile.power
@@ -93,6 +93,14 @@ FCOMMON_OPT += -O2 -Mrecursive
 endif
 endif
 
+ifeq ($(CORE), POWER6)
+CCOMMON_OPT += -Ofast -mcpu=power6 -fno-fast-math
+ifeq ($(F_COMPILER), IBM)
+FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr6 -qfloat=nomaf -qzerosize
+else
+FCOMMON_OPT += -O2 -frecursive -mcpu=power6 -fno-fast-math
+endif
+endif
 ifeq ($(USE_OPENMP), 1)
 ifneq ($(C_COMPILER), PGI)
 CCOMMON_OPT += -DUSE_OPENMP -fopenmp
diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3
index 87fd5ca10..46e1e1794 100644
--- a/kernel/Makefile.L3
+++ b/kernel/Makefile.L3
@@ -80,6 +80,11 @@ M4_AIX := m4 -B16384
 endif
 $(info $$var is [${$(M4_AIX)}])
 endif
+
+ifeq ($(CORE), POWER6)
+USE_TRMM = 1
+endif
+
 ifeq ($(CORE), POWER8)
 ifeq ($(BINARY64),1)
 USE_TRMM = 1
diff --git a/kernel/power/KERNEL.POWER6 b/kernel/power/KERNEL.POWER6
index e6d2c9a51..ab10b78a2 100644
--- a/kernel/power/KERNEL.POWER6
+++ b/kernel/power/KERNEL.POWER6
@@ -1,59 +1,54 @@
-SGEMMKERNEL = gemm_kernel_power6.S
-SGEMMINCOPY =
-SGEMMITCOPY =
-SGEMMONCOPY = gemm_ncopy_4.S
-SGEMMOTCOPY = gemm_tcopy_4.S
-SGEMMINCOPYOBJ =
-SGEMMITCOPYOBJ =
+STRMMKERNEL = ../generic/trmmkernel_2x2.c
+DTRMMKERNEL = ../generic/trmmkernel_2x2.c
+CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+
+SGEMMKERNEL = ../generic/gemmkernel_2x2.c
+SGEMMONCOPY = ../generic/gemm_ncopy_2.c
+SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
 SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
 SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
-DGEMMKERNEL = gemm_kernel_power6.S
-DGEMMINCOPY =
-DGEMMITCOPY =
-DGEMMONCOPY = gemm_ncopy_4.S
-DGEMMOTCOPY = gemm_tcopy_4.S
-DGEMMINCOPYOBJ =
-DGEMMITCOPYOBJ =
-DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
-DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
-CGEMMKERNEL = zgemm_kernel_power6.S
-CGEMMINCOPY = ../generic/zgemm_ncopy_2.c
-CGEMMITCOPY = ../generic/zgemm_tcopy_2.c
-CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
-CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
-CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
-CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
+
+DGEMMKERNEL = ../generic/gemmkernel_2x2.c
+DGEMMONCOPY = ../generic/gemm_ncopy_2.c
+DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
+DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
+DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
+
+CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
 CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
 CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
-ZGEMMKERNEL = zgemm_kernel_power6.S
-ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c
-ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c
-ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
-ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
-ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
-ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
+
+ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
 ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
 ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
 
-STRSMKERNEL_LN = trsm_kernel_power6_LN.S
-STRSMKERNEL_LT = trsm_kernel_power6_LT.S
-STRSMKERNEL_RN = trsm_kernel_power6_LT.S
-STRSMKERNEL_RT = trsm_kernel_power6_RT.S
+STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
 
-DTRSMKERNEL_LN = trsm_kernel_power6_LN.S
-DTRSMKERNEL_LT = trsm_kernel_power6_LT.S
-DTRSMKERNEL_RN = trsm_kernel_power6_LT.S
-DTRSMKERNEL_RT = trsm_kernel_power6_RT.S
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
 
-CTRSMKERNEL_LN = ztrsm_kernel_power6_LN.S
-CTRSMKERNEL_LT = ztrsm_kernel_power6_LT.S
-CTRSMKERNEL_RN = ztrsm_kernel_power6_LT.S
-CTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
 
-ZTRSMKERNEL_LN = ztrsm_kernel_power6_LN.S
-ZTRSMKERNEL_LT = ztrsm_kernel_power6_LT.S
-ZTRSMKERNEL_RN = ztrsm_kernel_power6_LT.S
-ZTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
 
 CROTKERNEL = ../arm/zrot.c
 ZROTKERNEL = ../arm/zrot.c
+
+SGEMVNKERNEL = ../arm/gemv_n.c
+SGEMVTKERNEL = ../arm/gemv_t.c
diff --git a/param.h b/param.h
index 2ebe824db..dc68ce94b 100644
--- a/param.h
+++ b/param.h
@@ -2476,14 +2476,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define GEMM_DEFAULT_OFFSET_B 1024
 #define GEMM_DEFAULT_ALIGN 0x03fffUL
 
-#define SGEMM_DEFAULT_UNROLL_M 4
-#define SGEMM_DEFAULT_UNROLL_N 4
-#define DGEMM_DEFAULT_UNROLL_M 4
-#define DGEMM_DEFAULT_UNROLL_N 4
+#define SGEMM_DEFAULT_UNROLL_M 2
+#define SGEMM_DEFAULT_UNROLL_N 2
+#define DGEMM_DEFAULT_UNROLL_M 2
+#define DGEMM_DEFAULT_UNROLL_N 2
 #define CGEMM_DEFAULT_UNROLL_M 2
-#define CGEMM_DEFAULT_UNROLL_N 4
+#define CGEMM_DEFAULT_UNROLL_N 2
 #define ZGEMM_DEFAULT_UNROLL_M 2
-#define ZGEMM_DEFAULT_UNROLL_N 4
+#define ZGEMM_DEFAULT_UNROLL_N 2
 
 #define SGEMM_DEFAULT_P 992
 #define DGEMM_DEFAULT_P 480