Initialize the POWER8 kernels from the POWER6 kernels.
This commit is contained in:
parent
be4e5fcd20
commit
3e8d6ea74f
|
@ -55,6 +55,7 @@
|
|||
#define CPUTYPE_POWER6 5
|
||||
#define CPUTYPE_CELL 6
|
||||
#define CPUTYPE_PPCG4 7
|
||||
#define CPUTYPE_POWER8 8
|
||||
|
||||
char *cpuname[] = {
|
||||
"UNKNOWN",
|
||||
|
@ -65,6 +66,7 @@ char *cpuname[] = {
|
|||
"POWER6",
|
||||
"CELL",
|
||||
"PPCG4",
|
||||
"POWER8",
|
||||
};
|
||||
|
||||
char *lowercpuname[] = {
|
||||
|
@ -76,6 +78,7 @@ char *lowercpuname[] = {
|
|||
"power6",
|
||||
"cell",
|
||||
"ppcg4",
|
||||
"power8",
|
||||
};
|
||||
|
||||
char *corename[] = {
|
||||
|
@ -87,6 +90,7 @@ char *corename[] = {
|
|||
"POWER6",
|
||||
"CELL",
|
||||
"PPCG4",
|
||||
"POWER8",
|
||||
};
|
||||
|
||||
int detect(void){
|
||||
|
@ -115,7 +119,7 @@ int detect(void){
|
|||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5;
|
||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6;
|
||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||
|
||||
|
|
13
getarch.c
13
getarch.c
|
@ -565,6 +565,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CORENAME "POWER6"
|
||||
#endif
|
||||
|
||||
/*
 * Forced-target settings for POWER8.
 * When FORCE_POWER8 is defined this block defines FORCE, the architecture,
 * sub-architecture, sub-directory, library and core name strings, and the
 * ARCHCONFIG string of -D flags (L1/L2 size and line size, DTB entries and
 * size, L2 associativity).
 * NOTE(review): the commit title says POWER8 is initialized from POWER6, so
 * the cache/DTB numbers are presumably carried over from the POWER6 entry --
 * confirm them against actual POWER8 hardware.
 */
#if defined(FORCE_POWER8)
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "POWER"
|
||||
#define SUBARCHITECTURE "POWER8"
|
||||
#define SUBDIRNAME "power"
|
||||
#define ARCHCONFIG "-DPOWER8 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=128 " \
|
||||
"-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||
#define LIBNAME "power8"
|
||||
#define CORENAME "POWER8"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_PPCG4
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "POWER"
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
# SGEMM: the kernel is the POWER6 assembly source. The INCOPY/ITCOPY
# variables (and their *OBJ counterparts) are left empty; only the
# ONCOPY/OTCOPY routines (gemm_ncopy_4.S / gemm_tcopy_4.S) are set.
SGEMMKERNEL = gemm_kernel_power6.S
|
||||
SGEMMINCOPY =
|
||||
SGEMMITCOPY =
|
||||
SGEMMONCOPY = gemm_ncopy_4.S
|
||||
SGEMMOTCOPY = gemm_tcopy_4.S
|
||||
SGEMMINCOPYOBJ =
|
||||
SGEMMITCOPYOBJ =
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
# DGEMM: same source files as the SGEMM entries (gemm_kernel_power6.S,
# gemm_ncopy_4.S, gemm_tcopy_4.S). INCOPY/ITCOPY and their *OBJ variables
# are left empty; the ONCOPY/OTCOPY objects use the dgemm_ prefix.
DGEMMKERNEL = gemm_kernel_power6.S
|
||||
DGEMMINCOPY =
|
||||
DGEMMITCOPY =
|
||||
DGEMMONCOPY = gemm_ncopy_4.S
|
||||
DGEMMOTCOPY = gemm_tcopy_4.S
|
||||
DGEMMINCOPYOBJ =
|
||||
DGEMMITCOPYOBJ =
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
# CGEMM: the kernel is zgemm_kernel_power6.S (the same file ZGEMMKERNEL
# names). All four copy routines come from the generic C sources:
# *_2.c for INCOPY/ITCOPY and *_4.c for ONCOPY/OTCOPY.
CGEMMKERNEL = zgemm_kernel_power6.S
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
# ZGEMM: POWER6 assembly kernel plus generic C copy routines
# (*_2.c for INCOPY/ITCOPY, *_4.c for ONCOPY/OTCOPY); the object
# names use the zgemm_ prefix.
ZGEMMKERNEL = zgemm_kernel_power6.S
|
||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
# TRSM kernels for S/D (trsm_kernel_power6_*) and C/Z (ztrsm_kernel_power6_*).
# In every group the RN variant is assigned the same *_LT.S source as LT;
# LN and RT have their own files.
# NOTE(review): the RN -> LT mapping is presumably intentional (mirroring the
# POWER6 kernel list this file was initialized from) -- confirm.
STRSMKERNEL_LN = trsm_kernel_power6_LN.S
|
||||
STRSMKERNEL_LT = trsm_kernel_power6_LT.S
|
||||
STRSMKERNEL_RN = trsm_kernel_power6_LT.S
|
||||
STRSMKERNEL_RT = trsm_kernel_power6_RT.S
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_power6_LN.S
|
||||
DTRSMKERNEL_LT = trsm_kernel_power6_LT.S
|
||||
DTRSMKERNEL_RN = trsm_kernel_power6_LT.S
|
||||
DTRSMKERNEL_RT = trsm_kernel_power6_RT.S
|
||||
|
||||
CTRSMKERNEL_LN = ztrsm_kernel_power6_LN.S
|
||||
CTRSMKERNEL_LT = ztrsm_kernel_power6_LT.S
|
||||
CTRSMKERNEL_RN = ztrsm_kernel_power6_LT.S
|
||||
CTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S
|
||||
|
||||
ZTRSMKERNEL_LN = ztrsm_kernel_power6_LN.S
|
||||
ZTRSMKERNEL_LT = ztrsm_kernel_power6_LT.S
|
||||
ZTRSMKERNEL_RN = ztrsm_kernel_power6_LT.S
|
||||
ZTRSMKERNEL_RT = ztrsm_kernel_power6_RT.S
|
|
@ -104,6 +104,11 @@
|
|||
#define PREFETCHWSIZE 72
|
||||
#endif
|
||||
|
||||
/* POWER8 build: PREFETCHSIZE / PREFETCHWSIZE for this kernel. The values
 * are the same as in the PPCG4 block that follows. */
#ifdef POWER8
|
||||
#define PREFETCHSIZE 16
|
||||
#define PREFETCHWSIZE 72
|
||||
#endif
|
||||
|
||||
#ifdef PPCG4
|
||||
#define PREFETCHSIZE 16
|
||||
#define PREFETCHWSIZE 72
|
||||
|
|
|
@ -108,6 +108,11 @@
|
|||
#define PREFETCHWSIZE 48
|
||||
#endif
|
||||
|
||||
/* POWER8 build: PREFETCHSIZE / PREFETCHWSIZE for this kernel. The values
 * are the same as in the PPCG4 block that follows. */
#ifdef POWER8
|
||||
#define PREFETCHSIZE 16
|
||||
#define PREFETCHWSIZE 48
|
||||
#endif
|
||||
|
||||
#ifdef PPCG4
|
||||
#define PREFETCHSIZE 16
|
||||
#define PREFETCHWSIZE 48
|
||||
|
|
|
@ -174,6 +174,11 @@
|
|||
#define PREFETCHSIZE_C 40
|
||||
#endif
|
||||
|
||||
/* POWER8 build: PREFETCHSIZE_A / PREFETCHSIZE_C for this kernel.
 * NOTE(review): presumably carried over from the POWER6 settings (per the
 * commit title) -- confirm they suit POWER8. */
#ifdef POWER8
|
||||
#define PREFETCHSIZE_A 96
|
||||
#define PREFETCHSIZE_C 40
|
||||
#endif
|
||||
|
||||
#ifndef NEEDPARAM
|
||||
|
||||
#ifndef __64BIT__
|
||||
|
|
|
@ -139,6 +139,11 @@
|
|||
#define PREFETCHSIZE_C 8
|
||||
#endif
|
||||
|
||||
/* POWER8 build: PREFETCHSIZE_A / PREFETCHSIZE_C for this kernel.
 * NOTE(review): presumably carried over from the POWER6 settings (per the
 * commit title) -- confirm they suit POWER8. */
#ifdef POWER8
|
||||
#define PREFETCHSIZE_A 96
|
||||
#define PREFETCHSIZE_C 8
|
||||
#endif
|
||||
|
||||
#define y01 f0
|
||||
#define y02 f1
|
||||
#define y03 f2
|
||||
|
|
|
@ -168,7 +168,11 @@
|
|||
#define PREFETCHSIZE_A 40
|
||||
#endif
|
||||
|
||||
#if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970)
|
||||
/* POWER8 build: PREFETCHSIZE_A for this kernel.
 * NOTE(review): presumably carried over from the POWER6 setting (per the
 * commit title) -- confirm it suits POWER8. */
#ifdef POWER8
|
||||
#define PREFETCHSIZE_A 40
|
||||
#endif
|
||||
|
||||
#if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) || defined(POWER8)
|
||||
#define NOP1
|
||||
#define NOP2
|
||||
#else
|
||||
|
|
|
@ -167,7 +167,11 @@
|
|||
#define PREFETCHSIZE_A 40
|
||||
#endif
|
||||
|
||||
#if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970)
|
||||
/* POWER8 build: PREFETCHSIZE_A for this kernel.
 * NOTE(review): presumably carried over from the POWER6 setting (per the
 * commit title) -- confirm it suits POWER8. */
#ifdef POWER8
|
||||
#define PREFETCHSIZE_A 40
|
||||
#endif
|
||||
|
||||
#if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) || defined(POWER8)
|
||||
#define NOP1
|
||||
#define NOP2
|
||||
#else
|
||||
|
|
|
@ -170,6 +170,11 @@
|
|||
#define PREFETCHSIZE_C 24
|
||||
#endif
|
||||
|
||||
/* POWER8 build: PREFETCHSIZE_A / PREFETCHSIZE_C for this kernel.
 * NOTE(review): presumably carried over from the POWER6 settings (per the
 * commit title) -- confirm they suit POWER8. */
#ifdef POWER8
|
||||
#define PREFETCHSIZE_A 24
|
||||
#define PREFETCHSIZE_C 24
|
||||
#endif
|
||||
|
||||
#ifndef XCONJ
|
||||
#define FMADDR FMADD
|
||||
#define FMSUBR FNMSUB
|
||||
|
|
|
@ -144,6 +144,11 @@
|
|||
#define PREFETCHSIZE_C 8
|
||||
#endif
|
||||
|
||||
/* POWER8 build: PREFETCHSIZE_A / PREFETCHSIZE_C for this kernel.
 * NOTE(review): presumably carried over from the POWER6 settings (per the
 * commit title) -- confirm they suit POWER8. */
#ifdef POWER8
|
||||
#define PREFETCHSIZE_A 24
|
||||
#define PREFETCHSIZE_C 8
|
||||
#endif
|
||||
|
||||
#if !(defined(CONJ) && defined(XCONJ))
|
||||
#define FMADDR FMADD
|
||||
#define FMSUBR FNMSUB
|
||||
|
|
|
@ -169,7 +169,11 @@
|
|||
#define PREFETCHSIZE_A 112
|
||||
#endif
|
||||
|
||||
#if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970)
|
||||
/* POWER8 build: PREFETCHSIZE_A for this kernel.
 * NOTE(review): presumably carried over from the POWER6 setting (per the
 * commit title) -- confirm it suits POWER8. */
#ifdef POWER8
|
||||
#define PREFETCHSIZE_A 112
|
||||
#endif
|
||||
|
||||
#if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) || defined(POWER8)
|
||||
#define NOP1
|
||||
#define NOP2
|
||||
#else
|
||||
|
|
|
@ -166,7 +166,11 @@
|
|||
#define PREFETCHSIZE_A 112
|
||||
#endif
|
||||
|
||||
#if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970)
|
||||
/* POWER8 build: PREFETCHSIZE_A for this kernel.
 * NOTE(review): presumably carried over from the POWER6 setting (per the
 * commit title) -- confirm it suits POWER8. */
#ifdef POWER8
|
||||
#define PREFETCHSIZE_A 112
|
||||
#endif
|
||||
|
||||
#if defined(POWER4) || defined(POWER5) || defined(POWER6) || defined(PPC970) || defined(POWER8)
|
||||
#define NOP1
|
||||
#define NOP2
|
||||
#else
|
||||
|
|
32
param.h
32
param.h
|
@ -1959,6 +1959,38 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
/*
 * Default GEMM parameters selected when POWER8 is defined: SNUMOPT/DNUMOPT,
 * buffer offsets and alignment mask, per-precision unroll factors, the
 * per-precision P and Q defaults, and SYMV_P.
 * NOTE(review): the unroll factors (4x4 for S/D, 2x4 for C/Z) presumably have
 * to match the copy routines chosen in the POWER8 kernel list (gemm_*copy_4,
 * zgemm_*copy_2 / zgemm_*copy_4) -- confirm before changing either side.
 * NOTE(review): per the commit title these values are presumably copied from
 * the POWER6 section and not yet tuned for POWER8 -- confirm.
 */
#if defined(POWER8)
|
||||
|
||||
#define SNUMOPT 4
|
||||
#define DNUMOPT 4
|
||||
|
||||
#define GEMM_DEFAULT_OFFSET_A 384
|
||||
#define GEMM_DEFAULT_OFFSET_B 1024
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
|
||||
#define SGEMM_DEFAULT_P 992
|
||||
#define DGEMM_DEFAULT_P 480
|
||||
#define CGEMM_DEFAULT_P 488
|
||||
#define ZGEMM_DEFAULT_P 248
|
||||
|
||||
#define SGEMM_DEFAULT_Q 504
|
||||
#define DGEMM_DEFAULT_Q 504
|
||||
#define CGEMM_DEFAULT_Q 400
|
||||
#define ZGEMM_DEFAULT_Q 400
|
||||
|
||||
#define SYMV_P 8
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(SPARC) && defined(V7)
|
||||
|
||||
#define SNUMOPT 4
|
||||
|
|
Loading…
Reference in New Issue