Merge pull request #2070 from quickwritereader/develop
power9 makefile. dgemm based on power8 kernel with following changes …
This commit is contained in:
commit
4dec151d0b
|
@ -9,7 +9,15 @@ else
|
||||||
USE_OPENMP = 1
|
USE_OPENMP = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# CORE=POWER9 compiler flags for C (COMMON_OPT) and Fortran (FCOMMON_OPT).
# The USE_OPENMP=1 branch differs from the else branch only by adding
# -DUSE_OPENMP and -fopenmp.
# NOTE(review): -fno-fast-math follows -Ofast on the C lines, presumably to
# switch the fast-math part of -Ofast back off -- confirm this is intended.
ifeq ($(CORE), POWER9)
|
||||||
|
ifeq ($(USE_OPENMP), 1)
|
||||||
|
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||||
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||||
|
else
|
||||||
|
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
|
||||||
|
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), POWER8)
|
ifeq ($(CORE), POWER8)
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
|
|
|
@ -48,6 +48,7 @@ POWER5
|
||||||
POWER6
|
POWER6
|
||||||
POWER7
|
POWER7
|
||||||
POWER8
|
POWER8
|
||||||
|
POWER9
|
||||||
PPCG4
|
PPCG4
|
||||||
PPC970
|
PPC970
|
||||||
PPC970MP
|
PPC970MP
|
||||||
|
|
5
common.h
5
common.h
|
@ -348,6 +348,11 @@ typedef int blasint;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* POWER9: unless YIELDING was already defined, make it an inline-asm
 * sequence of eight nop instructions.  The macro text ends with its own
 * ';', so a use site that adds another ';' produces an empty statement. */
#ifdef POWER9
|
||||||
|
#ifndef YIELDING
|
||||||
|
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#ifdef PILEDRIVER
|
#ifdef PILEDRIVER
|
||||||
|
|
|
@ -39,7 +39,7 @@
|
||||||
#ifndef COMMON_POWER
|
#ifndef COMMON_POWER
|
||||||
#define COMMON_POWER
|
#define COMMON_POWER
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#define MB __asm__ __volatile__ ("eieio":::"memory")
|
#define MB __asm__ __volatile__ ("eieio":::"memory")
|
||||||
#define WMB __asm__ __volatile__ ("eieio":::"memory")
|
#define WMB __asm__ __volatile__ ("eieio":::"memory")
|
||||||
#else
|
#else
|
||||||
|
@ -241,7 +241,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
#define HAVE_PREFETCH
|
#define HAVE_PREFETCH
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || ( defined(PPC970) && defined(OS_DARWIN) )
|
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || ( defined(PPC970) && defined(OS_DARWIN) )
|
||||||
#define DCBT_ARG 0
|
#define DCBT_ARG 0
|
||||||
#else
|
#else
|
||||||
#define DCBT_ARG 8
|
#define DCBT_ARG 8
|
||||||
|
@ -263,7 +263,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
#define L1_PREFETCH dcbtst
|
#define L1_PREFETCH dcbtst
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#define L1_DUALFETCH
|
#define L1_DUALFETCH
|
||||||
#define L1_PREFETCHSIZE (16 + 128 * 100)
|
#define L1_PREFETCHSIZE (16 + 128 * 100)
|
||||||
#define L1_PREFETCH dcbtst
|
#define L1_PREFETCH dcbtst
|
||||||
|
@ -812,7 +812,7 @@ Lmcount$lazy_ptr:
|
||||||
#define BUFFER_SIZE ( 2 << 20)
|
#define BUFFER_SIZE ( 2 << 20)
|
||||||
#elif defined(PPC440FP2)
|
#elif defined(PPC440FP2)
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
#elif defined(POWER8)
|
#elif defined(POWER8) || defined(POWER9)
|
||||||
#define BUFFER_SIZE ( 64 << 20)
|
#define BUFFER_SIZE ( 64 << 20)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE ( 16 << 20)
|
#define BUFFER_SIZE ( 16 << 20)
|
||||||
|
|
|
@ -94,7 +94,7 @@ char *corename[] = {
|
||||||
"CELL",
|
"CELL",
|
||||||
"PPCG4",
|
"PPCG4",
|
||||||
"POWER8",
|
"POWER8",
|
||||||
"POWER8"
|
"POWER9"
|
||||||
};
|
};
|
||||||
|
|
||||||
int detect(void){
|
int detect(void){
|
||||||
|
@ -124,7 +124,7 @@ int detect(void){
|
||||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8;
|
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
|
||||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||||
|
|
||||||
|
@ -156,7 +156,7 @@ int detect(void){
|
||||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6;
|
||||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6;
|
||||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8;
|
||||||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8;
|
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9;
|
||||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL;
|
||||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4;
|
||||||
return CPUTYPE_POWER5;
|
return CPUTYPE_POWER5;
|
||||||
|
@ -180,7 +180,7 @@ int id;
|
||||||
__asm __volatile("mfpvr %0" : "=r"(id));
|
__asm __volatile("mfpvr %0" : "=r"(id));
|
||||||
switch ( id >> 16 ) {
|
switch ( id >> 16 ) {
|
||||||
case 0x4e: // POWER9
|
case 0x4e: // POWER9
|
||||||
return CPUTYPE_POWER8;
|
return CPUTYPE_POWER9;
|
||||||
break;
|
break;
|
||||||
case 0x4d:
|
case 0x4d:
|
||||||
case 0x4b: // POWER8/8E
|
case 0x4b: // POWER8/8E
|
||||||
|
|
12
getarch.c
12
getarch.c
|
@ -637,6 +637,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CORENAME "POWER8"
|
#define CORENAME "POWER8"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Forced-target description for POWER9, selected by FORCE_POWER9.
 * Defines FORCE plus the architecture, sub-architecture, sub-directory,
 * library and core name strings.  ARCHCONFIG collects the -D settings for
 * this target: -DPOWER9, L1 data size 32768 with line size 128, L2 size
 * 4194304 with line size 128 and associativity 8, and DTB entries 128 with
 * DTB size 4096. */
#if defined(FORCE_POWER9)
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "POWER"
|
||||||
|
#define SUBARCHITECTURE "POWER9"
|
||||||
|
#define SUBDIRNAME "power"
|
||||||
|
#define ARCHCONFIG "-DPOWER9 " \
|
||||||
|
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \
|
||||||
|
"-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 "
|
||||||
|
#define LIBNAME "power9"
|
||||||
|
#define CORENAME "POWER9"
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef FORCE_PPCG4
|
#ifdef FORCE_PPCG4
|
||||||
#define FORCE
|
#define FORCE
|
||||||
|
|
|
@ -44,6 +44,10 @@ ifeq ($(CORE), POWER8)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), POWER9)
|
||||||
|
USE_TRMM = 1
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), zarch)
|
ifeq ($(ARCH), zarch)
|
||||||
USE_TRMM = 1
|
USE_TRMM = 1
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -0,0 +1,184 @@
|
||||||
|
#SGEMM_BETA = ../generic/gemm_beta.c
|
||||||
|
#DGEMM_BETA = ../generic/gemm_beta.c
|
||||||
|
#CGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
|
#ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
|
|
||||||
|
# TRMM kernels.  Only the double-precision entry names a POWER9 source (the
# same file that DGEMMKERNEL names further down); the S/C/Z entries name the
# POWER8 kernel files.
STRMMKERNEL = strmm_kernel_16x8_power8.S
|
||||||
|
DTRMMKERNEL = dgemm_kernel_power9.S
|
||||||
|
CTRMMKERNEL = ctrmm_kernel_8x4_power8.S
|
||||||
|
ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S
|
||||||
|
|
||||||
|
SGEMMKERNEL = sgemm_kernel_16x8_power8.S
|
||||||
|
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||||
|
SGEMMITCOPY = sgemm_tcopy_16_power8.S
|
||||||
|
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||||
|
SGEMMOTCOPY = sgemm_tcopy_8_power8.S
|
||||||
|
SGEMMINCOPYOBJ = sgemm_incopy.o
|
||||||
|
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
|
# DGEMM: the compute kernel is the POWER9 source; the four copy (packing)
# routines are the POWER8 assembly files or the generic C versions.
DGEMMKERNEL = dgemm_kernel_power9.S
|
||||||
|
DGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||||
|
DGEMMITCOPY = dgemm_tcopy_16_power8.S
|
||||||
|
DGEMMONCOPY = dgemm_ncopy_4_power8.S
|
||||||
|
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
|
# Object names for the four copy routines above.
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||||
|
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
|
CGEMMKERNEL = cgemm_kernel_8x4_power8.S
|
||||||
|
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
||||||
|
CGEMMITCOPY = cgemm_tcopy_8_power8.S
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||||
|
CGEMMINCOPYOBJ = cgemm_incopy.o
|
||||||
|
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
||||||
|
|
||||||
|
ZGEMMKERNEL = zgemm_kernel_8x2_power8.S
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
||||||
|
ZGEMMITCOPY = zgemm_tcopy_8_power8.S
|
||||||
|
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||||
|
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||||
|
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
||||||
|
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
||||||
|
|
||||||
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
|
||||||
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
#Todo: CGEMM3MKERNEL should be 4x4 blocksizes.
|
||||||
|
#CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S
|
||||||
|
#ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S
|
||||||
|
|
||||||
|
#Pure C for other kernels
|
||||||
|
#SAMAXKERNEL = ../arm/amax.c
|
||||||
|
#DAMAXKERNEL = ../arm/amax.c
|
||||||
|
#CAMAXKERNEL = ../arm/zamax.c
|
||||||
|
#ZAMAXKERNEL = ../arm/zamax.c
|
||||||
|
#
|
||||||
|
#SAMINKERNEL = ../arm/amin.c
|
||||||
|
#DAMINKERNEL = ../arm/amin.c
|
||||||
|
#CAMINKERNEL = ../arm/zamin.c
|
||||||
|
#ZAMINKERNEL = ../arm/zamin.c
|
||||||
|
#
|
||||||
|
#SMAXKERNEL = ../arm/max.c
|
||||||
|
#DMAXKERNEL = ../arm/max.c
|
||||||
|
#
|
||||||
|
#SMINKERNEL = ../arm/min.c
|
||||||
|
#DMINKERNEL = ../arm/min.c
|
||||||
|
#
|
||||||
|
ISAMAXKERNEL = isamax.c
|
||||||
|
IDAMAXKERNEL = idamax.c
|
||||||
|
ICAMAXKERNEL = icamax.c
|
||||||
|
IZAMAXKERNEL = izamax.c
|
||||||
|
#
|
||||||
|
ISAMINKERNEL = isamin.c
|
||||||
|
IDAMINKERNEL = idamin.c
|
||||||
|
ICAMINKERNEL = icamin.c
|
||||||
|
IZAMINKERNEL = izamin.c
|
||||||
|
#
|
||||||
|
#ISMAXKERNEL = ../arm/imax.c
|
||||||
|
#IDMAXKERNEL = ../arm/imax.c
|
||||||
|
#
|
||||||
|
#ISMINKERNEL = ../arm/imin.c
|
||||||
|
#IDMINKERNEL = ../arm/imin.c
|
||||||
|
#
|
||||||
|
SASUMKERNEL = sasum.c
|
||||||
|
DASUMKERNEL = dasum.c
|
||||||
|
CASUMKERNEL = casum.c
|
||||||
|
ZASUMKERNEL = zasum.c
|
||||||
|
#
|
||||||
|
SAXPYKERNEL = saxpy.c
|
||||||
|
DAXPYKERNEL = daxpy.c
|
||||||
|
CAXPYKERNEL = caxpy.c
|
||||||
|
ZAXPYKERNEL = zaxpy.c
|
||||||
|
#
|
||||||
|
SCOPYKERNEL = scopy.c
|
||||||
|
DCOPYKERNEL = dcopy.c
|
||||||
|
CCOPYKERNEL = ccopy.c
|
||||||
|
ZCOPYKERNEL = zcopy.c
|
||||||
|
#
|
||||||
|
SDOTKERNEL = sdot.c
|
||||||
|
DDOTKERNEL = ddot.c
|
||||||
|
DSDOTKERNEL = sdot.c
|
||||||
|
CDOTKERNEL = cdot.c
|
||||||
|
ZDOTKERNEL = zdot.c
|
||||||
|
#
|
||||||
|
SNRM2KERNEL = ../arm/nrm2.c
|
||||||
|
DNRM2KERNEL = ../arm/nrm2.c
|
||||||
|
CNRM2KERNEL = ../arm/znrm2.c
|
||||||
|
ZNRM2KERNEL = ../arm/znrm2.c
|
||||||
|
#
|
||||||
|
SROTKERNEL = srot.c
|
||||||
|
DROTKERNEL = drot.c
|
||||||
|
CROTKERNEL = crot.c
|
||||||
|
ZROTKERNEL = zrot.c
|
||||||
|
#
|
||||||
|
SSCALKERNEL = sscal.c
|
||||||
|
DSCALKERNEL = dscal.c
|
||||||
|
CSCALKERNEL = zscal.c
|
||||||
|
ZSCALKERNEL = zscal.c
|
||||||
|
#
|
||||||
|
SSWAPKERNEL = sswap.c
|
||||||
|
DSWAPKERNEL = dswap.c
|
||||||
|
CSWAPKERNEL = cswap.c
|
||||||
|
ZSWAPKERNEL = zswap.c
|
||||||
|
#
|
||||||
|
|
||||||
|
SGEMVNKERNEL = sgemv_n.c
|
||||||
|
DGEMVNKERNEL = dgemv_n.c
|
||||||
|
CGEMVNKERNEL = cgemv_n.c
|
||||||
|
ZGEMVNKERNEL = zgemv_n_4.c
|
||||||
|
#
|
||||||
|
SGEMVTKERNEL = sgemv_t.c
|
||||||
|
DGEMVTKERNEL = dgemv_t.c
|
||||||
|
CGEMVTKERNEL = cgemv_t.c
|
||||||
|
ZGEMVTKERNEL = zgemv_t_4.c
|
||||||
|
|
||||||
|
|
||||||
|
#SSYMV_U_KERNEL = ../generic/symv_k.c
|
||||||
|
#SSYMV_L_KERNEL = ../generic/symv_k.c
|
||||||
|
#DSYMV_U_KERNEL = ../generic/symv_k.c
|
||||||
|
#DSYMV_L_KERNEL = ../generic/symv_k.c
|
||||||
|
#QSYMV_U_KERNEL = ../generic/symv_k.c
|
||||||
|
#QSYMV_L_KERNEL = ../generic/symv_k.c
|
||||||
|
#CSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||||
|
#CSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||||
|
#ZSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||||
|
#ZSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||||
|
#XSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||||
|
#XSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||||
|
|
||||||
|
#ZHEMV_U_KERNEL = ../generic/zhemv_k.c
|
||||||
|
#ZHEMV_L_KERNEL = ../generic/zhemv_k.c
|
||||||
|
|
||||||
|
LSAME_KERNEL = ../generic/lsame.c
|
||||||
|
SCABS_KERNEL = ../generic/cabs.c
|
||||||
|
DCABS_KERNEL = ../generic/cabs.c
|
||||||
|
QCABS_KERNEL = ../generic/cabs.c
|
||||||
|
|
||||||
|
#Dump kernel
|
||||||
|
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
||||||
|
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
|
|
@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "casum_microk_power8.c"
|
#include "casum_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "ccopy_microk_power8.c"
|
#include "ccopy_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
|
|
||||||
static void crot_kernel_8 (long n, float *x, float *y, float c, float s)
|
static void crot_kernel_8 (long n, float *x, float *y, float c, float s)
|
||||||
{
|
{
|
||||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "cswap_microk_power8.c"
|
#include "cswap_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "dasum_microk_power8.c"
|
#include "dasum_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "daxpy_microk_power8.c"
|
#include "daxpy_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "dcopy_microk_power8.c"
|
#include "dcopy_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "ddot_microk_power8.c"
|
#include "ddot_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,249 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2013-2019, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#define ASSEMBLER
|
||||||
|
#include "common.h"
|
||||||
|
#include "def_vsx.h"
|
||||||
|
|
||||||
|
|
||||||
|
#define LOAD ld
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Bytes the prologue subtracts from SP for the local save area. */
#define STACKSIZE (512 )
|
||||||
|
/* 488(SP): slot where the prologue stores alpha (f1). */
#define ALPHA_SP (296+192)(SP)
|
||||||
|
/* 496(SP): slot where the prologue stores a zero word. */
#define FZERO (304+192)(SP)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* Names for r3-r10 as used by the kernel; presumably the incoming
 * arguments -- confirm against the caller's prototype. */
#define M r3
|
||||||
|
#define N r4
|
||||||
|
#define K r5
|
||||||
|
|
||||||
|
#define A r7
|
||||||
|
#define B r8
|
||||||
|
#define C r9
|
||||||
|
#define LDC r10
|
||||||
|
#define OFFSET r6
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* VSX register that receives alpha via lxvdsx in the prologue. */
#define alpha_r vs18
|
||||||
|
|
||||||
|
/* o0 is the literal 0; o8/o16/o24/o32/o48 below are registers that the
 * prologue loads with the constants 8/16/24/32/48. */
#define o0 0
|
||||||
|
|
||||||
|
|
||||||
|
/* Scratch and loop registers.  r14-r31 are saved and restored by the
 * prologue and epilogue. */
#define T4 r12
|
||||||
|
#define T3 r11
|
||||||
|
#define C4 r14
|
||||||
|
#define o8 r15
|
||||||
|
#define o24 r16
|
||||||
|
#define C2 r17
|
||||||
|
#define L r18
|
||||||
|
#define T1 r19
|
||||||
|
#define C3 r20
|
||||||
|
#define TEMP_REG r21
|
||||||
|
#define I r22
|
||||||
|
#define J r23
|
||||||
|
#define AO r24
|
||||||
|
#define BO r25
|
||||||
|
#define CO r26
|
||||||
|
#define o16 r27
|
||||||
|
#define o32 r28
|
||||||
|
#define o48 r29
|
||||||
|
|
||||||
|
/* Loaded with 384 in the prologue; presumably a prefetch distance used by
 * the included macros -- confirm there. */
#define PRE r30
|
||||||
|
#define T2 r31
|
||||||
|
|
||||||
|
#include "dgemm_macros_power9.S"
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef NEEDPARAM

/*
 * POWER9 DGEMM kernel entry point (also used as the DTRMM kernel when
 * TRMMKERNEL is defined).
 *
 * Inputs, using the register aliases defined earlier in this file:
 *   M, N, K        problem sizes; any value <= 0 branches to .L999_H1
 *   A, B, C, LDC   operand pointers and leading dimension of C; LDC is
 *                  shifted left by BASE_SHIFT before use
 *   f1             alpha, spilled to ALPHA_SP and splatted into alpha_r
 *   OFFSET         TRMMKERNEL only, loaded from the caller's frame
 * Returns 0 in r3.
 *
 * Save area, offsets from SP after it is lowered by STACKSIZE (512):
 *     0 .. 143   f14-f31          (8 bytes each)
 *   144 .. 287   r31 down to r14  (8 bytes each)
 *   288 .. 479   vs52-vs63        (16 bytes each)
 *   488          alpha            (ALPHA_SP)
 *   496          zero word        (FZERO)
 *
 * The non-volatile vector registers v20-v31 are saved under their VSX names
 * vs52-vs63.  stxv/lxv take a VSX register number (0-63); writing "v20"
 * there resolves to register number 20, i.e. vs20, the register behind f20
 * that stfd has already saved, and would leave v20-v31 unprotected.
 *
 * NOTE(review): .L999_H1 is not defined in this file; presumably it comes
 * from dgemm_logic_power9.S -- confirm.
 */
	PROLOGUE
	PROFCODE

	addi	SP, SP, -STACKSIZE
	li	r0, 0

	/* Save the non-volatile floating-point registers. */
	stfd	f14,    0(SP)
	stfd	f15,    8(SP)
	stfd	f16,   16(SP)
	stfd	f17,   24(SP)

	stfd	f18,   32(SP)
	stfd	f19,   40(SP)
	stfd	f20,   48(SP)
	stfd	f21,   56(SP)

	stfd	f22,   64(SP)
	stfd	f23,   72(SP)
	stfd	f24,   80(SP)
	stfd	f25,   88(SP)

	stfd	f26,   96(SP)
	stfd	f27,  104(SP)
	stfd	f28,  112(SP)
	stfd	f29,  120(SP)

	stfd	f30,  128(SP)
	stfd	f31,  136(SP)


	/* Save the non-volatile general-purpose registers. */
	std	r31,  144(SP)
	std	r30,  152(SP)
	std	r29,  160(SP)
	std	r28,  168(SP)
	std	r27,  176(SP)
	std	r26,  184(SP)
	std	r25,  192(SP)
	std	r24,  200(SP)
	std	r23,  208(SP)
	std	r22,  216(SP)
	std	r21,  224(SP)
	std	r20,  232(SP)
	std	r19,  240(SP)
	std	r18,  248(SP)
	std	r17,  256(SP)
	std	r16,  264(SP)
	std	r15,  272(SP)
	std	r14,  280(SP)


	/* Save the non-volatile vector registers v20-v31 (= vs52-vs63). */
	stxv	vs52, 288(SP)
	stxv	vs53, 304(SP)
	stxv	vs54, 320(SP)
	stxv	vs55, 336(SP)
	stxv	vs56, 352(SP)
	stxv	vs57, 368(SP)
	stxv	vs58, 384(SP)
	stxv	vs59, 400(SP)
	stxv	vs60, 416(SP)
	stxv	vs61, 432(SP)
	stxv	vs62, 448(SP)
	stxv	vs63, 464(SP)


	/* Spill alpha so it can be reloaded as a splat, and store a zero word. */
	stfd	f1,  ALPHA_SP
	stw	r0,  FZERO

	slwi	LDC, LDC, BASE_SHIFT

#if defined(TRMMKERNEL)
	ld	OFFSET,  FRAMESLOT(0) + STACKSIZE(SP)
#endif


	/* Nothing to do when any dimension is non-positive. */
	cmpwi	cr0, M, 0
	ble	.L999_H1
	cmpwi	cr0, N, 0
	ble	.L999_H1
	cmpwi	cr0, K, 0
	ble	.L999_H1



	/* T1 = address of the alpha slot (same offset as ALPHA_SP). */
	addi	T1, SP, 296+192


	/* Constant index registers used by the included macros and logic. */
	li	PRE, 384
	li	o8 , 8
	li	o16, 16
	li	o24, 24
	li	o32, 32
	li	o48, 48


	/* Load alpha into both doublewords of alpha_r. */
	lxvdsx	alpha_r, 0, T1

#include "dgemm_logic_power9.S"

.L999:
	/* Return value 0. */
	addi	r3, 0, 0

	lfd	f14,    0(SP)
	lfd	f15,    8(SP)
	lfd	f16,   16(SP)
	lfd	f17,   24(SP)

	lfd	f18,   32(SP)
	lfd	f19,   40(SP)
	lfd	f20,   48(SP)
	lfd	f21,   56(SP)

	lfd	f22,   64(SP)
	lfd	f23,   72(SP)
	lfd	f24,   80(SP)
	lfd	f25,   88(SP)

	lfd	f26,   96(SP)
	lfd	f27,  104(SP)
	lfd	f28,  112(SP)
	lfd	f29,  120(SP)

	lfd	f30,  128(SP)
	lfd	f31,  136(SP)


	ld	r31,  144(SP)
	ld	r30,  152(SP)
	ld	r29,  160(SP)
	ld	r28,  168(SP)
	ld	r27,  176(SP)
	ld	r26,  184(SP)
	ld	r25,  192(SP)
	ld	r24,  200(SP)
	ld	r23,  208(SP)
	ld	r22,  216(SP)
	ld	r21,  224(SP)
	ld	r20,  232(SP)
	ld	r19,  240(SP)
	ld	r18,  248(SP)
	ld	r17,  256(SP)
	ld	r16,  264(SP)
	ld	r15,  272(SP)
	ld	r14,  280(SP)

	/* Restore v20-v31 through the same VSX names used for the save. */
	lxv	vs52, 288(SP)
	lxv	vs53, 304(SP)
	lxv	vs54, 320(SP)
	lxv	vs55, 336(SP)
	lxv	vs56, 352(SP)
	lxv	vs57, 368(SP)
	lxv	vs58, 384(SP)
	lxv	vs59, 400(SP)
	lxv	vs60, 416(SP)
	lxv	vs61, 432(SP)
	lxv	vs62, 448(SP)
	lxv	vs63, 464(SP)

	addi	SP, SP, STACKSIZE
	blr

	EPILOGUE
#endif
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "dgemv_n_microk_power8.c"
|
#include "dgemv_n_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#pragma GCC optimize "O1"
|
#pragma GCC optimize "O1"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "drot_microk_power8.c"
|
#include "drot_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "dscal_microk_power8.c"
|
#include "dscal_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "dswap_microk_power8.c"
|
#include "dswap_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "sasum_microk_power8.c"
|
#include "sasum_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "scopy_microk_power8.c"
|
#include "scopy_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "sdot_microk_power8.c"
|
#include "sdot_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#pragma GCC optimize "O1"
|
#pragma GCC optimize "O1"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "srot_microk_power8.c"
|
#include "srot_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "sscal_microk_power8.c"
|
#include "sscal_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "sswap_microk_power8.c"
|
#include "sswap_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "zasum_microk_power8.c"
|
#include "zasum_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -36,19 +36,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "zaxpy_microk_power8.c"
|
#include "zaxpy_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifndef HAVE_KERNEL_4
|
#ifndef HAVE_KERNEL_4
|
||||||
|
|
||||||
static void zaxpy_kernel_4(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
|
static void zaxpy_kernel_4(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT da_r,FLOAT da_i)
|
||||||
{
|
{
|
||||||
BLASLONG register i = 0;
|
BLASLONG register i = 0;
|
||||||
BLASLONG register ix = 0;
|
BLASLONG register ix = 0;
|
||||||
FLOAT da_r = alpha[0];
|
|
||||||
FLOAT da_i = alpha[1];
|
|
||||||
|
|
||||||
|
|
||||||
while(i < n)
|
while(i < n)
|
||||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "zcopy_microk_power8.c"
|
#include "zcopy_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "zdot_microk_power8.c"
|
#include "zdot_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#pragma GCC optimize "O1"
|
#pragma GCC optimize "O1"
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#if defined(DOUBLE)
|
#if defined(DOUBLE)
|
||||||
#include "zscal_microk_power8.c"
|
#include "zscal_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
|
||||||
#if defined(POWER8)
|
#if defined(POWER8) || defined(POWER9)
|
||||||
#include "zswap_microk_power8.c"
|
#include "zswap_microk_power8.c"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
31
param.h
31
param.h
|
@ -2230,6 +2230,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Tuning parameters used when POWER9 is defined: SNUMOPT/DNUMOPT, the GEMM
 * buffer offsets and alignment mask, the per-precision unroll factors, the
 * per-precision *_DEFAULT_P and *_DEFAULT_Q values, and SYMV_P. */
#if defined(POWER9)
|
||||||
|
|
||||||
|
#define SNUMOPT 16
|
||||||
|
#define DNUMOPT 8
|
||||||
|
|
||||||
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
|
#define GEMM_DEFAULT_OFFSET_B 65536
|
||||||
|
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
||||||
|
|
||||||
|
/* NOTE(review): these M x N pairs (16x8, 16x4, 8x4, 8x2) look like the tile
 * sizes in the kernel and copy-routine file names chosen for POWER9; keep
 * the two in step -- confirm. */
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_M 16
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_M 8
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_P 1280
|
||||||
|
#define DGEMM_DEFAULT_P 128
|
||||||
|
#define CGEMM_DEFAULT_P 640
|
||||||
|
#define ZGEMM_DEFAULT_P 320
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_Q 640
|
||||||
|
#define DGEMM_DEFAULT_Q 384
|
||||||
|
#define CGEMM_DEFAULT_Q 640
|
||||||
|
#define ZGEMM_DEFAULT_Q 640
|
||||||
|
|
||||||
|
#define SYMV_P 8
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(SPARC) && defined(V7)
|
#if defined(SPARC) && defined(V7)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue