Merge pull request #2780 from Guobing-Chen/CPL_build_support
Enable COOPERLAKE build target
This commit is contained in:
commit
9ee21a0a39
|
@ -88,6 +88,9 @@ endif
|
|||
ifeq ($(TARGET), SKYLAKEX)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), COOPERLAKE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
|
@ -130,6 +133,9 @@ endif
|
|||
ifeq ($(TARGET_CORE), SKYLAKEX)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), COOPERLAKE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
|
@ -553,7 +559,7 @@ DYNAMIC_CORE += HASWELL ZEN
|
|||
endif
|
||||
ifneq ($(NO_AVX512), 1)
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += SKYLAKEX
|
||||
DYNAMIC_CORE += SKYLAKEX COOPERLAKE
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
|
|
@ -27,6 +27,32 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
# Compiler flags for a single-target COOPERLAKE build.
# Nothing is added for DYNAMIC_ARCH builds or when AVX512 is disabled
# (NO_AVX512 defined).
ifeq ($(CORE), COOPERLAKE)
ifndef DYNAMIC_ARCH
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# cooperlake support was added in 10.1, so -march=cooperlake is usable when
# either (major > 10) or (major == 10 and minor >= 1).  Testing only
# "major >= 10 and minor >= 1" would wrongly reject releases such as 11.0.
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 1)
GCCVERSIONGT10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \> 10)
# The concatenation below is non-empty when at least one of the two
# acceptable conditions holds (GNU make has no logical OR for ifeq).
ifneq ($(filter 1,$(GCCVERSIONGT10))$(filter 11,$(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1)),)
CCOMMON_OPT += -march=cooperlake
FCOMMON_OPT += -march=cooperlake
endif
endif
# Same unwind-table handling on Windows-hosted toolchains: always for
# Cygwin, and for native Windows only when the C compiler is GCC.
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
endif
|
||||
|
||||
ifeq ($(CORE), HASWELL)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX2
|
||||
|
|
|
@ -22,6 +22,7 @@ SANDYBRIDGE
|
|||
HASWELL
|
||||
SKYLAKEX
|
||||
ATOM
|
||||
COOPERLAKE
|
||||
|
||||
b)AMD CPU:
|
||||
ATHLON
|
||||
|
|
|
@ -76,9 +76,9 @@ if (DYNAMIC_ARCH)
|
|||
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
|
||||
endif ()
|
||||
if (NOT NO_AVX512)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX COOPERLAKE)
|
||||
string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
|
||||
endif ()
|
||||
endif ()
|
||||
if (DYNAMIC_LIST)
|
||||
set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST})
|
||||
endif ()
|
||||
|
|
|
@ -103,3 +103,16 @@ if (${CORE} STREQUAL "SKYLAKEX")
|
|||
endif ()
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
# Select the -march flag for a single-target COOPERLAKE build.
# Skipped for DYNAMIC_ARCH builds and when AVX512 is disabled (NO_AVX512).
if ("${CORE}" STREQUAL "COOPERLAKE")
  if (NOT DYNAMIC_ARCH)
    if (NOT NO_AVX512)
      # Ask the C compiler for its version; strip the trailing newline so the
      # value can be compared (and printed) cleanly.
      execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
        OUTPUT_VARIABLE GCC_VERSION
        OUTPUT_STRIP_TRAILING_WHITESPACE)
      # NOTE(review): compilers that print only the major version for
      # -dumpversion (e.g. "10") compare as older than 10.1 here and take the
      # skylake-avx512 fallback - confirm whether that is acceptable.
      # The expansions are quoted so an empty probe result does not turn the
      # condition into a syntax error.
      if ("${GCC_VERSION}" VERSION_GREATER 10.1 OR "${GCC_VERSION}" VERSION_EQUAL 10.1)
        # -march=cooperlake is only accepted from version 10.1 on.
        set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake")
      else ()
        # Older compilers: fall back to the Skylake-X AVX512 target.
        set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
      endif ()
    endif ()
  endif ()
endif ()
|
||||
|
|
|
@ -33,7 +33,7 @@ endif ()
|
|||
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||
set(NO_AVX 1)
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE")
|
||||
set(TARGET "NEHALEM")
|
||||
endif ()
|
||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
|
||||
|
@ -45,6 +45,18 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
|||
endif ()
|
||||
|
||||
if (DEFINED TARGET)
|
||||
# Append the -march flag used for COOPERLAKE kernels to KERNEL_DEFINITIONS,
# unless AVX512 has been disabled through NO_AVX512.
if ("${TARGET}" STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
  # NOTE(review): the probe below assumes the C compiler answers
  # -dumpversion with a GCC-style version number; a compiler-ID guard
  # (GNU vs. Clang) was sketched here but never enabled - confirm whether
  # non-GCC compilers need separate handling.
  execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
    OUTPUT_VARIABLE GCC_VERSION
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Quoted so an empty probe result cannot break the condition.
  if ("${GCC_VERSION}" VERSION_GREATER 10.1 OR "${GCC_VERSION}" VERSION_EQUAL 10.1)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
  else ()
    # Compilers older than 10.1 get the Skylake-X AVX512 target instead.
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
  endif ()
endif ()
|
||||
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
|
||||
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
|
||||
endif()
|
||||
|
|
15
cpuid.h
15
cpuid.h
|
@ -118,6 +118,7 @@
|
|||
#define CORE_ZEN 27
|
||||
#define CORE_SKYLAKEX 28
|
||||
#define CORE_DHYANA 29
|
||||
#define CORE_COOPERLAKE 30
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
|
@ -137,11 +138,12 @@
|
|||
#define HAVE_MISALIGNSSE (1 << 15)
|
||||
#define HAVE_128BITFPU (1 << 16)
|
||||
#define HAVE_FASTMOVU (1 << 17)
|
||||
#define HAVE_AVX (1 << 18)
|
||||
#define HAVE_FMA4 (1 << 19)
|
||||
#define HAVE_FMA3 (1 << 20)
|
||||
#define HAVE_AVX512VL (1 << 21)
|
||||
#define HAVE_AVX2 (1 << 22)
|
||||
#define HAVE_AVX (1 << 18)
|
||||
#define HAVE_FMA4 (1 << 19)
|
||||
#define HAVE_FMA3 (1 << 20)
|
||||
#define HAVE_AVX512VL (1 << 21)
|
||||
#define HAVE_AVX2 (1 << 22)
|
||||
#define HAVE_AVX512BF16 (1 << 23)
|
||||
|
||||
#define CACHE_INFO_L1_I 1
|
||||
#define CACHE_INFO_L1_D 2
|
||||
|
@ -218,7 +220,8 @@ typedef struct {
|
|||
#define CPUTYPE_ZEN 51
|
||||
#define CPUTYPE_SKYLAKEX 52
|
||||
#define CPUTYPE_DHYANA 53
|
||||
#define CPUTYPE_COOPERLAKE 54
|
||||
|
||||
#define CPUTYPE_HYGON_UNKNOWN 54
|
||||
#define CPUTYPE_HYGON_UNKNOWN 99
|
||||
|
||||
#endif
|
||||
|
|
37
cpuid_x86.c
37
cpuid_x86.c
|
@ -249,6 +249,22 @@ int support_avx512(){
|
|||
#endif
|
||||
}
|
||||
|
||||
/*
 * Report whether the running CPU supports AVX512_BF16.
 *
 * Returns 1 when support_avx512() succeeds and CPUID leaf 7, sub-leaf 1
 * has EAX bit 5 set; returns 0 otherwise, and always 0 when the build
 * defines NO_AVX or NO_AVX512.
 */
int support_avx512_bf16(){
#if defined(NO_AVX) || defined(NO_AVX512)
  return 0;
#else
  int eax, ebx, ecx, edx;

  /* BF16 is only meaningful on top of a usable AVX512 baseline. */
  if (!support_avx512())
    return 0;

  cpuid_count(7, 1, &eax, &ebx, &ecx, &edx);

  /* CPUID.7.1:EAX[bit 5] indicates whether avx512_bf16 is supported. */
  return ((eax & 32) == 32) ? 1 : 0;
#endif
}
|
||||
|
||||
int get_vendor(void){
|
||||
int eax, ebx, ecx, edx;
|
||||
|
@ -335,6 +351,7 @@ int get_cputype(int gettype){
|
|||
if (support_avx()) feature |= HAVE_AVX;
|
||||
if (support_avx2()) feature |= HAVE_AVX2;
|
||||
if (support_avx512()) feature |= HAVE_AVX512VL;
|
||||
if (support_avx512_bf16()) feature |= HAVE_AVX512BF16;
|
||||
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
|
||||
#endif
|
||||
|
||||
|
@ -1337,6 +1354,8 @@ int get_cpuname(void){
|
|||
return CPUTYPE_NEHALEM;
|
||||
case 5:
|
||||
// Skylake X
|
||||
if(support_avx512_bf16())
|
||||
return CPUTYPE_COOPERLAKE;
|
||||
if(support_avx512())
|
||||
return CPUTYPE_SKYLAKEX;
|
||||
if(support_avx2())
|
||||
|
@ -1677,7 +1696,8 @@ static char *cpuname[] = {
|
|||
"EXCAVATOR",
|
||||
"ZEN",
|
||||
"SKYLAKEX",
|
||||
"DHYANA"
|
||||
"DHYANA",
|
||||
"COOPERLAKE"
|
||||
};
|
||||
|
||||
static char *lowercpuname[] = {
|
||||
|
@ -1733,7 +1753,8 @@ static char *lowercpuname[] = {
|
|||
"excavator",
|
||||
"zen",
|
||||
"skylakex",
|
||||
"dhyana"
|
||||
"dhyana",
|
||||
"cooperlake"
|
||||
};
|
||||
|
||||
static char *corename[] = {
|
||||
|
@ -1766,7 +1787,8 @@ static char *corename[] = {
|
|||
"EXCAVATOR",
|
||||
"ZEN",
|
||||
"SKYLAKEX",
|
||||
"DHYANA"
|
||||
"DHYANA",
|
||||
"COOPERLAKE"
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
|
@ -1799,7 +1821,8 @@ static char *corename_lower[] = {
|
|||
"excavator",
|
||||
"zen",
|
||||
"skylakex",
|
||||
"dhyana"
|
||||
"dhyana",
|
||||
"cooperlake"
|
||||
};
|
||||
|
||||
|
||||
|
@ -2007,7 +2030,9 @@ int get_coretype(void){
|
|||
case 5:
|
||||
// Skylake X
|
||||
#ifndef NO_AVX512
|
||||
return CORE_SKYLAKEX;
|
||||
if(support_avx512_bf16())
|
||||
return CORE_COOPERLAKE;
|
||||
return CORE_SKYLAKEX;
|
||||
#else
|
||||
if(support_avx())
|
||||
#ifndef NO_AVX2
|
||||
|
@ -2276,6 +2301,7 @@ void get_cpuconfig(void){
|
|||
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
|
||||
if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n");
|
||||
if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n");
|
||||
if (features & HAVE_AVX512BF16 ) printf("#define HAVE_AVX512BF16\n");
|
||||
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
|
||||
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
|
||||
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
|
||||
|
@ -2346,6 +2372,7 @@ void get_sse(void){
|
|||
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
|
||||
if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n");
|
||||
if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n");
|
||||
if (features & HAVE_AVX512BF16 ) printf("HAVE_AVX512BF16=1\n");
|
||||
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
|
||||
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
|
||||
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");
|
||||
|
|
|
@ -333,7 +333,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
#else
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
|
|
@ -367,7 +367,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
/* Split local region of B into parts */
|
||||
for(jjs = js; jjs < MIN(n_to, js + div_n); jjs += min_jj){
|
||||
min_jj = MIN(n_to, js + div_n) - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
|
|
@ -135,7 +135,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
@ -205,7 +205,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
@ -300,7 +300,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
@ -370,7 +370,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
|
|
@ -122,7 +122,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = 0; jjs < ls - js; jjs += min_jj){
|
||||
min_jj = ls - js - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
@ -146,7 +146,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = 0; jjs < min_l; jjs += min_jj){
|
||||
min_jj = min_l - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
@ -203,7 +203,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
@ -258,7 +258,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = 0; jjs < min_l; jjs += min_jj){
|
||||
min_jj = min_l - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
@ -283,7 +283,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = 0; jjs < js - ls - min_l; jjs += min_jj){
|
||||
min_jj = js - ls - min_l - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
@ -344,7 +344,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
|
|||
|
||||
for(jjs = js; jjs < js + min_j; jjs += min_jj){
|
||||
min_jj = min_j + js - jjs;
|
||||
#ifdef SKYLAKEX
|
||||
#if defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
|
||||
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
|
||||
#else
|
||||
|
|
|
@ -180,9 +180,10 @@ int get_L2_size(void){
|
|||
int eax, ebx, ecx, edx;
|
||||
|
||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
|
||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
|
||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
|
||||
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
|
||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
|
||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
|
||||
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || \
|
||||
defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
|
@ -266,7 +267,9 @@ int get_L2_size(void){
|
|||
void blas_set_parameter(void){
|
||||
|
||||
int factor;
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || \
|
||||
defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || \
|
||||
defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
int size = 16;
|
||||
#else
|
||||
int size = get_L2_size();
|
||||
|
|
30
getarch.c
30
getarch.c
|
@ -365,6 +365,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
/*
 * Forced COOPERLAKE target.
 * With NO_AVX512 defined the configuration degrades to the HASWELL
 * settings; otherwise the COOPERLAKE settings (including the AVX512VL and
 * AVX512BF16 feature defines and -march=cooperlake) are emitted.
 */
#ifdef FORCE_COOPERLAKE
/* Settings shared by both variants. */
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE    "X86"
#ifndef NO_AVX512
/* Full COOPERLAKE configuration. */
#define SUBARCHITECTURE "COOPERLAKE"
#define ARCHCONFIG   "-DCOOPERLAKE " \
		     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
		     "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
		     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
		     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
		     "-DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=cooperlake"
#define LIBNAME   "cooperlake"
#define CORENAME  "COOPERLAKE"
#else
/* AVX512 disabled: present the target as HASWELL instead. */
#define SUBARCHITECTURE "HASWELL"
#define ARCHCONFIG   "-DHASWELL " \
		     "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
		     "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
		     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
		     "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
		     "-DFMA3"
#define LIBNAME   "haswell"
#define CORENAME  "HASWELL"
#endif
#endif
|
||||
|
||||
#ifdef FORCE_ATOM
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
|
|
|
@ -127,7 +127,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
|||
|
||||
# Makefile.L3
|
||||
set(USE_TRMM false)
|
||||
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) )
|
||||
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) OR (TARGET_CORE MATCHES COOPERLAKE))
|
||||
set(USE_TRMM true)
|
||||
endif ()
|
||||
if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9) OR (TARGET_CORE MATCHES POWER10))
|
||||
|
|
|
@ -8,6 +8,7 @@ include $(TOPDIR)/Makefile.system
|
|||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
|
||||
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), power)
|
||||
|
@ -37,7 +38,22 @@ ifdef NO_AVX2
|
|||
endif
|
||||
|
||||
ifdef TARGET_CORE
|
||||
ifeq ($(TARGET_CORE), SKYLAKEX)
|
||||
ifeq ($(TARGET_CORE), COOPERLAKE)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||
ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||
override CFLAGS += -march=cooperlake
|
||||
else
|
||||
override CFLAGS += -march=skylake-avx512
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
override CFLAGS += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
override CFLAGS += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
else ifeq ($(TARGET_CORE), SKYLAKEX)
|
||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) -march=skylake-avx512
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
override CFLAGS += -fno-asynchronous-unwind-tables
|
||||
|
|
|
@ -43,6 +43,10 @@ ifeq ($(CORE), SKYLAKEX)
|
|||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), COOPERLAKE)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ZEN)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
|
|
@ -1171,7 +1171,7 @@ static void init_parameter(void) {
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef SKYLAKEX
|
||||
#if defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "SkylakeX\n");
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -61,7 +61,7 @@
|
|||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
|
|
@ -63,7 +63,7 @@
|
|||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
|
|
@ -61,7 +61,7 @@
|
|||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
|
|
@ -63,7 +63,7 @@
|
|||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
|
|
@ -61,7 +61,7 @@
|
|||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
include $(KERNELDIR)/KERNEL.SKYLAKEX
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "caxpy_microk_steamroller-2.c"
|
||||
#elif defined(BULLDOZER)
|
||||
#include "caxpy_microk_bulldozer-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE)
|
||||
#include "caxpy_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "caxpy_microk_sandy-2.c"
|
||||
|
|
|
@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "cdot_microk_bulldozer-2.c"
|
||||
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
|
||||
#include "cdot_microk_steamroller-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "cdot_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "cdot_microk_sandy-2.c"
|
||||
|
|
|
@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "cgemv_n_microk_haswell-4.c"
|
||||
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "cgemv_n_microk_bulldozer-4.c"
|
||||
|
|
|
@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "cgemv_t_microk_haswell-4.c"
|
||||
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "cgemv_t_microk_bulldozer-4.c"
|
||||
|
|
|
@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "cscal_microk_haswell-2.c"
|
||||
#elif defined(BULLDOZER) || defined(PILEDRIVER)
|
||||
#include "cscal_microk_bulldozer-2.c"
|
||||
|
|
|
@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "daxpy_microk_piledriver-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#include "daxpy_microk_haswell-2.c"
|
||||
#elif defined (SKYLAKEX)
|
||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "daxpy_microk_skylakex-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "daxpy_microk_sandy-2.c"
|
||||
|
|
|
@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "ddot_microk_nehalem-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#include "ddot_microk_haswell-2.c"
|
||||
#elif defined (SKYLAKEX)
|
||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "ddot_microk_skylakex-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "ddot_microk_sandy-2.c"
|
||||
|
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "dgemv_n_microk_nehalem-4.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "dgemv_n_microk_haswell-4.c"
|
||||
#elif defined (SKYLAKEX)
|
||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "dgemv_n_microk_skylakex-4.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "dgemv_t_microk_haswell-4.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "dscal_microk_sandy-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#include "dscal_microk_haswell-2.c"
|
||||
#elif defined (SKYLAKEX)
|
||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "dscal_microk_skylakex-2.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "dsymv_L_microk_bulldozer-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#include "dsymv_L_microk_haswell-2.c"
|
||||
#elif defined (SKYLAKEX)
|
||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "dsymv_L_microk_skylakex-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "dsymv_L_microk_sandy-2.c"
|
||||
|
|
|
@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "dsymv_U_microk_bulldozer-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "dsymv_U_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "dsymv_U_microk_sandy-2.c"
|
||||
|
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "saxpy_microk_nehalem-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#include "saxpy_microk_haswell-2.c"
|
||||
#elif defined (SKYLAKEX)
|
||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "saxpy_microk_skylakex-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "saxpy_microk_sandy-2.c"
|
||||
|
|
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "sdot_microk_nehalem-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#include "sdot_microk_haswell-2.c"
|
||||
#elif defined (SKYLAKEX)
|
||||
#elif defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "sdot_microk_skylakex-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "sdot_microk_sandy-2.c"
|
||||
|
|
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "sgemv_n_microk_nehalem-4.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "sgemv_n_microk_sandy-4.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "sgemv_n_microk_haswell-4.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "sgemv_t_microk_bulldozer-4.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "sgemv_t_microk_sandy-4.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "sgemv_t_microk_haswell-4.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "ssymv_L_microk_bulldozer-2.c"
|
||||
#elif defined(NEHALEM)
|
||||
#include "ssymv_L_microk_nehalem-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "ssymv_L_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "ssymv_L_microk_sandy-2.c"
|
||||
|
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "ssymv_U_microk_bulldozer-2.c"
|
||||
#elif defined(NEHALEM)
|
||||
#include "ssymv_U_microk_nehalem-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "ssymv_U_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "ssymv_U_microk_sandy-2.c"
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "zaxpy_microk_bulldozer-2.c"
|
||||
#elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "zaxpy_microk_steamroller-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "zaxpy_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "zaxpy_microk_sandy-2.c"
|
||||
|
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "zdot_microk_bulldozer-2.c"
|
||||
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
|
||||
#include "zdot_microk_steamroller-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "zdot_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "zdot_microk_sandy-2.c"
|
||||
|
|
|
@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "zgemv_n_microk_haswell-4.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "zgemv_n_microk_sandy-4.c"
|
||||
|
|
|
@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "zgemv_t_microk_bulldozer-4.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "zgemv_t_microk_haswell-4.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#include "zscal_microk_haswell-2.c"
|
||||
#elif defined(BULLDOZER) || defined(PILEDRIVER)
|
||||
#include "zscal_microk_bulldozer-2.c"
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
|
118
param.h
118
param.h
|
@ -1748,6 +1748,124 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#ifdef COOPERLAKE
|
||||
|
||||
#define SNUMOPT 16
|
||||
#define DNUMOPT 8
|
||||
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SYMV_P 8
|
||||
|
||||
#if defined(XDOUBLE) || defined(DOUBLE)
|
||||
#define SWITCH_RATIO 8
|
||||
#define GEMM_PREFERED_SIZE 8
|
||||
#else
|
||||
#define SWITCH_RATIO 16
|
||||
#define GEMM_PREFERED_SIZE 16
|
||||
#endif
|
||||
#define USE_SGEMM_KERNEL_DIRECT 1
|
||||
|
||||
#ifdef ARCH_X86
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
|
||||
#else
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define DGEMM_DEFAULT_UNROLL_M 16
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_MN 32
|
||||
#define DGEMM_DEFAULT_UNROLL_MN 32
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_X86
|
||||
|
||||
#define SGEMM_DEFAULT_P 512
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
#define DGEMM_DEFAULT_P 512
|
||||
#define DGEMM_DEFAULT_R dgemm_r
|
||||
#define QGEMM_DEFAULT_P 504
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
#define CGEMM_DEFAULT_P 128
|
||||
#define CGEMM_DEFAULT_R 1024
|
||||
#define ZGEMM_DEFAULT_P 512
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
#define XGEMM_DEFAULT_P 252
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
#define SGEMM_DEFAULT_Q 256
|
||||
#define DGEMM_DEFAULT_Q 256
|
||||
#define QGEMM_DEFAULT_Q 128
|
||||
#define CGEMM_DEFAULT_Q 256
|
||||
#define ZGEMM_DEFAULT_Q 192
|
||||
#define XGEMM_DEFAULT_Q 128
|
||||
|
||||
#else
|
||||
|
||||
#define SGEMM_DEFAULT_P 640
|
||||
#define DGEMM_DEFAULT_P 192
|
||||
#define CGEMM_DEFAULT_P 384
|
||||
#define ZGEMM_DEFAULT_P 256
|
||||
|
||||
#define SGEMM_DEFAULT_Q 320
|
||||
#define DGEMM_DEFAULT_Q 384
|
||||
#define CGEMM_DEFAULT_Q 192
|
||||
#define ZGEMM_DEFAULT_Q 128
|
||||
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
#define DGEMM_DEFAULT_R 8640
|
||||
#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
|
||||
#define QGEMM_DEFAULT_Q 128
|
||||
#define QGEMM_DEFAULT_P 504
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
#define XGEMM_DEFAULT_P 252
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
#define XGEMM_DEFAULT_Q 128
|
||||
|
||||
#define CGEMM3M_DEFAULT_UNROLL_N 4
|
||||
#define CGEMM3M_DEFAULT_UNROLL_M 8
|
||||
#define ZGEMM3M_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM3M_DEFAULT_UNROLL_M 4
|
||||
|
||||
#define CGEMM3M_DEFAULT_P 320
|
||||
#define ZGEMM3M_DEFAULT_P 256
|
||||
#define XGEMM3M_DEFAULT_P 112
|
||||
#define CGEMM3M_DEFAULT_Q 320
|
||||
#define ZGEMM3M_DEFAULT_Q 256
|
||||
#define XGEMM3M_DEFAULT_Q 224
|
||||
#define CGEMM3M_DEFAULT_R 12288
|
||||
#define ZGEMM3M_DEFAULT_R 12288
|
||||
#define XGEMM3M_DEFAULT_R 12288
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef ATOM
|
||||
|
|
Loading…
Reference in New Issue