diff --git a/Makefile.system b/Makefile.system index bd2164d02..833511fad 100644 --- a/Makefile.system +++ b/Makefile.system @@ -121,6 +121,9 @@ endif ifeq ($(TARGET), COOPERLAKE) GETARCH_FLAGS := -DFORCE_NEHALEM endif +ifeq ($(TARGET), SAPPHIRERAPIDS) +GETARCH_FLAGS := -DFORCE_NEHALEM +endif ifeq ($(TARGET), SANDYBRIDGE) GETARCH_FLAGS := -DFORCE_NEHALEM endif @@ -166,6 +169,9 @@ endif ifeq ($(TARGET_CORE), COOPERLAKE) GETARCH_FLAGS := -DFORCE_NEHALEM endif +ifeq ($(TARGET_CORE), SAPPHIRERAPIDS) +GETARCH_FLAGS := -DFORCE_NEHALEM +endif ifeq ($(TARGET_CORE), SANDYBRIDGE) GETARCH_FLAGS := -DFORCE_NEHALEM endif diff --git a/Makefile.x86_64 b/Makefile.x86_64 index 307cbe1d9..15cf202c0 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -96,6 +96,30 @@ endif endif endif +ifeq ($(CORE), SAPPHIRERAPIDS) +ifndef NO_AVX512 +ifeq ($(C_COMPILER), GCC) +# sapphire rapids support was added in 11 +ifeq ($(GCCVERSIONGTEQ11), 1) +CCOMMON_OPT += -march=sapphirerapids +ifneq ($(F_COMPILER), NAG) +FCOMMON_OPT += -march=sapphirerapids +endif +endif +endif +ifeq ($(OSNAME), CYGWIN_NT) +CCOMMON_OPT += -fno-asynchronous-unwind-tables +FCOMMON_OPT += -fno-asynchronous-unwind-tables +endif +ifeq ($(OSNAME), WINNT) +ifeq ($(C_COMPILER), GCC) +CCOMMON_OPT += -fno-asynchronous-unwind-tables +FCOMMON_OPT += -fno-asynchronous-unwind-tables +endif +endif +endif +endif + ifdef HAVE_AVX2 ifndef NO_AVX2 ifeq ($(C_COMPILER), GCC) diff --git a/TargetList.txt b/TargetList.txt index 963545cdd..b02a011d5 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -23,6 +23,7 @@ HASWELL SKYLAKEX ATOM COOPERLAKE +SAPPHIRERAPIDS b)AMD CPU: ATHLON diff --git a/cmake/cc.cmake b/cmake/cc.cmake index 1794b5e5b..0ab1d4c1b 100644 --- a/cmake/cc.cmake +++ b/cmake/cc.cmake @@ -126,6 +126,19 @@ if (${CORE} STREQUAL COOPERLAKE) endif () endif () +if (${CORE} STREQUAL SAPPHIRERAPIDS) + if (NOT DYNAMIC_ARCH) + if (NOT NO_AVX512) + execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + if (${GCC_VERSION} VERSION_GREATER 11.0 OR ${GCC_VERSION} VERSION_EQUAL 11.0) + set (CCOMMON_OPT "${CCOMMON_OPT} -march=sapphirerapids") + else () + set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512") + endif() + endif () + endif () +endif () + if (NOT DYNAMIC_ARCH) if (HAVE_AVX2) set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2") diff --git a/cmake/system.cmake b/cmake/system.cmake index f56ded966..bcca91c25 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -33,7 +33,7 @@ endif () if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) message(STATUS "Compiling a ${BINARY}-bit binary.") set(NO_AVX 1) - if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE") + if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE" OR ${TARGET} STREQUAL "SAPPHIRERAPIDS") set(TARGET "NEHALEM") endif () if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN") @@ -163,6 +163,22 @@ if (DEFINED TARGET) endif() endif() endif() + if (${TARGET} STREQUAL SAPPHIRERAPIDS AND NOT NO_AVX512) + if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") + execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 11.0) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=sapphirerapids") + else() + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") + endif() + elseif (${CMAKE_C_COMPILER_ID} STREQUAL "Clang" OR ${CMAKE_C_COMPILER_ID} STREQUAL "AppleClang") + if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 12.0) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=sapphirerapids") + else() + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") + endif() + endif() + endif() if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") endif() diff --git a/cpuid.h b/cpuid.h index 2c43922e7..55478893c 100644 --- a/cpuid.h +++ b/cpuid.h @@ -120,6 +120,7 @@ #define CORE_SKYLAKEX 28 #define CORE_DHYANA 29 #define CORE_COOPERLAKE 30 +#define CORE_SAPPHIRERAPIDS 31 #define HAVE_SSE (1 << 0) #define HAVE_SSE2 (1 << 1) @@ -145,6 +146,7 @@ #define HAVE_AVX512VL (1 << 21) #define HAVE_AVX2 (1 << 22) #define HAVE_AVX512BF16 (1 << 23) +#define HAVE_AMXBF16 (1 << 24) #define CACHE_INFO_L1_I 1 #define CACHE_INFO_L1_D 2 @@ -222,6 +224,7 @@ typedef struct { #define CPUTYPE_SKYLAKEX 52 #define CPUTYPE_DHYANA 53 #define CPUTYPE_COOPERLAKE 54 +#define CPUTYPE_SAPPHIRERAPIDS 55 #define CPUTYPE_HYGON_UNKNOWN 99 diff --git a/cpuid_x86.c b/cpuid_x86.c index 5aa49055a..bb9d779bd 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -266,6 +266,31 @@ int support_avx512_bf16(){ #endif } +#define BIT_AMX_TILE 0x01000000 +#define BIT_AMX_BF16 0x00400000 +#define BIT_AMX_ENBD 0x00060000 + +int support_amx_bf16() { +#if !defined(NO_AVX) && !defined(NO_AVX512) + int eax, ebx, ecx, edx; + int ret=0; + + if (!support_avx512()) + return 0; + // CPUID.7.0:EDX indicates AMX support + cpuid_count(7, 0, &eax, &ebx, &ecx, &edx); + if ((edx & BIT_AMX_TILE) && (edx & BIT_AMX_BF16)) { + // CPUID.D.0:EAX[17:18] indicates AMX enabled + cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx); + if ((eax & BIT_AMX_ENBD) == BIT_AMX_ENBD) + ret = 1; + } + return ret; +#else + return 0; +#endif +} + int get_vendor(void){ int eax, ebx, ecx, edx; char vendor[13]; @@ -353,6 +378,7 @@ int get_cputype(int gettype){ if (support_avx2()) feature |= HAVE_AVX2; if (support_avx512()) feature |= HAVE_AVX512VL; if (support_avx512_bf16()) feature |= HAVE_AVX512BF16; + if (support_amx_bf16()) feature |= HAVE_AMXBF16; if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3; #endif @@ -2389,6 +2415,7 @@ void get_cpuconfig(void){ if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n"); if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n"); if (features & HAVE_AVX512BF16 ) printf("#define HAVE_AVX512BF16\n"); + if (features & HAVE_AMXBF16 ) printf("#define HAVE_AMXBF16\n"); if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n"); if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n"); if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n"); @@ -2460,6 +2487,7 @@ void get_sse(void){ if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n"); if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n"); if (features & HAVE_AVX512BF16 ) printf("HAVE_AVX512BF16=1\n"); + if (features & HAVE_AMXBF16 ) printf("HAVE_AMXBF16=1\n"); if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n"); if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n"); if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n"); diff --git a/driver/level3/level3.c b/driver/level3/level3.c index 9b44deb85..4a8e193be 100644 --- a/driver/level3/level3.c +++ b/driver/level3/level3.c @@ -333,7 +333,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, #else for(jjs = js; jjs < js + min_j; jjs += min_jj){ min_jj = min_j + js - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 2b33c9589..dfc7107b8 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -367,7 +367,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, /* Split local region of B into parts */ for(jjs = js; jjs < MIN(n_to, js + div_n); jjs += min_jj){ min_jj = MIN(n_to, js + div_n) - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else diff --git a/driver/level3/trmm_L.c b/driver/level3/trmm_L.c index 880de4df4..e25ea7afe 100644 --- a/driver/level3/trmm_L.c +++ b/driver/level3/trmm_L.c @@ -138,7 +138,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO for(jjs = js; jjs < js + min_j; jjs += min_jj){ min_jj = min_j + js - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else @@ -215,7 +215,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO for(jjs = js; jjs < js + min_j; jjs += min_jj){ min_jj = min_j + js - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else @@ -320,7 +320,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO for(jjs = js; jjs < js + min_j; jjs += min_jj){ min_jj = min_j + js - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else @@ -399,7 +399,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO for(jjs = js; jjs < js + min_j; jjs += min_jj){ min_jj = min_j + js - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else diff --git a/driver/level3/trmm_R.c b/driver/level3/trmm_R.c index 3be43edde..ab9cdfae8 100644 --- a/driver/level3/trmm_R.c +++ b/driver/level3/trmm_R.c @@ -122,7 +122,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO for(jjs = 0; jjs < ls - js; jjs += min_jj){ min_jj = ls - js - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else @@ -146,7 +146,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO for(jjs = 0; jjs < min_l; jjs += min_jj){ min_jj = min_l - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else @@ -203,7 +203,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO for(jjs = js; jjs < js + min_j; jjs += min_jj){ min_jj = min_j + js - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else @@ -258,7 +258,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO for(jjs = 0; jjs < min_l; jjs += min_jj){ min_jj = min_l - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else @@ -283,7 +283,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO for(jjs = 0; jjs < js - ls - min_l; jjs += min_jj){ min_jj = js - ls - min_l - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else @@ -344,7 +344,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO for(jjs = js; jjs < js + min_j; jjs += min_jj){ min_jj = min_j + js - jjs; -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) /* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */ if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N; #else diff --git a/driver/others/parameter.c b/driver/others/parameter.c index 791e5dc27..0d5c6aec0 100644 --- a/driver/others/parameter.c +++ b/driver/others/parameter.c @@ -183,7 +183,7 @@ int get_L2_size(void){ defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || \ - defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE) + defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) cpuid(0x80000006, &eax, &ebx, &ecx, &edx); @@ -269,7 +269,7 @@ void blas_set_parameter(void){ int factor; #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || \ defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || \ - defined(SKYLAKEX) || defined(COOPERLAKE) + defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) int size = 16; #else int size = get_L2_size(); diff --git a/getarch.c b/getarch.c index 094feaadd..d64ec3757 100644 --- a/getarch.c +++ b/getarch.c @@ -469,6 +469,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif #endif +#ifdef FORCE_SAPPHIRERAPIDS +#ifdef NO_AVX512 +#define FORCE +#define FORCE_INTEL +#define ARCHITECTURE "X86" +#define SUBARCHITECTURE "HASWELL" +#define ARCHCONFIG "-DHASWELL " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ + "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3" +#define LIBNAME "haswell" +#define CORENAME "HASWELL" +#else +#define FORCE +#define FORCE_INTEL +#define ARCHITECTURE "X86" +#define SUBARCHITECTURE "SAPPHIRERAPIDS" +#define ARCHCONFIG "-DSAPPHIRERAPIDS " \ + "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \ + "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \ + "-DHAVE_AVX2 -DHAVE_FMA3 -DFMA3 -DHAVE_AVX512VL -DHAVE_AVX512BF16 -march=sapphirerapids" +#define LIBNAME "sapphirerapids" +#define CORENAME "SAPPHIRERAPIDS" +#endif +#endif + #ifdef FORCE_ATOM #define FORCE #define FORCE_INTEL diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 9ffbd944f..9c8460723 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -198,7 +198,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) # Makefile.L3 set(USE_TRMM false) string(TOUPPER ${TARGET_CORE} UC_TARGET_CORE) - if (ARM OR ARM64 OR (UC_TARGET_CORE MATCHES LONGSOON3B) OR (UC_TARGET_CORE MATCHES GENERIC) OR (UC_TARGET_CORE MATCHES HASWELL) OR (UC_TARGET_CORE MATCHES ZEN) OR (UC_TARGET_CORE MATCHES SKYLAKEX) OR (UC_TARGET_CORE MATCHES COOPERLAKE)) + if (ARM OR ARM64 OR (UC_TARGET_CORE MATCHES LONGSOON3B) OR (UC_TARGET_CORE MATCHES GENERIC) OR (UC_TARGET_CORE MATCHES HASWELL) OR (UC_TARGET_CORE MATCHES ZEN) OR (UC_TARGET_CORE MATCHES SKYLAKEX) OR (UC_TARGET_CORE MATCHES COOPERLAKE) OR (UC_TARGET_CORE MATCHES SAPPHIRERAPIDS)) set(USE_TRMM true) endif () if (ZARCH OR (UC_TARGET_CORE MATCHES POWER8) OR (UC_TARGET_CORE MATCHES POWER9) OR (UC_TARGET_CORE MATCHES POWER10)) diff --git a/kernel/Makefile b/kernel/Makefile index 1a6c9413f..cbe4cde6e 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -31,7 +31,22 @@ ifdef NO_AVX2 endif ifdef TARGET_CORE -ifeq ($(TARGET_CORE), COOPERLAKE) +ifeq ($(TARGET_CORE), SAPPHIRERAPIDS) + override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) + ifeq ($(GCCVERSIONGTEQ10), 1) + override CFLAGS += -march=sapphirerapids + else + override CFLAGS += -march=skylake-avx512 -mavx512f + endif + ifeq ($(OSNAME), CYGWIN_NT) + override CFLAGS += -fno-asynchronous-unwind-tables + endif + ifeq ($(OSNAME), WINNT) + ifeq ($(C_COMPILER), GCC) + override CFLAGS += -fno-asynchronous-unwind-tables + endif + endif +else ifeq ($(TARGET_CORE), COOPERLAKE) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) ifeq ($(GCCVERSIONGTEQ10), 1) override CFLAGS += -march=cooperlake diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 2d274d33b..89691ef6f 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -47,6 +47,10 @@ ifeq ($(CORE), COOPERLAKE) USE_TRMM = 1 endif +ifeq ($(CORE), SAPPHIRERAPIDS) +USE_TRMM = 1 +endif + ifeq ($(CORE), ZEN) USE_TRMM = 1 endif diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index 19b7b5f0b..fe796be64 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -1518,7 +1518,7 @@ static void init_parameter(void) { #endif #endif -#if defined(SKYLAKEX) || defined(COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) #ifdef DEBUG fprintf(stderr, "SkylakeX\n"); diff --git a/kernel/x86/trsm_kernel_LN_2x4_penryn.S b/kernel/x86/trsm_kernel_LN_2x4_penryn.S index fde9eba8e..0d71201d6 100644 --- a/kernel/x86/trsm_kernel_LN_2x4_penryn.S +++ b/kernel/x86/trsm_kernel_LN_2x4_penryn.S @@ -62,7 +62,7 @@ #define PREFETCHSIZE (8 * 21 + 4) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHSIZE (8 * 21 + 4) #endif diff --git a/kernel/x86/trsm_kernel_LN_4x4_penryn.S b/kernel/x86/trsm_kernel_LN_4x4_penryn.S index fddf7560f..e775b4d76 100644 --- a/kernel/x86/trsm_kernel_LN_4x4_penryn.S +++ b/kernel/x86/trsm_kernel_LN_4x4_penryn.S @@ -62,7 +62,7 @@ #define PREFETCHSIZE (8 * 21 + 4) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHSIZE (8 * 21 + 4) #endif diff --git a/kernel/x86/trsm_kernel_LT_2x4_penryn.S b/kernel/x86/trsm_kernel_LT_2x4_penryn.S index 33afd2a61..d3d110811 100644 --- a/kernel/x86/trsm_kernel_LT_2x4_penryn.S +++ b/kernel/x86/trsm_kernel_LT_2x4_penryn.S @@ -62,7 +62,7 @@ #define PREFETCHSIZE (8 * 21 + 4) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHSIZE (8 * 21 + 4) #endif diff --git a/kernel/x86/trsm_kernel_LT_4x4_penryn.S b/kernel/x86/trsm_kernel_LT_4x4_penryn.S index b05bd6ee5..e56a768db 100644 --- a/kernel/x86/trsm_kernel_LT_4x4_penryn.S +++ b/kernel/x86/trsm_kernel_LT_4x4_penryn.S @@ -62,7 +62,7 @@ #define PREFETCHSIZE (8 * 21 + 4) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHSIZE (8 * 21 + 4) #endif diff --git a/kernel/x86/trsm_kernel_RT_2x4_penryn.S b/kernel/x86/trsm_kernel_RT_2x4_penryn.S index f960559a6..85a29ce57 100644 --- a/kernel/x86/trsm_kernel_RT_2x4_penryn.S +++ b/kernel/x86/trsm_kernel_RT_2x4_penryn.S @@ -62,7 +62,7 @@ #define PREFETCHSIZE (8 * 21 + 4) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHSIZE (8 * 21 + 4) #endif diff --git a/kernel/x86/trsm_kernel_RT_4x4_penryn.S b/kernel/x86/trsm_kernel_RT_4x4_penryn.S index cf842c9b5..5c128d7a4 100644 --- a/kernel/x86/trsm_kernel_RT_4x4_penryn.S +++ b/kernel/x86/trsm_kernel_RT_4x4_penryn.S @@ -62,7 +62,7 @@ #define PREFETCHSIZE (8 * 21 + 4) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHSIZE (8 * 21 + 4) #endif diff --git a/kernel/x86/ztrsm_kernel_LN_2x2_penryn.S b/kernel/x86/ztrsm_kernel_LN_2x2_penryn.S index 63c44c27a..73174e424 100644 --- a/kernel/x86/ztrsm_kernel_LN_2x2_penryn.S +++ b/kernel/x86/ztrsm_kernel_LN_2x2_penryn.S @@ -61,7 +61,7 @@ #define PREFETCHSIZE 84 #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht1 #define PREFETCHSIZE 84 #endif diff --git a/kernel/x86/ztrsm_kernel_LT_1x2_penryn.S b/kernel/x86/ztrsm_kernel_LT_1x2_penryn.S index 4cb01e50a..ebe83ff40 100644 --- a/kernel/x86/ztrsm_kernel_LT_1x2_penryn.S +++ b/kernel/x86/ztrsm_kernel_LT_1x2_penryn.S @@ -63,7 +63,7 @@ #define PREFETCHSIZE 84 #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht1 #define PREFETCHSIZE 84 #endif diff --git a/kernel/x86/ztrsm_kernel_LT_2x2_penryn.S b/kernel/x86/ztrsm_kernel_LT_2x2_penryn.S index 09d5d8e43..b26ffb473 100644 --- a/kernel/x86/ztrsm_kernel_LT_2x2_penryn.S +++ b/kernel/x86/ztrsm_kernel_LT_2x2_penryn.S @@ -61,7 +61,7 @@ #define PREFETCHSIZE 84 #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht1 #define PREFETCHSIZE 84 #endif diff --git a/kernel/x86/ztrsm_kernel_RT_1x2_penryn.S b/kernel/x86/ztrsm_kernel_RT_1x2_penryn.S index 7d129e54c..c2c7caadc 100644 --- a/kernel/x86/ztrsm_kernel_RT_1x2_penryn.S +++ b/kernel/x86/ztrsm_kernel_RT_1x2_penryn.S @@ -63,7 +63,7 @@ #define PREFETCHSIZE 84 #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht1 #define PREFETCHSIZE 84 #endif diff --git a/kernel/x86/ztrsm_kernel_RT_2x2_penryn.S b/kernel/x86/ztrsm_kernel_RT_2x2_penryn.S index d33599317..42526135c 100644 --- a/kernel/x86/ztrsm_kernel_RT_2x2_penryn.S +++ b/kernel/x86/ztrsm_kernel_RT_2x2_penryn.S @@ -61,7 +61,7 @@ #define PREFETCHSIZE 84 #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht1 #define PREFETCHSIZE 84 #endif diff --git a/kernel/x86_64/KERNEL.SAPPHIRERAPIDS b/kernel/x86_64/KERNEL.SAPPHIRERAPIDS new file mode 100644 index 000000000..61965c745 --- /dev/null +++ b/kernel/x86_64/KERNEL.SAPPHIRERAPIDS @@ -0,0 +1 @@ +include $(KERNELDIR)/KERNEL.COOPERLAKE diff --git a/kernel/x86_64/caxpy.c b/kernel/x86_64/caxpy.c index c19b98f02..7270a98bc 100644 --- a/kernel/x86_64/caxpy.c +++ b/kernel/x86_64/caxpy.c @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "caxpy_microk_steamroller-2.c" #elif defined(BULLDOZER) #include "caxpy_microk_bulldozer-2.c" -#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE) +#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX) || defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) #include "caxpy_microk_haswell-2.c" #elif defined(SANDYBRIDGE) #include "caxpy_microk_sandy-2.c" diff --git a/kernel/x86_64/cdot.c b/kernel/x86_64/cdot.c index 654cd351a..264776239 100644 --- a/kernel/x86_64/cdot.c +++ b/kernel/x86_64/cdot.c @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "cdot_microk_bulldozer-2.c" #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) #include "cdot_microk_steamroller-2.c" -#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "cdot_microk_haswell-2.c" #elif defined(SANDYBRIDGE) #include "cdot_microk_sandy-2.c" diff --git a/kernel/x86_64/cgemv_n_4.c b/kernel/x86_64/cgemv_n_4.c index 0ed02b8d8..3ca173c20 100644 --- a/kernel/x86_64/cgemv_n_4.c +++ b/kernel/x86_64/cgemv_n_4.c @@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include "common.h" -#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "cgemv_n_microk_haswell-4.c" #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #include "cgemv_n_microk_bulldozer-4.c" diff --git a/kernel/x86_64/cgemv_t_4.c b/kernel/x86_64/cgemv_t_4.c index c2903b11f..3187e196c 100644 --- a/kernel/x86_64/cgemv_t_4.c +++ b/kernel/x86_64/cgemv_t_4.c @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "cgemv_t_microk_haswell-4.c" #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #include "cgemv_t_microk_bulldozer-4.c" diff --git a/kernel/x86_64/cscal.c b/kernel/x86_64/cscal.c index 6d75358a6..dc3f688c6 100644 --- a/kernel/x86_64/cscal.c +++ b/kernel/x86_64/cscal.c @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "cscal_microk_haswell-2.c" #elif defined(BULLDOZER) || defined(PILEDRIVER) #include "cscal_microk_bulldozer-2.c" diff --git a/kernel/x86_64/daxpy.c b/kernel/x86_64/daxpy.c index 26437012c..2796b8270 100644 --- a/kernel/x86_64/daxpy.c +++ b/kernel/x86_64/daxpy.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "daxpy_microk_piledriver-2.c" #elif defined(HASWELL) || defined(ZEN) #include "daxpy_microk_haswell-2.c" -#elif defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "daxpy_microk_skylakex-2.c" #elif defined(SANDYBRIDGE) #include "daxpy_microk_sandy-2.c" diff --git a/kernel/x86_64/ddot.c b/kernel/x86_64/ddot.c index e4b6622e6..5d0c32234 100644 --- a/kernel/x86_64/ddot.c +++ b/kernel/x86_64/ddot.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "ddot_microk_nehalem-2.c" #elif defined(HASWELL) || defined(ZEN) #include "ddot_microk_haswell-2.c" -#elif defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "ddot_microk_skylakex-2.c" #elif defined(SANDYBRIDGE) #include "ddot_microk_sandy-2.c" diff --git a/kernel/x86_64/dgemv_n_4.c b/kernel/x86_64/dgemv_n_4.c index da68db0cd..f883d4f26 100644 --- a/kernel/x86_64/dgemv_n_4.c +++ b/kernel/x86_64/dgemv_n_4.c @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "dgemv_n_microk_nehalem-4.c" #elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) #include "dgemv_n_microk_haswell-4.c" -#elif defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "dgemv_n_microk_skylakex-4.c" #endif diff --git a/kernel/x86_64/dgemv_t_4.c b/kernel/x86_64/dgemv_t_4.c index a3bf28dc8..9688c6bf3 100644 --- a/kernel/x86_64/dgemv_t_4.c +++ b/kernel/x86_64/dgemv_t_4.c @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "dgemv_t_microk_haswell-4.c" #endif diff --git a/kernel/x86_64/dscal.c b/kernel/x86_64/dscal.c index d1270d20b..05c5c7f16 100644 --- a/kernel/x86_64/dscal.c +++ b/kernel/x86_64/dscal.c @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "dscal_microk_sandy-2.c" #elif defined(HASWELL) || defined(ZEN) #include "dscal_microk_haswell-2.c" -#elif defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "dscal_microk_skylakex-2.c" #endif diff --git a/kernel/x86_64/dsymv_L.c b/kernel/x86_64/dsymv_L.c index 573377ee0..590776005 100644 --- a/kernel/x86_64/dsymv_L.c +++ b/kernel/x86_64/dsymv_L.c @@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "dsymv_L_microk_bulldozer-2.c" #elif defined(HASWELL) || defined(ZEN) #include "dsymv_L_microk_haswell-2.c" -#elif defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "dsymv_L_microk_skylakex-2.c" #elif defined(SANDYBRIDGE) #include "dsymv_L_microk_sandy-2.c" diff --git a/kernel/x86_64/dsymv_U.c b/kernel/x86_64/dsymv_U.c index 530ac8b1d..f196aa364 100644 --- a/kernel/x86_64/dsymv_U.c +++ b/kernel/x86_64/dsymv_U.c @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #include "dsymv_U_microk_bulldozer-2.c" -#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "dsymv_U_microk_haswell-2.c" #elif defined(SANDYBRIDGE) #include "dsymv_U_microk_sandy-2.c" diff --git a/kernel/x86_64/saxpy.c b/kernel/x86_64/saxpy.c index 7b2845636..ff911c52b 100644 --- a/kernel/x86_64/saxpy.c +++ b/kernel/x86_64/saxpy.c @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "saxpy_microk_nehalem-2.c" #elif defined(HASWELL) || defined(ZEN) #include "saxpy_microk_haswell-2.c" -#elif defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "saxpy_microk_skylakex-2.c" #elif defined(SANDYBRIDGE) #include "saxpy_microk_sandy-2.c" diff --git a/kernel/x86_64/sbdot.c b/kernel/x86_64/sbdot.c index ef14fd618..a4e60b7c4 100644 --- a/kernel/x86_64/sbdot.c +++ b/kernel/x86_64/sbdot.c @@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -#if defined(COOPERLAKE) +#if defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) #include "sbdot_microk_cooperlake.c" #endif diff --git a/kernel/x86_64/sbgemv_n.c b/kernel/x86_64/sbgemv_n.c index 18e64dc3f..08ccace61 100644 --- a/kernel/x86_64/sbgemv_n.c +++ b/kernel/x86_64/sbgemv_n.c @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -#if defined (COOPERLAKE) +#if defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "sbgemv_n_microk_cooperlake.c" #endif diff --git a/kernel/x86_64/sbgemv_t.c b/kernel/x86_64/sbgemv_t.c index 22b099116..51ea0d937 100644 --- a/kernel/x86_64/sbgemv_t.c +++ b/kernel/x86_64/sbgemv_t.c @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -#if defined (COOPERLAKE) +#if defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "sbgemv_t_microk_cooperlake.c" #endif diff --git a/kernel/x86_64/sdot.c b/kernel/x86_64/sdot.c index e816c67e9..a0acea9d1 100644 --- a/kernel/x86_64/sdot.c +++ b/kernel/x86_64/sdot.c @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "sdot_microk_nehalem-2.c" #elif defined(HASWELL) || defined(ZEN) #include "sdot_microk_haswell-2.c" -#elif defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "sdot_microk_skylakex-2.c" #elif defined(SANDYBRIDGE) #include "sdot_microk_sandy-2.c" diff --git a/kernel/x86_64/sgemm_direct_skylakex.c b/kernel/x86_64/sgemm_direct_skylakex.c index 2588289d1..badeb0fbf 100644 --- a/kernel/x86_64/sgemm_direct_skylakex.c +++ b/kernel/x86_64/sgemm_direct_skylakex.c @@ -1,7 +1,7 @@ /* the direct sgemm code written by Arjan van der Ven */ #include "common.h" -#if defined(SKYLAKEX) || defined (COOPERLAKE) +#if defined(SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include diff --git a/kernel/x86_64/sgemv_n_4.c b/kernel/x86_64/sgemv_n_4.c index e0778006f..621ddc622 100644 --- a/kernel/x86_64/sgemv_n_4.c +++ b/kernel/x86_64/sgemv_n_4.c @@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "sgemv_n_microk_sandy-4.c" #elif defined(HASWELL) || defined(ZEN) #include "sgemv_n_microk_haswell-4.c" -#elif defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "sgemv_n_microk_haswell-4.c" #include "sgemv_n_microk_skylakex-8.c" #endif diff --git a/kernel/x86_64/sgemv_t_4.c b/kernel/x86_64/sgemv_t_4.c index a36c8ace9..0be2c7e97 100644 --- a/kernel/x86_64/sgemv_t_4.c +++ b/kernel/x86_64/sgemv_t_4.c @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "sgemv_t_microk_sandy-4.c" #elif defined(HASWELL) || defined(ZEN) #include "sgemv_t_microk_haswell-4.c" -#elif defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "sgemv_t_microk_haswell-4.c" #include "sgemv_t_microk_skylakex.c" #endif diff --git a/kernel/x86_64/ssymv_L.c b/kernel/x86_64/ssymv_L.c index c9d698eb7..29d6a9958 100644 --- a/kernel/x86_64/ssymv_L.c +++ b/kernel/x86_64/ssymv_L.c @@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "ssymv_L_microk_bulldozer-2.c" #elif defined(NEHALEM) #include "ssymv_L_microk_nehalem-2.c" -#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "ssymv_L_microk_haswell-2.c" #elif defined(SANDYBRIDGE) #include "ssymv_L_microk_sandy-2.c" diff --git a/kernel/x86_64/ssymv_U.c b/kernel/x86_64/ssymv_U.c index 4d8aac1ab..02bbc1c64 100644 --- a/kernel/x86_64/ssymv_U.c +++ b/kernel/x86_64/ssymv_U.c @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "ssymv_U_microk_bulldozer-2.c" #elif defined(NEHALEM) #include "ssymv_U_microk_nehalem-2.c" -#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "ssymv_U_microk_haswell-2.c" #elif defined(SANDYBRIDGE) #include "ssymv_U_microk_sandy-2.c" diff --git a/kernel/x86_64/symv_L_sse.S b/kernel/x86_64/symv_L_sse.S index fea4fc746..55780734f 100644 --- a/kernel/x86_64/symv_L_sse.S +++ b/kernel/x86_64/symv_L_sse.S @@ -57,7 +57,7 @@ #define PREFETCHSIZE (16 * 12) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHW prefetcht0 #define PREFETCHSIZE (16 * 12) diff --git a/kernel/x86_64/symv_L_sse2.S b/kernel/x86_64/symv_L_sse2.S index b853ef365..77331d95f 100644 --- a/kernel/x86_64/symv_L_sse2.S +++ b/kernel/x86_64/symv_L_sse2.S @@ -57,7 +57,7 @@ #define PREFETCHSIZE (16 * 12) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHW prefetcht0 #define PREFETCHSIZE (16 * 12) diff --git a/kernel/x86_64/symv_U_sse.S b/kernel/x86_64/symv_U_sse.S index bad367e91..b61182303 100644 --- a/kernel/x86_64/symv_U_sse.S +++ b/kernel/x86_64/symv_U_sse.S @@ -57,7 +57,7 @@ #define PREFETCHSIZE (16 * 12) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHW prefetcht0 #define PREFETCHSIZE (16 * 12) diff --git a/kernel/x86_64/symv_U_sse2.S b/kernel/x86_64/symv_U_sse2.S index 147201751..99bc07d50 100644 --- a/kernel/x86_64/symv_U_sse2.S +++ b/kernel/x86_64/symv_U_sse2.S @@ -57,7 +57,7 @@ #define PREFETCHSIZE (16 * 12) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHW prefetcht0 #define PREFETCHSIZE (16 * 24) diff --git a/kernel/x86_64/tobf16.c b/kernel/x86_64/tobf16.c index 3d1796621..a88fdcc2e 100644 --- a/kernel/x86_64/tobf16.c +++ b/kernel/x86_64/tobf16.c @@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif -#if defined(COOPERLAKE) +#if defined(COOPERLAKE) || defined(SAPPHIRERAPIDS) #if defined(DOUBLE) #include "dtobf16_microk_cooperlake.c" #elif defined(SINGLE) diff --git a/kernel/x86_64/zaxpy.c b/kernel/x86_64/zaxpy.c index 25e9f6d42..8786870bd 100644 --- a/kernel/x86_64/zaxpy.c +++ b/kernel/x86_64/zaxpy.c @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "zaxpy_microk_bulldozer-2.c" #elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #include "zaxpy_microk_steamroller-2.c" -#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "zaxpy_microk_haswell-2.c" #elif defined(SANDYBRIDGE) #include "zaxpy_microk_sandy-2.c" diff --git a/kernel/x86_64/zdot.c b/kernel/x86_64/zdot.c index 1bc785ac1..50c8a2678 100644 --- a/kernel/x86_64/zdot.c +++ b/kernel/x86_64/zdot.c @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "zdot_microk_bulldozer-2.c" #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) #include "zdot_microk_steamroller-2.c" -#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "zdot_microk_haswell-2.c" #elif defined(SANDYBRIDGE) #include "zdot_microk_sandy-2.c" diff --git a/kernel/x86_64/zgemv_n_4.c b/kernel/x86_64/zgemv_n_4.c index 1f9d41859..2d6866a78 100644 --- a/kernel/x86_64/zgemv_n_4.c +++ b/kernel/x86_64/zgemv_n_4.c @@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "zgemv_n_microk_haswell-4.c" #elif defined(SANDYBRIDGE) #include "zgemv_n_microk_sandy-4.c" diff --git a/kernel/x86_64/zgemv_t_4.c b/kernel/x86_64/zgemv_t_4.c index 34f28b224..c2791e0f3 100644 --- a/kernel/x86_64/zgemv_t_4.c +++ b/kernel/x86_64/zgemv_t_4.c @@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #include "zgemv_t_microk_bulldozer-4.c" -#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "zgemv_t_microk_haswell-4.c" #endif diff --git a/kernel/x86_64/zscal.c b/kernel/x86_64/zscal.c index 09a702a81..3744c98bb 100644 --- a/kernel/x86_64/zscal.c +++ b/kernel/x86_64/zscal.c @@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" -#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #include "zscal_microk_haswell-2.c" #elif defined(BULLDOZER) || defined(PILEDRIVER) #include "zscal_microk_bulldozer-2.c" diff --git a/kernel/x86_64/zsymv_L_sse.S b/kernel/x86_64/zsymv_L_sse.S index 83ed41ba1..df190c64c 100644 --- a/kernel/x86_64/zsymv_L_sse.S +++ b/kernel/x86_64/zsymv_L_sse.S @@ -57,7 +57,7 @@ #define PREFETCHSIZE (16 * 24) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHW prefetcht0 #define PREFETCHSIZE (16 * 24) diff --git a/kernel/x86_64/zsymv_L_sse2.S b/kernel/x86_64/zsymv_L_sse2.S index 7ed2faf0f..cba167f4d 100644 --- a/kernel/x86_64/zsymv_L_sse2.S +++ b/kernel/x86_64/zsymv_L_sse2.S @@ -57,7 +57,7 @@ #define PREFETCHSIZE (16 * 24) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHW prefetcht0 #define PREFETCHSIZE (16 * 24) diff --git a/kernel/x86_64/zsymv_U_sse.S b/kernel/x86_64/zsymv_U_sse.S index 5945f3f81..13176ce9c 100644 --- a/kernel/x86_64/zsymv_U_sse.S +++ b/kernel/x86_64/zsymv_U_sse.S @@ -57,7 +57,7 @@ #define PREFETCHSIZE (16 * 24) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHW prefetcht0 #define PREFETCHSIZE (16 * 24) diff --git a/kernel/x86_64/zsymv_U_sse2.S b/kernel/x86_64/zsymv_U_sse2.S index 484d74f14..1657885c0 100644 --- a/kernel/x86_64/zsymv_U_sse2.S +++ b/kernel/x86_64/zsymv_U_sse2.S @@ -57,7 +57,7 @@ #define PREFETCHSIZE (16 * 24) #endif -#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX) || defined (COOPERLAKE) || defined (SAPPHIRERAPIDS) #define PREFETCH prefetcht0 #define PREFETCHW prefetcht0 #define PREFETCHSIZE (16 * 24) diff --git a/param.h b/param.h index e8e49ce43..4e83714d1 100644 --- a/param.h +++ b/param.h @@ -1751,6 +1751,125 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif +#ifdef SAPPHIRERAPIDS + +#define SNUMOPT 16 +#define DNUMOPT 8 + +#define GEMM_DEFAULT_OFFSET_A 0 +#define GEMM_DEFAULT_OFFSET_B 0 +#define GEMM_DEFAULT_ALIGN 0x03fffUL + +#define SYMV_P 8 + +#if defined(XDOUBLE) || defined(DOUBLE) +#define SWITCH_RATIO 8 +#define GEMM_PREFERED_SIZE 8 +#else +#define SWITCH_RATIO 16 +#define GEMM_PREFERED_SIZE 16 +#endif +#define USE_SGEMM_KERNEL_DIRECT 1 + +#ifdef ARCH_X86 + +#define SGEMM_DEFAULT_UNROLL_M 4 +#define DGEMM_DEFAULT_UNROLL_M 2 +#define QGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_M 1 +#define XGEMM_DEFAULT_UNROLL_M 1 + +#define SGEMM_DEFAULT_UNROLL_N 4 +#define DGEMM_DEFAULT_UNROLL_N 4 +#define QGEMM_DEFAULT_UNROLL_N 2 +#define CGEMM_DEFAULT_UNROLL_N 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 +#define XGEMM_DEFAULT_UNROLL_N 1 + +#else + +#define SGEMM_DEFAULT_UNROLL_M 16 +#define DGEMM_DEFAULT_UNROLL_M 16 +#define QGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_M 8 +#define ZGEMM_DEFAULT_UNROLL_M 4 +#define XGEMM_DEFAULT_UNROLL_M 1 + +#define SGEMM_DEFAULT_UNROLL_N 4 +#define DGEMM_DEFAULT_UNROLL_N 2 +#define QGEMM_DEFAULT_UNROLL_N 2 +#define CGEMM_DEFAULT_UNROLL_N 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 +#define XGEMM_DEFAULT_UNROLL_N 1 + +#define SGEMM_DEFAULT_UNROLL_MN 32 +#define DGEMM_DEFAULT_UNROLL_MN 32 +#endif + +#ifdef ARCH_X86 + +#define SGEMM_DEFAULT_P 512 +#define SGEMM_DEFAULT_R sgemm_r +#define DGEMM_DEFAULT_P 512 +#define DGEMM_DEFAULT_R dgemm_r +#define QGEMM_DEFAULT_P 504 +#define QGEMM_DEFAULT_R qgemm_r +#define CGEMM_DEFAULT_P 128 +#define CGEMM_DEFAULT_R 1024 +#define ZGEMM_DEFAULT_P 512 +#define ZGEMM_DEFAULT_R zgemm_r +#define XGEMM_DEFAULT_P 252 +#define XGEMM_DEFAULT_R xgemm_r +#define SGEMM_DEFAULT_Q 256 +#define DGEMM_DEFAULT_Q 256 +#define QGEMM_DEFAULT_Q 128 +#define CGEMM_DEFAULT_Q 256 +#define ZGEMM_DEFAULT_Q 192 +#define XGEMM_DEFAULT_Q 128 + +#else + +#define SGEMM_DEFAULT_P 640 +#define DGEMM_DEFAULT_P 192 +#define CGEMM_DEFAULT_P 384 +#define ZGEMM_DEFAULT_P 256 + +#define SGEMM_DEFAULT_Q 320 +#define DGEMM_DEFAULT_Q 384 +#define CGEMM_DEFAULT_Q 192 +#define ZGEMM_DEFAULT_Q 128 + +#define SGEMM_DEFAULT_R sgemm_r +#define DGEMM_DEFAULT_R 8640 +#define CGEMM_DEFAULT_R cgemm_r +#define ZGEMM_DEFAULT_R zgemm_r + +#define QGEMM_DEFAULT_Q 128 +#define QGEMM_DEFAULT_P 504 +#define QGEMM_DEFAULT_R qgemm_r +#define XGEMM_DEFAULT_P 252 +#define XGEMM_DEFAULT_R xgemm_r +#define XGEMM_DEFAULT_Q 128 + +#define CGEMM3M_DEFAULT_UNROLL_N 4 +#define CGEMM3M_DEFAULT_UNROLL_M 8 +#define ZGEMM3M_DEFAULT_UNROLL_N 4 +#define ZGEMM3M_DEFAULT_UNROLL_M 4 + +#define CGEMM3M_DEFAULT_P 320 +#define ZGEMM3M_DEFAULT_P 256 +#define XGEMM3M_DEFAULT_P 112 +#define CGEMM3M_DEFAULT_Q 320 +#define ZGEMM3M_DEFAULT_Q 256 +#define XGEMM3M_DEFAULT_Q 224 +#define CGEMM3M_DEFAULT_R 12288 +#define ZGEMM3M_DEFAULT_R 12288 +#define XGEMM3M_DEFAULT_R 12288 + +#endif +#endif + #ifdef COOPERLAKE #define SNUMOPT 16