Add SAPPHIRERAPIDS as a runtime-dispatch (DYNAMIC_CORE) target.

  * Makefile.system: append SAPPHIRERAPIDS to DYNAMIC_CORE inside the
    NO_AVX512 / NO_AVX2 guards.
  * driver/others/dynamic.c: declare gotoblas_SAPPHIRERAPIDS (aliased to
    gotoblas_HASWELL when NO_AVX512 is defined, and to gotoblas_NEHALEM in
    the branch that also maps HASWELL to NEHALEM); add support_amx_bf16();
    for model 15 return the new table when support_amx_bf16() is true,
    before the existing AVX512-BF16 and AVX512 checks; add the
    "SapphireRapids" core name at index 26.
  * kernel/x86_64/KERNEL.SAPPHIRERAPIDS: after including KERNEL.COOPERLAKE,
    undefine the SBGEMM_SMALL_* variables (this replaces the former
    SBGEMM_SMALL_M_PERMIT assignment).

NOTE(review): support_amx_bf16() tests CPUID.(EAX=0DH,ECX=0):EAX bits 17-18.
Per the Intel SDM that leaf reports which XCR0 bits the processor supports;
what the OS has actually enabled is read with XGETBV. support_avx512() is not
visible in this patch, so confirm whether an explicit XCR0 tile-state check is
still needed.

NOTE(review): the line structure of this patch was reconstructed from the hunk
counts. Indentation inside lines is conventional, not original; use
"git apply --ignore-whitespace" if context lines fail to match.

diff --git a/Makefile.system b/Makefile.system
index 343b94bb3..e37b8b45a 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -645,7 +645,7 @@ DYNAMIC_CORE += HASWELL ZEN
 endif
 ifneq ($(NO_AVX512), 1)
 ifneq ($(NO_AVX2), 1)
-DYNAMIC_CORE += SKYLAKEX COOPERLAKE
+DYNAMIC_CORE += SKYLAKEX COOPERLAKE SAPPHIRERAPIDS
 endif
 endif
 endif
diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c
index f61930983..dee5538fa 100644
--- a/driver/others/dynamic.c
+++ b/driver/others/dynamic.c
@@ -268,9 +268,11 @@ extern gotoblas_t gotoblas_ZEN;
 #ifndef NO_AVX512
 extern gotoblas_t gotoblas_SKYLAKEX;
 extern gotoblas_t gotoblas_COOPERLAKE;
+extern gotoblas_t gotoblas_SAPPHIRERAPIDS;
 #else
 #define gotoblas_SKYLAKEX gotoblas_HASWELL
 #define gotoblas_COOPERLAKE gotoblas_HASWELL
+#define gotoblas_SAPPHIRERAPIDS gotoblas_HASWELL
 #endif
 #endif
 #else
@@ -279,6 +281,7 @@ extern gotoblas_t gotoblas_COOPERLAKE;
 #define gotoblas_HASWELL gotoblas_NEHALEM
 #define gotoblas_SKYLAKEX gotoblas_NEHALEM
 #define gotoblas_COOPERLAKE gotoblas_NEHALEM
+#define gotoblas_SAPPHIRERAPIDS gotoblas_NEHALEM
 #define gotoblas_BULLDOZER gotoblas_BARCELONA
 #define gotoblas_PILEDRIVER gotoblas_BARCELONA
 #define gotoblas_STEAMROLLER gotoblas_BARCELONA
@@ -378,6 +381,31 @@ int support_avx512_bf16(){
 #endif
 }
 
+#define BIT_AMX_TILE 0x01000000
+#define BIT_AMX_BF16 0x00400000
+#define BIT_AMX_ENBD 0x00060000
+
+int support_amx_bf16() {
+#if !defined(NO_AVX) && !defined(NO_AVX512)
+  int eax, ebx, ecx, edx;
+  int ret=0;
+
+  if (!support_avx512())
+    return 0;
+  // CPUID.7.0:EDX indicates AMX support
+  cpuid_count(7, 0, &eax, &ebx, &ecx, &edx);
+  if ((edx & BIT_AMX_TILE) && (edx & BIT_AMX_BF16)) {
+    // CPUID.D.0:EAX[17:18] indicates AMX enabled
+    cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+    if ((eax & BIT_AMX_ENBD) == BIT_AMX_ENBD)
+      ret = 1;
+  }
+  return ret;
+#else
+  return 0;
+#endif
+}
+
 extern void openblas_warning(int verbose, const char * msg);
 #define FALLBACK_VERBOSE 1
 #define NEHALEM_FALLBACK "OpenBLAS : Your OS does not support AVX instructions. OpenBLAS is using Nehalem kernels as a fallback, which may give poorer performance.\n"
@@ -689,6 +717,8 @@ static gotoblas_t *get_coretype(void){
         }
       }
       if (model == 15){ // Sapphire Rapids
+        if(support_amx_bf16())
+          return &gotoblas_SAPPHIRERAPIDS;
         if(support_avx512_bf16())
           return &gotoblas_COOPERLAKE;
         if (support_avx512())
@@ -941,7 +971,8 @@ static char *corename[] = {
   "Excavator",
   "Zen",
   "SkylakeX",
-  "Cooperlake"
+  "Cooperlake",
+  "SapphireRapids"
 };
 
 char *gotoblas_corename(void) {
@@ -1006,6 +1037,7 @@ char *gotoblas_corename(void) {
   if (gotoblas == &gotoblas_ZEN) return corename[23];
   if (gotoblas == &gotoblas_SKYLAKEX) return corename[24];
   if (gotoblas == &gotoblas_COOPERLAKE) return corename[25];
+  if (gotoblas == &gotoblas_SAPPHIRERAPIDS) return corename[26];
   return corename[0];
 }
 
diff --git a/kernel/x86_64/KERNEL.SAPPHIRERAPIDS b/kernel/x86_64/KERNEL.SAPPHIRERAPIDS
index 88f574668..d101503bb 100644
--- a/kernel/x86_64/KERNEL.SAPPHIRERAPIDS
+++ b/kernel/x86_64/KERNEL.SAPPHIRERAPIDS
@@ -1,6 +1,14 @@
 include $(KERNELDIR)/KERNEL.COOPERLAKE
 
-SBGEMM_SMALL_M_PERMIT = sbgemm_small_kernel_permit_spr.c
+undefine SBGEMM_SMALL_M_PERMIT
+undefine SBGEMM_SMALL_K_NN
+undefine SBGEMM_SMALL_K_B0_NN
+undefine SBGEMM_SMALL_K_NT
+undefine SBGEMM_SMALL_K_B0_NT
+undefine SBGEMM_SMALL_K_TN
+undefine SBGEMM_SMALL_K_B0_TN
+undefine SBGEMM_SMALL_K_TT
+undefine SBGEMM_SMALL_K_B0_TT
 
 SBGEMM_BETA = sgemm_beta_skylakex.c
 SBGEMMKERNEL = sbgemm_kernel_16x16_spr.c