Initial support for SkylakeX / AVX512

This patch adds the basic infrastructure for adding the SkylakeX (Intel Skylake server)
target. The SkylakeX target will use the AVX512 (AVX512VL level) instruction set,
which brings 2 basic things:
1) 512 bit wide SIMD (2x width of AVX2)
2) 32 SIMD registers (2x the number on AVX2)

This initial patch only contains a trivial transformation of the Haswell SGEMM kernel
to AVX512VL; more will follow later but this patch aims to get the infrastructure
in place for this "later".

Full performance tuning has not been done yet; with more registers and wider SIMD
it's in theory possible to retune the kernels but even without that there's an
interesting enough performance increase (30-40% range) with just this change.
This commit is contained in:
Arjan van de Ven 2018-06-03 07:24:29 +00:00
parent 36c4523d85
commit 99c7bba8e4
57 changed files with 7034 additions and 47 deletions

View File

@ -62,6 +62,9 @@ ifeq ($(BINARY), 32)
ifeq ($(TARGET), HASWELL) ifeq ($(TARGET), HASWELL)
GETARCH_FLAGS := -DFORCE_NEHALEM GETARCH_FLAGS := -DFORCE_NEHALEM
endif endif
# SKYLAKEX gets the same forced-NEHALEM getarch flag as the neighbouring
# HASWELL and SANDYBRIDGE cases.
ifeq ($(TARGET), SKYLAKEX)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET), SANDYBRIDGE) ifeq ($(TARGET), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM GETARCH_FLAGS := -DFORCE_NEHALEM
endif endif
@ -95,6 +98,9 @@ ifeq ($(BINARY), 32)
ifeq ($(TARGET_CORE), HASWELL) ifeq ($(TARGET_CORE), HASWELL)
GETARCH_FLAGS := -DFORCE_NEHALEM GETARCH_FLAGS := -DFORCE_NEHALEM
endif endif
# Same mapping as above, keyed on TARGET_CORE instead of TARGET: SKYLAKEX
# is forced to NEHALEM like the HASWELL and SANDYBRIDGE cases around it.
ifeq ($(TARGET_CORE), SKYLAKEX)
GETARCH_FLAGS := -DFORCE_NEHALEM
endif
ifeq ($(TARGET_CORE), SANDYBRIDGE) ifeq ($(TARGET_CORE), SANDYBRIDGE)
GETARCH_FLAGS := -DFORCE_NEHALEM GETARCH_FLAGS := -DFORCE_NEHALEM
endif endif
@ -467,7 +473,7 @@ ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
endif endif
ifneq ($(NO_AVX2), 1) ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += HASWELL ZEN DYNAMIC_CORE += HASWELL ZEN SKYLAKEX
endif endif
endif endif

View File

@ -20,6 +20,7 @@ DUNNINGTON
NEHALEM NEHALEM
SANDYBRIDGE SANDYBRIDGE
HASWELL HASWELL
SKYLAKEX
ATOM ATOM
b)AMD CPU: b)AMD CPU:

View File

@ -56,6 +56,9 @@ if (DYNAMIC_ARCH)
if (NOT NO_AVX2) if (NOT NO_AVX2)
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN) set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
endif () endif ()
# Append SKYLAKEX to the dynamic-dispatch core list unless NO_AVX512 is set.
if (NOT NO_AVX512)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
endif ()
endif () endif ()
if (NOT DYNAMIC_CORE) if (NOT DYNAMIC_CORE)

View File

@ -33,7 +33,7 @@ endif ()
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
message(STATUS "Compiling a ${BINARY}-bit binary.") message(STATUS "Compiling a ${BINARY}-bit binary.")
set(NO_AVX 1) set(NO_AVX 1)
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE") if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
set(TARGET "NEHALEM") set(TARGET "NEHALEM")
endif () endif ()
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN") if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")

View File

@ -115,6 +115,7 @@
#define CORE_STEAMROLLER 25 #define CORE_STEAMROLLER 25
#define CORE_EXCAVATOR 26 #define CORE_EXCAVATOR 26
#define CORE_ZEN 27 #define CORE_ZEN 27
#define CORE_SKYLAKEX 28
#define HAVE_SSE (1 << 0) #define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1) #define HAVE_SSE2 (1 << 1)
@ -137,6 +138,7 @@
#define HAVE_AVX (1 << 18) #define HAVE_AVX (1 << 18)
#define HAVE_FMA4 (1 << 19) #define HAVE_FMA4 (1 << 19)
#define HAVE_FMA3 (1 << 20) #define HAVE_FMA3 (1 << 20)
#define HAVE_AVX512VL (1 << 21)
#define CACHE_INFO_L1_I 1 #define CACHE_INFO_L1_I 1
#define CACHE_INFO_L1_D 2 #define CACHE_INFO_L1_D 2
@ -211,5 +213,6 @@ typedef struct {
#define CPUTYPE_STEAMROLLER 49 #define CPUTYPE_STEAMROLLER 49
#define CPUTYPE_EXCAVATOR 50 #define CPUTYPE_EXCAVATOR 50
#define CPUTYPE_ZEN 51 #define CPUTYPE_ZEN 51
#define CPUTYPE_SKYLAKEX 52
#endif #endif

View File

@ -50,6 +50,8 @@
#ifdef NO_AVX #ifdef NO_AVX
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM #define CPUTYPE_HASWELL CPUTYPE_NEHALEM
#define CORE_HASWELL CORE_NEHALEM #define CORE_HASWELL CORE_NEHALEM
#define CPUTYPE_SKYLAKEX CPUTYPE_NEHALEM
#define CORE_SKYLAKEX CORE_NEHALEM
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM #define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
#define CORE_SANDYBRIDGE CORE_NEHALEM #define CORE_SANDYBRIDGE CORE_NEHALEM
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA #define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA

View File

@ -74,6 +74,7 @@ extern gotoblas_t gotoblas_STEAMROLLER;
extern gotoblas_t gotoblas_EXCAVATOR; extern gotoblas_t gotoblas_EXCAVATOR;
#ifdef NO_AVX2 #ifdef NO_AVX2
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE #define gotoblas_HASWELL gotoblas_SANDYBRIDGE
#define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE
#define gotoblas_ZEN gotoblas_SANDYBRIDGE #define gotoblas_ZEN gotoblas_SANDYBRIDGE
#else #else
extern gotoblas_t gotoblas_HASWELL; extern gotoblas_t gotoblas_HASWELL;
@ -83,6 +84,7 @@ extern gotoblas_t gotoblas_ZEN;
//Use NEHALEM kernels for sandy bridge //Use NEHALEM kernels for sandy bridge
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
#define gotoblas_HASWELL gotoblas_NEHALEM #define gotoblas_HASWELL gotoblas_NEHALEM
#define gotoblas_SKYLAKEX gotoblas_NEHALEM
#define gotoblas_BULLDOZER gotoblas_BARCELONA #define gotoblas_BULLDOZER gotoblas_BARCELONA
#define gotoblas_PILEDRIVER gotoblas_BARCELONA #define gotoblas_PILEDRIVER gotoblas_BARCELONA
#define gotoblas_STEAMROLLER gotoblas_BARCELONA #define gotoblas_STEAMROLLER gotoblas_BARCELONA

View File

@ -167,7 +167,7 @@ int get_L2_size(void){
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
cpuid(0x80000006, &eax, &ebx, &ecx, &edx); cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
@ -251,7 +251,7 @@ int get_L2_size(void){
void blas_set_parameter(void){ void blas_set_parameter(void){
int factor; int factor;
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
int size = 16; int size = 16;
#else #else
int size = get_L2_size(); int size = get_L2_size();

View File

@ -326,6 +326,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "HASWELL" #define CORENAME "HASWELL"
#endif #endif
#ifdef FORCE_SKYLAKEX
/* Forced-target description for SKYLAKEX: sets the FORCE/FORCE_INTEL
 * markers and the architecture, library and core name strings. */
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#define SUBARCHITECTURE "SKYLAKEX"
/* ARCHCONFIG is one string literal built from the adjacent pieces below:
 * the -DSKYLAKEX selector, cache and TLB size macros, the HAVE_* feature
 * macros (including HAVE_AVX512VL) and the -march=skylake-avx512 flag. */
#define ARCHCONFIG "-DSKYLAKEX " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
"-DFMA3 -DHAVE_AVX512VL -march=skylake-avx512"
#define LIBNAME "skylakex"
#define CORENAME "SKYLAKEX"
#endif
#ifdef FORCE_ATOM #ifdef FORCE_ATOM
#define FORCE #define FORCE
#define FORCE_INTEL #define FORCE_INTEL

View File

@ -121,7 +121,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
# Makefile.L3 # Makefile.L3
set(USE_TRMM false) set(USE_TRMM false)
if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen") if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen" OR "${TARGET_CORE}" STREQUAL "SKYLAKEX" OR "${CORE}" STREQUAL "skylakex")
set(USE_TRMM true) set(USE_TRMM true)
endif () endif ()

View File

@ -32,6 +32,10 @@ ifeq ($(CORE), HASWELL)
USE_TRMM = 1 USE_TRMM = 1
endif endif
# SKYLAKEX sets USE_TRMM, like the HASWELL and ZEN cases around it.
ifeq ($(CORE), SKYLAKEX)
USE_TRMM = 1
endif
ifeq ($(CORE), ZEN) ifeq ($(CORE), ZEN)
USE_TRMM = 1 USE_TRMM = 1
endif endif

View File

@ -871,6 +871,22 @@ static void init_parameter(void) {
#endif #endif
#endif #endif
#ifdef SKYLAKEX
/* SKYLAKEX: the GEMM P blocking sizes in the dispatch table are taken
 * directly from the compile-time *_DEFAULT_P macros. */
#ifdef DEBUG
fprintf(stderr, "SkylakeX\n");
#endif
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
/* Extended-precision entries are only filled in when EXPRECISION is defined. */
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef OPTERON #ifdef OPTERON

View File

@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif

View File

@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif

View File

@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif

View File

@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif

View File

@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif

View File

@ -62,7 +62,7 @@
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHSIZE (8 * 21 + 4) #define PREFETCHSIZE (8 * 21 + 4)
#endif #endif

View File

@ -61,7 +61,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif

View File

@ -63,7 +63,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif

View File

@ -61,7 +61,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif

View File

@ -63,7 +63,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif

View File

@ -61,7 +61,7 @@
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
#define PREFETCH prefetcht1 #define PREFETCH prefetcht1
#define PREFETCHSIZE 84 #define PREFETCHSIZE 84
#endif #endif

View File

@ -0,0 +1,4 @@
include $(KERNELDIR)/KERNEL.HASWELL
SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S

View File

@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "caxpy_microk_steamroller-2.c" #include "caxpy_microk_steamroller-2.c"
#elif defined(BULLDOZER) #elif defined(BULLDOZER)
#include "caxpy_microk_bulldozer-2.c" #include "caxpy_microk_bulldozer-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX)
#include "caxpy_microk_haswell-2.c" #include "caxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "caxpy_microk_sandy-2.c" #include "caxpy_microk_sandy-2.c"

View File

@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cdot_microk_bulldozer-2.c" #include "cdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "cdot_microk_steamroller-2.c" #include "cdot_microk_steamroller-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "cdot_microk_haswell-2.c" #include "cdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "cdot_microk_sandy-2.c" #include "cdot_microk_sandy-2.c"

View File

@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdio.h> #include <stdio.h>
#include "common.h" #include "common.h"
#if defined(HASWELL) || defined(ZEN) #if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "cgemv_n_microk_haswell-4.c" #include "cgemv_n_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_n_microk_bulldozer-4.c" #include "cgemv_n_microk_bulldozer-4.c"

View File

@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"
#if defined(HASWELL) || defined(ZEN) #if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "cgemv_t_microk_haswell-4.c" #include "cgemv_t_microk_haswell-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "cgemv_t_microk_bulldozer-4.c" #include "cgemv_t_microk_bulldozer-4.c"

View File

@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"
#if defined(HASWELL) || defined(ZEN) #if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "cscal_microk_haswell-2.c" #include "cscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) #elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "cscal_microk_bulldozer-2.c" #include "cscal_microk_bulldozer-2.c"

View File

@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "daxpy_microk_steamroller-2.c" #include "daxpy_microk_steamroller-2.c"
#elif defined(PILEDRIVER) #elif defined(PILEDRIVER)
#include "daxpy_microk_piledriver-2.c" #include "daxpy_microk_piledriver-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "daxpy_microk_haswell-2.c" #include "daxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "daxpy_microk_sandy-2.c" #include "daxpy_microk_sandy-2.c"

View File

@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ddot_microk_piledriver-2.c" #include "ddot_microk_piledriver-2.c"
#elif defined(NEHALEM) #elif defined(NEHALEM)
#include "ddot_microk_nehalem-2.c" #include "ddot_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "ddot_microk_haswell-2.c" #include "ddot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "ddot_microk_sandy-2.c" #include "ddot_microk_sandy-2.c"

View File

@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(NEHALEM) #if defined(NEHALEM)
#include "dgemv_n_microk_nehalem-4.c" #include "dgemv_n_microk_nehalem-4.c"
#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) #elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX)
#include "dgemv_n_microk_haswell-4.c" #include "dgemv_n_microk_haswell-4.c"
#endif #endif

View File

@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"
#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) #if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX)
#include "dgemv_t_microk_haswell-4.c" #include "dgemv_t_microk_haswell-4.c"
#endif #endif

View File

@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "dscal_microk_bulldozer-2.c" #include "dscal_microk_bulldozer-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "dscal_microk_sandy-2.c" #include "dscal_microk_sandy-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "dscal_microk_haswell-2.c" #include "dscal_microk_haswell-2.c"
#endif #endif

View File

@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dsymv_L_microk_bulldozer-2.c" #include "dsymv_L_microk_bulldozer-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "dsymv_L_microk_haswell-2.c" #include "dsymv_L_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "dsymv_L_microk_sandy-2.c" #include "dsymv_L_microk_sandy-2.c"

View File

@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "dsymv_U_microk_bulldozer-2.c" #include "dsymv_U_microk_bulldozer-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "dsymv_U_microk_haswell-2.c" #include "dsymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "dsymv_U_microk_sandy-2.c" #include "dsymv_U_microk_sandy-2.c"

View File

@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(NEHALEM) #if defined(NEHALEM)
#include "saxpy_microk_nehalem-2.c" #include "saxpy_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "saxpy_microk_haswell-2.c" #include "saxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "saxpy_microk_sandy-2.c" #include "saxpy_microk_sandy-2.c"

View File

@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sdot_microk_steamroller-2.c" #include "sdot_microk_steamroller-2.c"
#elif defined(NEHALEM) #elif defined(NEHALEM)
#include "sdot_microk_nehalem-2.c" #include "sdot_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "sdot_microk_haswell-2.c" #include "sdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "sdot_microk_sandy-2.c" #include "sdot_microk_sandy-2.c"

File diff suppressed because it is too large Load Diff

View File

@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_n_microk_nehalem-4.c" #include "sgemv_n_microk_nehalem-4.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "sgemv_n_microk_sandy-4.c" #include "sgemv_n_microk_sandy-4.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "sgemv_n_microk_haswell-4.c" #include "sgemv_n_microk_haswell-4.c"
#endif #endif

View File

@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "sgemv_t_microk_bulldozer-4.c" #include "sgemv_t_microk_bulldozer-4.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "sgemv_t_microk_sandy-4.c" #include "sgemv_t_microk_sandy-4.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "sgemv_t_microk_haswell-4.c" #include "sgemv_t_microk_haswell-4.c"
#endif #endif

View File

@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ssymv_L_microk_bulldozer-2.c" #include "ssymv_L_microk_bulldozer-2.c"
#elif defined(NEHALEM) #elif defined(NEHALEM)
#include "ssymv_L_microk_nehalem-2.c" #include "ssymv_L_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "ssymv_L_microk_haswell-2.c" #include "ssymv_L_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "ssymv_L_microk_sandy-2.c" #include "ssymv_L_microk_sandy-2.c"

View File

@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "ssymv_U_microk_bulldozer-2.c" #include "ssymv_U_microk_bulldozer-2.c"
#elif defined(NEHALEM) #elif defined(NEHALEM)
#include "ssymv_U_microk_nehalem-2.c" #include "ssymv_U_microk_nehalem-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "ssymv_U_microk_haswell-2.c" #include "ssymv_U_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "ssymv_U_microk_sandy-2.c" #include "ssymv_U_microk_sandy-2.c"

View File

@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)

View File

@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)

View File

@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)

View File

@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 12) #define PREFETCHSIZE (16 * 12)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)

View File

@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zaxpy_microk_bulldozer-2.c" #include "zaxpy_microk_bulldozer-2.c"
#elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zaxpy_microk_steamroller-2.c" #include "zaxpy_microk_steamroller-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "zaxpy_microk_haswell-2.c" #include "zaxpy_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "zaxpy_microk_sandy-2.c" #include "zaxpy_microk_sandy-2.c"

View File

@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "zdot_microk_bulldozer-2.c" #include "zdot_microk_bulldozer-2.c"
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR) #elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
#include "zdot_microk_steamroller-2.c" #include "zdot_microk_steamroller-2.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "zdot_microk_haswell-2.c" #include "zdot_microk_haswell-2.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "zdot_microk_sandy-2.c" #include "zdot_microk_sandy-2.c"

View File

@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"
#if defined(HASWELL) || defined(ZEN) #if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "zgemv_n_microk_haswell-4.c" #include "zgemv_n_microk_haswell-4.c"
#elif defined(SANDYBRIDGE) #elif defined(SANDYBRIDGE)
#include "zgemv_n_microk_sandy-4.c" #include "zgemv_n_microk_sandy-4.c"

View File

@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR) #if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
#include "zgemv_t_microk_bulldozer-4.c" #include "zgemv_t_microk_bulldozer-4.c"
#elif defined(HASWELL) || defined(ZEN) #elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "zgemv_t_microk_haswell-4.c" #include "zgemv_t_microk_haswell-4.c"
#endif #endif

View File

@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h" #include "common.h"
#if defined(HASWELL) || defined(ZEN) #if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#include "zscal_microk_haswell-2.c" #include "zscal_microk_haswell-2.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER) #elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "zscal_microk_bulldozer-2.c" #include "zscal_microk_bulldozer-2.c"

View File

@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)

View File

@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)

View File

@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)

View File

@ -57,7 +57,7 @@
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)
#endif #endif
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) #if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
#define PREFETCH prefetcht0 #define PREFETCH prefetcht0
#define PREFETCHW prefetcht0 #define PREFETCHW prefetcht0
#define PREFETCHSIZE (16 * 24) #define PREFETCHSIZE (16 * 24)

119
param.h
View File

@ -1613,6 +1613,125 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif
#ifdef SKYLAKEX
/* Tuning parameters for the SKYLAKEX target. The closing #endif of this
 * section follows the GEMM3M values at the end. */
#define SNUMOPT 16
#define DNUMOPT 8
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL
#define SYMV_P 8
#define SWITCH_RATIO 4
/* GEMM unroll factors (UNROLL_M / UNROLL_N): the ARCH_X86 values come
 * first, the values used when ARCH_X86 is not defined follow in #else. */
#ifdef ARCH_X86
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1
#else
/* NOTE(review): SGEMM 16x4 here presumably matches the 16x4 SkylakeX
 * SGEMM kernel file name - confirm against the kernel. */
#define SGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_M 4
#define XGEMM_DEFAULT_UNROLL_M 1
#define SGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 8
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1
/* The UNROLL_MN values are only defined in this non-ARCH_X86 branch. */
#define SGEMM_DEFAULT_UNROLL_MN 32
#define DGEMM_DEFAULT_UNROLL_MN 32
#endif
/* GEMM blocking sizes (P, Q, R), again split on ARCH_X86. R values
 * written as lower-case names (sgemm_r, dgemm_r, ...) refer to
 * identifiers defined elsewhere rather than to literal constants. */
#ifdef ARCH_X86
#define SGEMM_DEFAULT_P 512
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_P 512
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_R 1024
#define ZGEMM_DEFAULT_P 512
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
#define SGEMM_DEFAULT_Q 256
#define DGEMM_DEFAULT_Q 256
#define QGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 256
#define ZGEMM_DEFAULT_Q 192
#define XGEMM_DEFAULT_Q 128
#else
#define SGEMM_DEFAULT_P 768
#define DGEMM_DEFAULT_P 512
#define CGEMM_DEFAULT_P 384
#define ZGEMM_DEFAULT_P 256
/* The SGEMM/DGEMM Q values differ when WINDOWS_ABI is defined. */
#ifdef WINDOWS_ABI
#define SGEMM_DEFAULT_Q 320
#define DGEMM_DEFAULT_Q 128
#else
#define SGEMM_DEFAULT_Q 384
#define DGEMM_DEFAULT_Q 256
#endif
#define CGEMM_DEFAULT_Q 192
#define ZGEMM_DEFAULT_Q 128
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R 13824
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define QGEMM_DEFAULT_Q 128
#define QGEMM_DEFAULT_P 504
#define QGEMM_DEFAULT_R qgemm_r
#define XGEMM_DEFAULT_P 252
#define XGEMM_DEFAULT_R xgemm_r
#define XGEMM_DEFAULT_Q 128
/* GEMM3M unroll and blocking values; these are only defined in this
 * non-ARCH_X86 branch. */
#define CGEMM3M_DEFAULT_UNROLL_N 8
#define CGEMM3M_DEFAULT_UNROLL_M 4
#define ZGEMM3M_DEFAULT_UNROLL_N 8
#define ZGEMM3M_DEFAULT_UNROLL_M 2
#define CGEMM3M_DEFAULT_P 448
#define ZGEMM3M_DEFAULT_P 224
#define XGEMM3M_DEFAULT_P 112
#define CGEMM3M_DEFAULT_Q 224
#define ZGEMM3M_DEFAULT_Q 224
#define XGEMM3M_DEFAULT_Q 224
#define CGEMM3M_DEFAULT_R 12288
#define ZGEMM3M_DEFAULT_R 12288
#define XGEMM3M_DEFAULT_R 12288
#endif
#endif #endif