Initial support for SkylakeX / AVX512
This patch adds the basic infrastructure for the SkylakeX (Intel Skylake server) target. The SkylakeX target will use the AVX512 (AVX512VL level) instruction set, which brings two basic things: 1) 512-bit-wide SIMD (2x the width of AVX2), and 2) 32 SIMD registers (2x the number available with AVX2). This initial patch only contains a trivial transformation of the Haswell SGEMM kernel to AVX512VL; more will follow later, but this patch aims to get the infrastructure in place for that later work. Full performance tuning has not been done yet; with more registers and wider SIMD it is in theory possible to retune the kernels, but even without that there is an interesting enough performance increase (in the 30-40% range) with just this change.
This commit is contained in:
parent
36c4523d85
commit
99c7bba8e4
|
@ -62,6 +62,9 @@ ifeq ($(BINARY), 32)
|
|||
ifeq ($(TARGET), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SKYLAKEX)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
|
@ -95,6 +98,9 @@ ifeq ($(BINARY), 32)
|
|||
ifeq ($(TARGET_CORE), HASWELL)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SKYLAKEX)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
ifeq ($(TARGET_CORE), SANDYBRIDGE)
|
||||
GETARCH_FLAGS := -DFORCE_NEHALEM
|
||||
endif
|
||||
|
@ -467,7 +473,7 @@ ifneq ($(NO_AVX), 1)
|
|||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR
|
||||
endif
|
||||
ifneq ($(NO_AVX2), 1)
|
||||
DYNAMIC_CORE += HASWELL ZEN
|
||||
DYNAMIC_CORE += HASWELL ZEN SKYLAKEX
|
||||
endif
|
||||
endif
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@ DUNNINGTON
|
|||
NEHALEM
|
||||
SANDYBRIDGE
|
||||
HASWELL
|
||||
SKYLAKEX
|
||||
ATOM
|
||||
|
||||
b)AMD CPU:
|
||||
|
|
|
@ -56,6 +56,9 @@ if (DYNAMIC_ARCH)
|
|||
if (NOT NO_AVX2)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
|
||||
endif ()
|
||||
if (NOT NO_AVX512)
|
||||
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
if (NOT DYNAMIC_CORE)
|
||||
|
|
|
@ -33,7 +33,7 @@ endif ()
|
|||
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
|
||||
message(STATUS "Compiling a ${BINARY}-bit binary.")
|
||||
set(NO_AVX 1)
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE")
|
||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
|
||||
set(TARGET "NEHALEM")
|
||||
endif ()
|
||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
|
||||
|
|
3
cpuid.h
3
cpuid.h
|
@ -115,6 +115,7 @@
|
|||
#define CORE_STEAMROLLER 25
|
||||
#define CORE_EXCAVATOR 26
|
||||
#define CORE_ZEN 27
|
||||
#define CORE_SKYLAKEX 28
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
|
@ -137,6 +138,7 @@
|
|||
#define HAVE_AVX (1 << 18)
|
||||
#define HAVE_FMA4 (1 << 19)
|
||||
#define HAVE_FMA3 (1 << 20)
|
||||
#define HAVE_AVX512VL (1 << 21)
|
||||
|
||||
#define CACHE_INFO_L1_I 1
|
||||
#define CACHE_INFO_L1_D 2
|
||||
|
@ -211,5 +213,6 @@ typedef struct {
|
|||
#define CPUTYPE_STEAMROLLER 49
|
||||
#define CPUTYPE_EXCAVATOR 50
|
||||
#define CPUTYPE_ZEN 51
|
||||
#define CPUTYPE_SKYLAKEX 52
|
||||
|
||||
#endif
|
||||
|
|
|
@ -50,6 +50,8 @@
|
|||
#ifdef NO_AVX
|
||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM
|
||||
#define CORE_HASWELL CORE_NEHALEM
|
||||
#define CPUTYPE_SKYLAKEX CPUTYPE_NEHALEM
|
||||
#define CORE_SKYLAKEX CORE_NEHALEM
|
||||
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
|
||||
#define CORE_SANDYBRIDGE CORE_NEHALEM
|
||||
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
|
||||
|
|
|
@ -74,6 +74,7 @@ extern gotoblas_t gotoblas_STEAMROLLER;
|
|||
extern gotoblas_t gotoblas_EXCAVATOR;
|
||||
#ifdef NO_AVX2
|
||||
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
|
||||
#define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE
|
||||
#define gotoblas_ZEN gotoblas_SANDYBRIDGE
|
||||
#else
|
||||
extern gotoblas_t gotoblas_HASWELL;
|
||||
|
@ -83,6 +84,7 @@ extern gotoblas_t gotoblas_ZEN;
|
|||
//Use NEHALEM kernels for sandy bridge
|
||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
|
||||
#define gotoblas_HASWELL gotoblas_NEHALEM
|
||||
#define gotoblas_SKYLAKEX gotoblas_NEHALEM
|
||||
#define gotoblas_BULLDOZER gotoblas_BARCELONA
|
||||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
|
||||
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
|
||||
|
|
|
@ -167,7 +167,7 @@ int get_L2_size(void){
|
|||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
|
||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
|
||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
|
||||
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
|
||||
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
|
||||
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
|
@ -251,7 +251,7 @@ int get_L2_size(void){
|
|||
void blas_set_parameter(void){
|
||||
|
||||
int factor;
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
|
||||
int size = 16;
|
||||
#else
|
||||
int size = get_L2_size();
|
||||
|
|
15
getarch.c
15
getarch.c
|
@ -326,6 +326,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define CORENAME "HASWELL"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_SKYLAKEX
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "SKYLAKEX"
|
||||
#define ARCHCONFIG "-DSKYLAKEX " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
||||
"-DFMA3 -DHAVE_AVX512VL -march=skylake-avx512"
|
||||
#define LIBNAME "skylakex"
|
||||
#define CORENAME "SKYLAKEX"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ATOM
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
|
|
|
@ -121,7 +121,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
|||
# Makefile.L3
|
||||
set(USE_TRMM false)
|
||||
|
||||
if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen")
|
||||
if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen" OR "${TARGET_CORE}" STREQUAL "SKYLAKEX" OR "${CORE}" STREQUAL "skylakex")
|
||||
set(USE_TRMM true)
|
||||
endif ()
|
||||
|
||||
|
|
|
@ -32,6 +32,10 @@ ifeq ($(CORE), HASWELL)
|
|||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), SKYLAKEX)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ZEN)
|
||||
USE_TRMM = 1
|
||||
endif
|
||||
|
|
|
@ -871,6 +871,22 @@ static void init_parameter(void) {
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef SKYLAKEX
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "SkylakeX\n");
|
||||
#endif
|
||||
|
||||
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||
#ifdef EXPRECISION
|
||||
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
|
||||
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef OPTERON
|
||||
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -62,7 +62,7 @@
|
|||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
|
|
@ -61,7 +61,7 @@
|
|||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
|
|
@ -63,7 +63,7 @@
|
|||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
|
|
@ -61,7 +61,7 @@
|
|||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
|
|
@ -63,7 +63,7 @@
|
|||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
|
|
@ -61,7 +61,7 @@
|
|||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
include $(KERNELDIR)/KERNEL.HASWELL
|
||||
|
||||
SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S
|
||||
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "caxpy_microk_steamroller-2.c"
|
||||
#elif defined(BULLDOZER)
|
||||
#include "caxpy_microk_bulldozer-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined(SKYLAKEX)
|
||||
#include "caxpy_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "caxpy_microk_sandy-2.c"
|
||||
|
|
|
@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "cdot_microk_bulldozer-2.c"
|
||||
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
|
||||
#include "cdot_microk_steamroller-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "cdot_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "cdot_microk_sandy-2.c"
|
||||
|
|
|
@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <stdio.h>
|
||||
#include "common.h"
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "cgemv_n_microk_haswell-4.c"
|
||||
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "cgemv_n_microk_bulldozer-4.c"
|
||||
|
|
|
@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "cgemv_t_microk_haswell-4.c"
|
||||
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "cgemv_t_microk_bulldozer-4.c"
|
||||
|
|
|
@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "cscal_microk_haswell-2.c"
|
||||
#elif defined(BULLDOZER) || defined(PILEDRIVER)
|
||||
#include "cscal_microk_bulldozer-2.c"
|
||||
|
|
|
@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "daxpy_microk_steamroller-2.c"
|
||||
#elif defined(PILEDRIVER)
|
||||
#include "daxpy_microk_piledriver-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "daxpy_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "daxpy_microk_sandy-2.c"
|
||||
|
|
|
@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "ddot_microk_piledriver-2.c"
|
||||
#elif defined(NEHALEM)
|
||||
#include "ddot_microk_nehalem-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "ddot_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "ddot_microk_sandy-2.c"
|
||||
|
|
|
@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(NEHALEM)
|
||||
#include "dgemv_n_microk_nehalem-4.c"
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX)
|
||||
#include "dgemv_n_microk_haswell-4.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined (SKYLAKEX)
|
||||
#include "dgemv_t_microk_haswell-4.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "dscal_microk_bulldozer-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "dscal_microk_sandy-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "dscal_microk_haswell-2.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "dsymv_L_microk_bulldozer-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "dsymv_L_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "dsymv_L_microk_sandy-2.c"
|
||||
|
|
|
@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "dsymv_U_microk_bulldozer-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "dsymv_U_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "dsymv_U_microk_sandy-2.c"
|
||||
|
|
|
@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(NEHALEM)
|
||||
#include "saxpy_microk_nehalem-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "saxpy_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "saxpy_microk_sandy-2.c"
|
||||
|
|
|
@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "sdot_microk_steamroller-2.c"
|
||||
#elif defined(NEHALEM)
|
||||
#include "sdot_microk_nehalem-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "sdot_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "sdot_microk_sandy-2.c"
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "sgemv_n_microk_nehalem-4.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "sgemv_n_microk_sandy-4.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "sgemv_n_microk_haswell-4.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -34,7 +34,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "sgemv_t_microk_bulldozer-4.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "sgemv_t_microk_sandy-4.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "sgemv_t_microk_haswell-4.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "ssymv_L_microk_bulldozer-2.c"
|
||||
#elif defined(NEHALEM)
|
||||
#include "ssymv_L_microk_nehalem-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "ssymv_L_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "ssymv_L_microk_sandy-2.c"
|
||||
|
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "ssymv_U_microk_bulldozer-2.c"
|
||||
#elif defined(NEHALEM)
|
||||
#include "ssymv_U_microk_nehalem-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "ssymv_U_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "ssymv_U_microk_sandy-2.c"
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "zaxpy_microk_bulldozer-2.c"
|
||||
#elif defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "zaxpy_microk_steamroller-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "zaxpy_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "zaxpy_microk_sandy-2.c"
|
||||
|
|
|
@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "zdot_microk_bulldozer-2.c"
|
||||
#elif defined(STEAMROLLER) || defined(PILEDRIVER) || defined(EXCAVATOR)
|
||||
#include "zdot_microk_steamroller-2.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "zdot_microk_haswell-2.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "zdot_microk_sandy-2.c"
|
||||
|
|
|
@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "zgemv_n_microk_haswell-4.c"
|
||||
#elif defined(SANDYBRIDGE)
|
||||
#include "zgemv_n_microk_sandy-4.c"
|
||||
|
|
|
@ -31,7 +31,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER) || defined(EXCAVATOR)
|
||||
#include "zgemv_t_microk_bulldozer-4.c"
|
||||
#elif defined(HASWELL) || defined(ZEN)
|
||||
#elif defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "zgemv_t_microk_haswell-4.c"
|
||||
#endif
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include "common.h"
|
||||
|
||||
|
||||
#if defined(HASWELL) || defined(ZEN)
|
||||
#if defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#include "zscal_microk_haswell-2.c"
|
||||
#elif defined(BULLDOZER) || defined(PILEDRIVER)
|
||||
#include "zscal_microk_bulldozer-2.c"
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
|
|
@ -57,7 +57,7 @@
|
|||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN)
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE) || defined(HASWELL) || defined(ZEN) || defined (SKYLAKEX)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
|
119
param.h
119
param.h
|
@ -1613,6 +1613,125 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef SKYLAKEX
|
||||
|
||||
#define SNUMOPT 16
|
||||
#define DNUMOPT 8
|
||||
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SYMV_P 8
|
||||
|
||||
#define SWITCH_RATIO 4
|
||||
|
||||
#ifdef ARCH_X86
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
|
||||
#else
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_MN 32
|
||||
#define DGEMM_DEFAULT_UNROLL_MN 32
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_X86
|
||||
|
||||
#define SGEMM_DEFAULT_P 512
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
#define DGEMM_DEFAULT_P 512
|
||||
#define DGEMM_DEFAULT_R dgemm_r
|
||||
#define QGEMM_DEFAULT_P 504
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
#define CGEMM_DEFAULT_P 128
|
||||
#define CGEMM_DEFAULT_R 1024
|
||||
#define ZGEMM_DEFAULT_P 512
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
#define XGEMM_DEFAULT_P 252
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
#define SGEMM_DEFAULT_Q 256
|
||||
#define DGEMM_DEFAULT_Q 256
|
||||
#define QGEMM_DEFAULT_Q 128
|
||||
#define CGEMM_DEFAULT_Q 256
|
||||
#define ZGEMM_DEFAULT_Q 192
|
||||
#define XGEMM_DEFAULT_Q 128
|
||||
|
||||
#else
|
||||
|
||||
#define SGEMM_DEFAULT_P 768
|
||||
#define DGEMM_DEFAULT_P 512
|
||||
#define CGEMM_DEFAULT_P 384
|
||||
#define ZGEMM_DEFAULT_P 256
|
||||
|
||||
#ifdef WINDOWS_ABI
|
||||
#define SGEMM_DEFAULT_Q 320
|
||||
#define DGEMM_DEFAULT_Q 128
|
||||
#else
|
||||
#define SGEMM_DEFAULT_Q 384
|
||||
#define DGEMM_DEFAULT_Q 256
|
||||
#endif
|
||||
#define CGEMM_DEFAULT_Q 192
|
||||
#define ZGEMM_DEFAULT_Q 128
|
||||
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
#define DGEMM_DEFAULT_R 13824
|
||||
#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
|
||||
#define QGEMM_DEFAULT_Q 128
|
||||
#define QGEMM_DEFAULT_P 504
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
#define XGEMM_DEFAULT_P 252
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
#define XGEMM_DEFAULT_Q 128
|
||||
|
||||
#define CGEMM3M_DEFAULT_UNROLL_N 8
|
||||
#define CGEMM3M_DEFAULT_UNROLL_M 4
|
||||
#define ZGEMM3M_DEFAULT_UNROLL_N 8
|
||||
#define ZGEMM3M_DEFAULT_UNROLL_M 2
|
||||
|
||||
#define CGEMM3M_DEFAULT_P 448
|
||||
#define ZGEMM3M_DEFAULT_P 224
|
||||
#define XGEMM3M_DEFAULT_P 112
|
||||
#define CGEMM3M_DEFAULT_Q 224
|
||||
#define ZGEMM3M_DEFAULT_Q 224
|
||||
#define XGEMM3M_DEFAULT_Q 224
|
||||
#define CGEMM3M_DEFAULT_R 12288
|
||||
#define ZGEMM3M_DEFAULT_R 12288
|
||||
#define XGEMM3M_DEFAULT_R 12288
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue