Initial support for SkylakeX / AVX512
This patch adds the basic infrastructure for adding the SkylakeX (Intel Skylake server) target. The SkylakeX target will use the AVX512 (AVX512VL level) instruction set, which brings two basic things: 1) 512-bit-wide SIMD (2x the width of AVX2), and 2) 32 SIMD registers (2x the number on AVX2). This initial patch only contains a trivial transformation of the Haswell SGEMM kernel to AVX512VL; more will follow later, but this patch aims to get the infrastructure in place for this "later". Full performance tuning has not been done yet; with more registers and wider SIMD it is in theory possible to retune the kernels, but even without that there is an interesting enough performance increase (30-40% range) with just this change.
This commit is contained in:
@@ -74,6 +74,7 @@ extern gotoblas_t gotoblas_STEAMROLLER;
|
||||
extern gotoblas_t gotoblas_EXCAVATOR;
|
||||
#ifdef NO_AVX2
|
||||
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
|
||||
#define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE
|
||||
#define gotoblas_ZEN gotoblas_SANDYBRIDGE
|
||||
#else
|
||||
extern gotoblas_t gotoblas_HASWELL;
|
||||
@@ -83,6 +84,7 @@ extern gotoblas_t gotoblas_ZEN;
|
||||
//Use NEHALEM kernels for sandy bridge
|
||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
|
||||
#define gotoblas_HASWELL gotoblas_NEHALEM
|
||||
#define gotoblas_SKYLAKEX gotoblas_NEHALEM
|
||||
#define gotoblas_BULLDOZER gotoblas_BARCELONA
|
||||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
|
||||
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
|
||||
|
||||
@@ -167,7 +167,7 @@ int get_L2_size(void){
|
||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
|
||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
|
||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
|
||||
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
|
||||
defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
|
||||
|
||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||
|
||||
@@ -251,7 +251,7 @@ int get_L2_size(void){
|
||||
void blas_set_parameter(void){
|
||||
|
||||
int factor;
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN)
|
||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER) || defined(EXCAVATOR) || defined(ZEN) || defined(SKYLAKEX)
|
||||
int size = 16;
|
||||
#else
|
||||
int size = get_L2_size();
|
||||
|
||||
Reference in New Issue
Block a user