Merge remote branch 'origin/haswell' into develop
This commit is contained in:
@@ -336,14 +336,14 @@ ifeq ($(ARCH), x86)
|
||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
|
||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
|
||||
ifneq ($(NO_AVX), 1)
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
|
||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER HASWELL
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
5
cpuid.h
5
cpuid.h
@@ -107,7 +107,7 @@
|
||||
#define CORE_BOBCAT 21
|
||||
#define CORE_BULLDOZER 22
|
||||
#define CORE_PILEDRIVER 23
|
||||
#define CORE_HASWELL CORE_SANDYBRIDGE
|
||||
#define CORE_HASWELL 24
|
||||
|
||||
#define HAVE_SSE (1 << 0)
|
||||
#define HAVE_SSE2 (1 << 1)
|
||||
@@ -200,7 +200,6 @@ typedef struct {
|
||||
#define CPUTYPE_BOBCAT 45
|
||||
#define CPUTYPE_BULLDOZER 46
|
||||
#define CPUTYPE_PILEDRIVER 47
|
||||
// this define is because BLAS doesn't have haswell specific optimizations yet
|
||||
#define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE
|
||||
#define CPUTYPE_HASWELL 48
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1243,6 +1243,7 @@ static char *cpuname[] = {
|
||||
"BOBCAT",
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
};
|
||||
|
||||
static char *lowercpuname[] = {
|
||||
@@ -1293,6 +1294,7 @@ static char *lowercpuname[] = {
|
||||
"bobcat",
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
};
|
||||
|
||||
static char *corename[] = {
|
||||
@@ -1320,6 +1322,7 @@ static char *corename[] = {
|
||||
"BOBCAT",
|
||||
"BULLDOZER",
|
||||
"PILEDRIVER",
|
||||
"HASWELL",
|
||||
};
|
||||
|
||||
static char *corename_lower[] = {
|
||||
@@ -1347,6 +1350,7 @@ static char *corename_lower[] = {
|
||||
"bobcat",
|
||||
"bulldozer",
|
||||
"piledriver",
|
||||
"haswell",
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -65,14 +65,15 @@ extern gotoblas_t gotoblas_BOBCAT;
|
||||
extern gotoblas_t gotoblas_SANDYBRIDGE;
|
||||
extern gotoblas_t gotoblas_BULLDOZER;
|
||||
extern gotoblas_t gotoblas_PILEDRIVER;
|
||||
extern gotoblas_t gotoblas_HASWELL;
|
||||
#else
|
||||
//Use NEHALEM kernels for sandy bridge
|
||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
|
||||
#define gotoblas_HASWELL gotoblas_NEHALEM
|
||||
#define gotoblas_BULLDOZER gotoblas_BARCELONA
|
||||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
|
||||
#endif
|
||||
//Use sandy bridge kernels for haswell.
|
||||
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
|
||||
|
||||
|
||||
#define VENDOR_INTEL 1
|
||||
#define VENDOR_AMD 2
|
||||
@@ -297,6 +298,7 @@ static char *corename[] = {
|
||||
"Bobcat",
|
||||
"Bulldozer",
|
||||
"Piledriver",
|
||||
"Haswell",
|
||||
};
|
||||
|
||||
char *gotoblas_corename(void) {
|
||||
@@ -319,7 +321,8 @@ char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
|
||||
if (gotoblas == &gotoblas_BOBCAT) return corename[17];
|
||||
if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
|
||||
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
|
||||
if (gotoblas == &gotoblas_PILEDRIVER) return corename[19];
|
||||
if (gotoblas == &gotoblas_HASWELL) return corename[20];
|
||||
|
||||
return corename[0];
|
||||
}
|
||||
|
||||
15
getarch.c
15
getarch.c
@@ -298,6 +298,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define CORENAME "SANDYBRIDGE"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_HASWELL
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
#define ARCHITECTURE "X86"
|
||||
#define SUBARCHITECTURE "HASWELL"
|
||||
#define ARCHCONFIG "-DHASWELL " \
|
||||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
|
||||
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
|
||||
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX " \
|
||||
"-DFMA3"
|
||||
#define LIBNAME "haswell"
|
||||
#define CORENAME "HASWELL"
|
||||
#endif
|
||||
|
||||
#ifdef FORCE_ATOM
|
||||
#define FORCE
|
||||
#define FORCE_INTEL
|
||||
|
||||
1
kernel/x86/KERNEL.HASWELL
Normal file
1
kernel/x86/KERNEL.HASWELL
Normal file
@@ -0,0 +1 @@
|
||||
include $(KERNELDIR)/KERNEL.PENRYN
|
||||
@@ -62,7 +62,7 @@
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
@@ -62,7 +62,7 @@
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
@@ -62,7 +62,7 @@
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
@@ -62,7 +62,7 @@
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
@@ -62,7 +62,7 @@
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
@@ -62,7 +62,7 @@
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHSIZE (8 * 21 + 4)
|
||||
#endif
|
||||
|
||||
@@ -61,7 +61,7 @@
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
@@ -61,7 +61,7 @@
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
@@ -63,7 +63,7 @@
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
@@ -61,7 +61,7 @@
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht1
|
||||
#define PREFETCHSIZE 84
|
||||
#endif
|
||||
|
||||
63
kernel/x86_64/KERNEL.HASWELL
Normal file
63
kernel/x86_64/KERNEL.HASWELL
Normal file
@@ -0,0 +1,63 @@
|
||||
SGEMMKERNEL = sgemm_kernel_16x4_haswell.S
|
||||
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
|
||||
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_4x4_haswell.S
|
||||
DGEMMINCOPY =
|
||||
DGEMMITCOPY =
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
DGEMMINCOPYOBJ =
|
||||
DGEMMITCOPYOBJ =
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
CGEMMKERNEL = cgemm_kernel_8x2_haswell.S
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_8.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_4x2_haswell.S
|
||||
ZGEMMINCOPY = ../generic/zgemm_ncopy_4.c
|
||||
ZGEMMITCOPY = ../generic/zgemm_tcopy_4.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CGEMM3MKERNEL = zgemm3m_kernel_4x8_nehalem.S
|
||||
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S
|
||||
|
||||
2284
kernel/x86_64/cgemm_kernel_8x2_haswell.S
Normal file
2284
kernel/x86_64/cgemm_kernel_8x2_haswell.S
Normal file
File diff suppressed because it is too large
Load Diff
5215
kernel/x86_64/dgemm_kernel_16x2_haswell.S
Normal file
5215
kernel/x86_64/dgemm_kernel_16x2_haswell.S
Normal file
File diff suppressed because it is too large
Load Diff
3479
kernel/x86_64/dgemm_kernel_4x4_haswell.S
Normal file
3479
kernel/x86_64/dgemm_kernel_4x4_haswell.S
Normal file
File diff suppressed because it is too large
Load Diff
3159
kernel/x86_64/sgemm_kernel_16x4_haswell.S
Normal file
3159
kernel/x86_64/sgemm_kernel_16x4_haswell.S
Normal file
File diff suppressed because it is too large
Load Diff
@@ -57,7 +57,7 @@
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
#define PREFETCHSIZE (16 * 12)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
||||
1812
kernel/x86_64/zgemm_kernel_4x2_haswell.S
Normal file
1812
kernel/x86_64/zgemm_kernel_4x2_haswell.S
Normal file
File diff suppressed because it is too large
Load Diff
@@ -57,7 +57,7 @@
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
||||
@@ -57,7 +57,7 @@
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
#endif
|
||||
|
||||
#if defined(NEHALEM) || defined(SANDYBRIDGE)
|
||||
#if defined(NEHALEM) || defined(NEHALEM_OPTIMIZATION)
|
||||
#define PREFETCH prefetcht0
|
||||
#define PREFETCHW prefetcht0
|
||||
#define PREFETCHSIZE (16 * 24)
|
||||
|
||||
17
param.h
17
param.h
@@ -1154,6 +1154,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#ifdef HASWELL
|
||||
|
||||
<<<<<<< HEAD
|
||||
#define SNUMOPT 8
|
||||
#define DNUMOPT 4
|
||||
|
||||
@@ -1164,6 +1165,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define SYMV_P 8
|
||||
|
||||
#define SWITCH_RATIO 4
|
||||
=======
|
||||
#define SNUMOPT 8
|
||||
#define DNUMOPT 4
|
||||
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SYMV_P 8
|
||||
|
||||
#define SWITCH_RATIO 4
|
||||
>>>>>>> origin/haswell
|
||||
|
||||
#ifdef ARCH_X86
|
||||
|
||||
@@ -1233,6 +1246,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define ZGEMM_DEFAULT_Q 128
|
||||
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
<<<<<<< HEAD
|
||||
=======
|
||||
//#define DGEMM_DEFAULT_R dgemm_r
|
||||
>>>>>>> origin/haswell
|
||||
#define DGEMM_DEFAULT_R 13824
|
||||
#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
|
||||
Reference in New Issue
Block a user