added target processor STEAMROLLER
This commit is contained in:
		
							parent
							
								
									cbb3ab80e7
								
							
						
					
					
						commit
						4319769b79
					
				| 
						 | 
				
			
			@ -15,7 +15,7 @@ VERSION = 0.2.13
 | 
			
		|||
# TARGET = PENRYN
 | 
			
		||||
 | 
			
		||||
# If you want to support multiple architectures in one binary
 | 
			
		||||
# DYNAMIC_ARCH = 1
 | 
			
		||||
DYNAMIC_ARCH = 1
 | 
			
		||||
 | 
			
		||||
# C compiler including binary type(32bit / 64bit). Default is gcc.
 | 
			
		||||
# Don't use the Intel Compiler or PGI; they won't generate the code I expect.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -61,6 +61,9 @@ endif
 | 
			
		|||
ifeq ($(TARGET), PILEDRIVER)
 | 
			
		||||
GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
			
		||||
endif
 | 
			
		||||
ifeq ($(TARGET), STEAMROLLER)
 | 
			
		||||
GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -85,6 +88,9 @@ endif
 | 
			
		|||
ifeq ($(TARGET_CORE), PILEDRIVER)
 | 
			
		||||
GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
			
		||||
endif
 | 
			
		||||
ifeq ($(TARGET_CORE), STEAMROLLER)
 | 
			
		||||
GETARCH_FLAGS := -DFORCE_BARCELONA
 | 
			
		||||
endif
 | 
			
		||||
endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -392,7 +398,7 @@ endif
 | 
			
		|||
ifeq ($(ARCH), x86_64)
 | 
			
		||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
 | 
			
		||||
ifneq ($(NO_AVX), 1)
 | 
			
		||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER
 | 
			
		||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER
 | 
			
		||||
endif
 | 
			
		||||
ifneq ($(NO_AVX2), 1)
 | 
			
		||||
DYNAMIC_CORE += HASWELL
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -60,6 +60,7 @@ Please read GotoBLAS_01Readme.txt
 | 
			
		|||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
 | 
			
		||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thank Werner Saar)
 | 
			
		||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
 | 
			
		||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
 | 
			
		||||
 | 
			
		||||
#### MIPS64:
 | 
			
		||||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -32,6 +32,7 @@ ISTANBUL
 | 
			
		|||
BOBCAT
 | 
			
		||||
BULLDOZER
 | 
			
		||||
PILEDRIVER
 | 
			
		||||
STEAMROLLER
 | 
			
		||||
 | 
			
		||||
c)VIA CPU:
 | 
			
		||||
SSE_GENERIC
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -171,7 +171,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 | 
			
		|||
#define MMXSTORE	movd
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
 | 
			
		||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
 | 
			
		||||
//Enable some optimization for barcelona.
 | 
			
		||||
#define BARCELONA_OPTIMIZATION
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -226,7 +226,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
 | 
			
		|||
 | 
			
		||||
#ifdef ASSEMBLER
 | 
			
		||||
 | 
			
		||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
 | 
			
		||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER)
 | 
			
		||||
//Enable some optimization for barcelona.
 | 
			
		||||
#define BARCELONA_OPTIMIZATION
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										10
									
								
								cpuid.h
								
								
								
								
							
							
						
						
									
										10
									
								
								cpuid.h
								
								
								
								
							| 
						 | 
				
			
			@ -104,10 +104,11 @@
 | 
			
		|||
#define CORE_ATOM	18
 | 
			
		||||
#define CORE_NANO	19
 | 
			
		||||
#define CORE_SANDYBRIDGE 20
 | 
			
		||||
#define CORE_BOBCAT     21
 | 
			
		||||
#define CORE_BULLDOZER  22
 | 
			
		||||
#define CORE_BOBCAT      21
 | 
			
		||||
#define CORE_BULLDOZER   22
 | 
			
		||||
#define CORE_PILEDRIVER  23
 | 
			
		||||
#define CORE_HASWELL 24
 | 
			
		||||
#define CORE_HASWELL     24
 | 
			
		||||
#define CORE_STEAMROLLER 25
 | 
			
		||||
 | 
			
		||||
#define HAVE_SSE      (1 <<  0)
 | 
			
		||||
#define HAVE_SSE2     (1 <<  1)
 | 
			
		||||
| 
						 | 
				
			
			@ -200,6 +201,7 @@ typedef struct {
 | 
			
		|||
#define CPUTYPE_BOBCAT                  45
 | 
			
		||||
#define CPUTYPE_BULLDOZER               46
 | 
			
		||||
#define CPUTYPE_PILEDRIVER              47
 | 
			
		||||
#define CPUTYPE_HASWELL 48
 | 
			
		||||
#define CPUTYPE_HASWELL 		48
 | 
			
		||||
#define CPUTYPE_STEAMROLLER 		49
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										18
									
								
								cpuid_x86.c
								
								
								
								
							
							
						
						
									
										18
									
								
								cpuid_x86.c
								
								
								
								
							| 
						 | 
				
			
			@ -1162,6 +1162,12 @@ int get_cpuname(void){
 | 
			
		|||
	    return CPUTYPE_PILEDRIVER;
 | 
			
		||||
	  else
 | 
			
		||||
	    return CPUTYPE_BARCELONA; //OS don't support AVX.
 | 
			
		||||
	case 0:
 | 
			
		||||
	  if(support_avx())
 | 
			
		||||
	    return CPUTYPE_STEAMROLLER;
 | 
			
		||||
	  else
 | 
			
		||||
	    return CPUTYPE_BARCELONA; //OS don't support AVX.
 | 
			
		||||
 | 
			
		||||
	}
 | 
			
		||||
	break;
 | 
			
		||||
      case  5:
 | 
			
		||||
| 
						 | 
				
			
			@ -1290,6 +1296,7 @@ static char *cpuname[] = {
 | 
			
		|||
  "BULLDOZER",
 | 
			
		||||
  "PILEDRIVER",
 | 
			
		||||
  "HASWELL",
 | 
			
		||||
  "STEAMROLLER",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static char *lowercpuname[] = {
 | 
			
		||||
| 
						 | 
				
			
			@ -1341,6 +1348,7 @@ static char *lowercpuname[] = {
 | 
			
		|||
  "bulldozer",
 | 
			
		||||
  "piledriver",
 | 
			
		||||
  "haswell",
 | 
			
		||||
  "steamroller",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static char *corename[] = {
 | 
			
		||||
| 
						 | 
				
			
			@ -1369,6 +1377,7 @@ static char *corename[] = {
 | 
			
		|||
  "BULLDOZER",
 | 
			
		||||
  "PILEDRIVER",
 | 
			
		||||
  "HASWELL",
 | 
			
		||||
  "STEAMROLLER",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static char *corename_lower[] = {
 | 
			
		||||
| 
						 | 
				
			
			@ -1397,6 +1406,7 @@ static char *corename_lower[] = {
 | 
			
		|||
  "bulldozer",
 | 
			
		||||
  "piledriver",
 | 
			
		||||
  "haswell",
 | 
			
		||||
  "steamroller",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -1562,7 +1572,15 @@ int get_coretype(void){
 | 
			
		|||
	    return CORE_PILEDRIVER;
 | 
			
		||||
	  else
 | 
			
		||||
	    return CORE_BARCELONA; //OS don't support AVX.
 | 
			
		||||
	
 | 
			
		||||
	case 0:
 | 
			
		||||
	  if(support_avx())
 | 
			
		||||
	    return CORE_STEAMROLLER;
 | 
			
		||||
	  else
 | 
			
		||||
	    return CORE_BARCELONA; //OS don't support AVX.
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      }else return CORE_BARCELONA;
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -66,6 +66,7 @@ extern gotoblas_t  gotoblas_BOBCAT;
 | 
			
		|||
extern gotoblas_t  gotoblas_SANDYBRIDGE;
 | 
			
		||||
extern gotoblas_t  gotoblas_BULLDOZER;
 | 
			
		||||
extern gotoblas_t  gotoblas_PILEDRIVER;
 | 
			
		||||
extern gotoblas_t  gotoblas_STEAMROLLER;
 | 
			
		||||
#ifdef NO_AVX2
 | 
			
		||||
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE
 | 
			
		||||
#else
 | 
			
		||||
| 
						 | 
				
			
			@ -77,6 +78,7 @@ extern gotoblas_t  gotoblas_HASWELL;
 | 
			
		|||
#define gotoblas_HASWELL gotoblas_NEHALEM
 | 
			
		||||
#define gotoblas_BULLDOZER gotoblas_BARCELONA
 | 
			
		||||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA
 | 
			
		||||
#define gotoblas_STEAMROLLER gotoblas_BARCELONA
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -275,7 +277,17 @@ static gotoblas_t *get_coretype(void){
 | 
			
		|||
	    openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
 | 
			
		||||
	    return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
 | 
			
		||||
	  }
 | 
			
		||||
	}else if(model == 0){
 | 
			
		||||
	  //AMD STEAMROLLER
 | 
			
		||||
	  if(support_avx())
 | 
			
		||||
	    return &gotoblas_STEAMROLLER;
 | 
			
		||||
	  else{
 | 
			
		||||
	    openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
 | 
			
		||||
	    return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
 | 
			
		||||
	  }
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
      } else {
 | 
			
		||||
	return &gotoblas_BARCELONA;
 | 
			
		||||
      }
 | 
			
		||||
| 
						 | 
				
			
			@ -315,6 +327,7 @@ static char *corename[] = {
 | 
			
		|||
    "Bulldozer",
 | 
			
		||||
    "Piledriver",
 | 
			
		||||
    "Haswell",
 | 
			
		||||
    "Steamroller",
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
char *gotoblas_corename(void) {
 | 
			
		||||
| 
						 | 
				
			
			@ -339,6 +352,7 @@ char *gotoblas_corename(void) {
 | 
			
		|||
  if (gotoblas == &gotoblas_BULLDOZER)    return corename[18];
 | 
			
		||||
  if (gotoblas == &gotoblas_PILEDRIVER)   return corename[19];
 | 
			
		||||
  if (gotoblas == &gotoblas_HASWELL)      return corename[20];
 | 
			
		||||
  if (gotoblas == &gotoblas_STEAMROLLER)  return corename[21];
 | 
			
		||||
 | 
			
		||||
  return corename[0];
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -370,6 +384,7 @@ static gotoblas_t *force_coretype(char *coretype){
 | 
			
		|||
	switch (found)
 | 
			
		||||
	{
 | 
			
		||||
 | 
			
		||||
		case 21: return (&gotoblas_STEAMROLLER);
 | 
			
		||||
		case 20: return (&gotoblas_HASWELL);
 | 
			
		||||
		case 19: return (&gotoblas_PILEDRIVER);
 | 
			
		||||
		case 18: return (&gotoblas_BULLDOZER);
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -166,7 +166,7 @@ int get_L2_size(void){
 | 
			
		|||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
 | 
			
		||||
    defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
 | 
			
		||||
    defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
 | 
			
		||||
    defined(PILEDRIVER) || defined(HASWELL)
 | 
			
		||||
    defined(PILEDRIVER) || defined(HASWELL) || defined(STEAMROLLER)
 | 
			
		||||
 | 
			
		||||
  cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -251,7 +251,7 @@ void blas_set_parameter(void){
 | 
			
		|||
 | 
			
		||||
  env_var_t p;
 | 
			
		||||
  int factor;
 | 
			
		||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL)
 | 
			
		||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(SANDYBRIDGE) || defined(NEHALEM) || defined(HASWELL) || defined(STEAMROLLER)
 | 
			
		||||
  int size = 16;
 | 
			
		||||
#else
 | 
			
		||||
  int size = get_L2_size();
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										17
									
								
								getarch.c
								
								
								
								
							
							
						
						
									
										17
									
								
								getarch.c
								
								
								
								
							| 
						 | 
				
			
			@ -432,6 +432,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		|||
#define CORENAME  "PILEDRIVER"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined (FORCE_STEAMROLLER)
 | 
			
		||||
#define FORCE
 | 
			
		||||
#define FORCE_INTEL
 | 
			
		||||
#define ARCHITECTURE    "X86"
 | 
			
		||||
#define SUBARCHITECTURE "STEAMROLLER"
 | 
			
		||||
#define ARCHCONFIG   "-DSTEAMROLLER " \
 | 
			
		||||
		     "-DL1_DATA_SIZE=16384 -DL1_DATA_LINESIZE=64 " \
 | 
			
		||||
		     "-DL2_SIZE=2097152 -DL2_LINESIZE=64  -DL3_SIZE=12582912 " \
 | 
			
		||||
		     "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
 | 
			
		||||
		     "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 " \
 | 
			
		||||
		     "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU -DHAVE_CFLUSH " \
 | 
			
		||||
                     "-DHAVE_AVX -DHAVE_FMA4 -DHAVE_FMA3"
 | 
			
		||||
#define LIBNAME   "steamroller"
 | 
			
		||||
#define CORENAME  "STEAMROLLER"
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef FORCE_SSE_GENERIC
 | 
			
		||||
#define FORCE
 | 
			
		||||
#define FORCE_INTEL
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -941,6 +941,23 @@ static void init_parameter(void) {
 | 
			
		|||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef STEAMROLLER
 | 
			
		||||
 | 
			
		||||
#ifdef DEBUG
 | 
			
		||||
  fprintf(stderr, "Steamroller\n");
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
 | 
			
		||||
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
 | 
			
		||||
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
 | 
			
		||||
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
 | 
			
		||||
#ifdef EXPRECISION
 | 
			
		||||
  TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
 | 
			
		||||
  TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef NANO
 | 
			
		||||
 | 
			
		||||
#ifdef DEBUG
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,76 @@
 | 
			
		|||
SGEMVNKERNEL = sgemv_n_4.c
 | 
			
		||||
SGEMVTKERNEL = sgemv_t_4.c
 | 
			
		||||
 | 
			
		||||
ZGEMVNKERNEL = zgemv_n_dup.S
 | 
			
		||||
ZGEMVTKERNEL = zgemv_t_4.c
 | 
			
		||||
 | 
			
		||||
DGEMVNKERNEL = dgemv_n_bulldozer.S
 | 
			
		||||
DGEMVTKERNEL = dgemv_t_bulldozer.S
 | 
			
		||||
 | 
			
		||||
DDOTKERNEL   = ddot_bulldozer.S
 | 
			
		||||
DCOPYKERNEL  = dcopy_bulldozer.S
 | 
			
		||||
 | 
			
		||||
SGEMMKERNEL    =  sgemm_kernel_16x2_piledriver.S
 | 
			
		||||
SGEMMINCOPY    =  ../generic/gemm_ncopy_16.c
 | 
			
		||||
SGEMMITCOPY    =  ../generic/gemm_tcopy_16.c
 | 
			
		||||
SGEMMONCOPY    =  gemm_ncopy_2_bulldozer.S
 | 
			
		||||
SGEMMOTCOPY    =  gemm_tcopy_2_bulldozer.S
 | 
			
		||||
SGEMMINCOPYOBJ =  sgemm_incopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
SGEMMITCOPYOBJ =  sgemm_itcopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
SGEMMONCOPYOBJ =  sgemm_oncopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
SGEMMOTCOPYOBJ =  sgemm_otcopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
 | 
			
		||||
DGEMMKERNEL    =  dgemm_kernel_8x2_piledriver.S
 | 
			
		||||
DGEMMINCOPY    =  dgemm_ncopy_8_bulldozer.S
 | 
			
		||||
DGEMMITCOPY    =  dgemm_tcopy_8_bulldozer.S
 | 
			
		||||
DGEMMONCOPY    =  gemm_ncopy_2_bulldozer.S
 | 
			
		||||
DGEMMOTCOPY    =  gemm_tcopy_2_bulldozer.S
 | 
			
		||||
DGEMMINCOPYOBJ =  dgemm_incopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
DGEMMITCOPYOBJ =  dgemm_itcopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
DGEMMONCOPYOBJ =  dgemm_oncopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
DGEMMOTCOPYOBJ =  dgemm_otcopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
 | 
			
		||||
CGEMMKERNEL    =  cgemm_kernel_4x2_piledriver.S
 | 
			
		||||
CGEMMINCOPY    =  ../generic/zgemm_ncopy_4.c
 | 
			
		||||
CGEMMITCOPY    =  ../generic/zgemm_tcopy_4.c
 | 
			
		||||
CGEMMONCOPY    =  ../generic/zgemm_ncopy_2.c
 | 
			
		||||
CGEMMOTCOPY    =  ../generic/zgemm_tcopy_2.c
 | 
			
		||||
CGEMMINCOPYOBJ =  cgemm_incopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
CGEMMITCOPYOBJ =  cgemm_itcopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
CGEMMONCOPYOBJ =  cgemm_oncopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
CGEMMOTCOPYOBJ =  cgemm_otcopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
ZGEMMKERNEL    =  zgemm_kernel_2x2_piledriver.S
 | 
			
		||||
ZGEMMINCOPY    =
 | 
			
		||||
ZGEMMITCOPY    =
 | 
			
		||||
ZGEMMONCOPY    =  ../generic/zgemm_ncopy_2.c
 | 
			
		||||
ZGEMMOTCOPY    =  ../generic/zgemm_tcopy_2.c
 | 
			
		||||
ZGEMMINCOPYOBJ =
 | 
			
		||||
ZGEMMITCOPYOBJ =
 | 
			
		||||
ZGEMMONCOPYOBJ =  zgemm_oncopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
ZGEMMOTCOPYOBJ =  zgemm_otcopy$(TSUFFIX).$(SUFFIX)
 | 
			
		||||
 | 
			
		||||
CGEMM3MKERNEL    =  zgemm3m_kernel_8x4_barcelona.S
 | 
			
		||||
ZGEMM3MKERNEL    =  zgemm3m_kernel_4x4_barcelona.S
 | 
			
		||||
 | 
			
		||||
STRSMKERNEL_LN  =  ../generic/trsm_kernel_LN.c
 | 
			
		||||
STRSMKERNEL_LT  =  ../generic/trsm_kernel_LT.c
 | 
			
		||||
STRSMKERNEL_RN  =  ../generic/trsm_kernel_RN.c
 | 
			
		||||
STRSMKERNEL_RT  =  ../generic/trsm_kernel_RT.c
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
DTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
 | 
			
		||||
DTRSMKERNEL_LT  = dtrsm_kernel_LT_8x2_bulldozer.S
 | 
			
		||||
DTRSMKERNEL_RN  = dtrsm_kernel_RN_8x2_bulldozer.S
 | 
			
		||||
DTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
 | 
			
		||||
 | 
			
		||||
CTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
 | 
			
		||||
CTRSMKERNEL_LT  = ../generic/trsm_kernel_LT.c
 | 
			
		||||
CTRSMKERNEL_RN  = ../generic/trsm_kernel_RN.c
 | 
			
		||||
CTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
 | 
			
		||||
 | 
			
		||||
ZTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
 | 
			
		||||
ZTRSMKERNEL_LT  = ../generic/trsm_kernel_LT.c
 | 
			
		||||
ZTRSMKERNEL_RN  = ../generic/trsm_kernel_RN.c
 | 
			
		||||
ZTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		|||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(BULLDOZER) || defined(PILEDRIVER)
 | 
			
		||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
 | 
			
		||||
#include "ddot_microk_bulldozer-2.c"
 | 
			
		||||
#elif defined(NEHALEM)
 | 
			
		||||
#include "ddot_microk_nehalem-2.c"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		|||
 | 
			
		||||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
#if defined(BULLDOZER) || defined(PILEDRIVER)
 | 
			
		||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
 | 
			
		||||
#include "sdot_microk_bulldozer-2.c"
 | 
			
		||||
#elif defined(NEHALEM)
 | 
			
		||||
#include "sdot_microk_nehalem-2.c"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		|||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(BULLDOZER) || defined(PILEDRIVER)
 | 
			
		||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
 | 
			
		||||
#include "sgemv_n_microk_bulldozer-4.c"
 | 
			
		||||
#elif defined(NEHALEM)
 | 
			
		||||
#include "sgemv_n_microk_nehalem-4.c"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		|||
 | 
			
		||||
#if defined(NEHALEM)
 | 
			
		||||
#include "sgemv_t_microk_nehalem-4.c"
 | 
			
		||||
#elif defined(BULLDOZER) || defined(PILEDRIVER)
 | 
			
		||||
#elif defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
 | 
			
		||||
#include "sgemv_t_microk_bulldozer-4.c"
 | 
			
		||||
#elif defined(SANDYBRIDGE)
 | 
			
		||||
#include "sgemv_t_microk_sandy-4.c"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -29,7 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		|||
#include "common.h"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#if defined(BULLDOZER) || defined(PILEDRIVER)
 | 
			
		||||
#if defined(BULLDOZER) || defined(PILEDRIVER) || defined(STEAMROLLER)
 | 
			
		||||
#include "zgemv_t_microk_bulldozer-4.c"
 | 
			
		||||
#elif defined(HASWELL)
 | 
			
		||||
#include "zgemv_t_microk_haswell-4.c"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										93
									
								
								param.h
								
								
								
								
							
							
						
						
									
										93
									
								
								param.h
								
								
								
								
							| 
						 | 
				
			
			@ -406,6 +406,99 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
			
		|||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef STEAMROLLER
 | 
			
		||||
#define SNUMOPT         8
 | 
			
		||||
#define DNUMOPT         4
 | 
			
		||||
 | 
			
		||||
#define GEMM_DEFAULT_OFFSET_A  64
 | 
			
		||||
#define GEMM_DEFAULT_OFFSET_B 832
 | 
			
		||||
#define GEMM_DEFAULT_ALIGN 0x0fffUL
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#define QGEMM_DEFAULT_UNROLL_N 2
 | 
			
		||||
#define CGEMM_DEFAULT_UNROLL_N 2
 | 
			
		||||
#define ZGEMM_DEFAULT_UNROLL_N 2
 | 
			
		||||
#define XGEMM_DEFAULT_UNROLL_N 1
 | 
			
		||||
 | 
			
		||||
#ifdef ARCH_X86
 | 
			
		||||
#define SGEMM_DEFAULT_UNROLL_N 4
 | 
			
		||||
#define DGEMM_DEFAULT_UNROLL_N 4
 | 
			
		||||
#define SGEMM_DEFAULT_UNROLL_M 4
 | 
			
		||||
#define DGEMM_DEFAULT_UNROLL_M 2
 | 
			
		||||
#define QGEMM_DEFAULT_UNROLL_M 2
 | 
			
		||||
#define CGEMM_DEFAULT_UNROLL_M 2
 | 
			
		||||
#define ZGEMM_DEFAULT_UNROLL_M 1
 | 
			
		||||
#define XGEMM_DEFAULT_UNROLL_M 1
 | 
			
		||||
#else
 | 
			
		||||
#define SGEMM_DEFAULT_UNROLL_N 2
 | 
			
		||||
#define DGEMM_DEFAULT_UNROLL_N 2
 | 
			
		||||
#define SGEMM_DEFAULT_UNROLL_M 16
 | 
			
		||||
#define DGEMM_DEFAULT_UNROLL_M 8
 | 
			
		||||
#define QGEMM_DEFAULT_UNROLL_M 2
 | 
			
		||||
#define CGEMM_DEFAULT_UNROLL_M 4
 | 
			
		||||
#define ZGEMM_DEFAULT_UNROLL_M 2
 | 
			
		||||
#define XGEMM_DEFAULT_UNROLL_M 1
 | 
			
		||||
#define CGEMM3M_DEFAULT_UNROLL_N 4
 | 
			
		||||
#define CGEMM3M_DEFAULT_UNROLL_M 8
 | 
			
		||||
#define ZGEMM3M_DEFAULT_UNROLL_N 4
 | 
			
		||||
#define ZGEMM3M_DEFAULT_UNROLL_M 4
 | 
			
		||||
#define GEMV_UNROLL 8
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#if defined(ARCH_X86_64)
 | 
			
		||||
#define SGEMM_DEFAULT_P 768
 | 
			
		||||
#define DGEMM_DEFAULT_P 768
 | 
			
		||||
#define ZGEMM_DEFAULT_P 384
 | 
			
		||||
#define CGEMM_DEFAULT_P 768
 | 
			
		||||
#else
 | 
			
		||||
#define SGEMM_DEFAULT_P 448
 | 
			
		||||
#define DGEMM_DEFAULT_P 480
 | 
			
		||||
#define ZGEMM_DEFAULT_P 112
 | 
			
		||||
#define CGEMM_DEFAULT_P 224
 | 
			
		||||
#endif
 | 
			
		||||
#define QGEMM_DEFAULT_P 112
 | 
			
		||||
#define XGEMM_DEFAULT_P  56
 | 
			
		||||
 | 
			
		||||
#if defined(ARCH_X86_64)
 | 
			
		||||
#define SGEMM_DEFAULT_Q 192
 | 
			
		||||
#define DGEMM_DEFAULT_Q 168
 | 
			
		||||
#define ZGEMM_DEFAULT_Q 168
 | 
			
		||||
#define CGEMM_DEFAULT_Q 168
 | 
			
		||||
#else
 | 
			
		||||
#define SGEMM_DEFAULT_Q 224
 | 
			
		||||
#define DGEMM_DEFAULT_Q 224
 | 
			
		||||
#define ZGEMM_DEFAULT_Q 224
 | 
			
		||||
#define CGEMM_DEFAULT_Q 224
 | 
			
		||||
#endif
 | 
			
		||||
#define QGEMM_DEFAULT_Q 224
 | 
			
		||||
#define XGEMM_DEFAULT_Q 224
 | 
			
		||||
 | 
			
		||||
#define CGEMM3M_DEFAULT_P 448
 | 
			
		||||
#define ZGEMM3M_DEFAULT_P 224
 | 
			
		||||
#define XGEMM3M_DEFAULT_P 112
 | 
			
		||||
#define CGEMM3M_DEFAULT_Q 224
 | 
			
		||||
#define ZGEMM3M_DEFAULT_Q 224
 | 
			
		||||
#define XGEMM3M_DEFAULT_Q 224
 | 
			
		||||
#define CGEMM3M_DEFAULT_R 12288
 | 
			
		||||
#define ZGEMM3M_DEFAULT_R 12288
 | 
			
		||||
#define XGEMM3M_DEFAULT_R 12288
 | 
			
		||||
 | 
			
		||||
#define SGEMM_DEFAULT_R 12288
 | 
			
		||||
#define QGEMM_DEFAULT_R qgemm_r
 | 
			
		||||
#define DGEMM_DEFAULT_R 12288
 | 
			
		||||
#define CGEMM_DEFAULT_R cgemm_r
 | 
			
		||||
#define ZGEMM_DEFAULT_R zgemm_r
 | 
			
		||||
#define XGEMM_DEFAULT_R xgemm_r
 | 
			
		||||
 | 
			
		||||
#define SYMV_P  16
 | 
			
		||||
#define HAVE_EXCLUSIVE_CACHE
 | 
			
		||||
 | 
			
		||||
#define GEMM_THREAD gemm_thread_mn
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#ifdef ATHLON
 | 
			
		||||
 | 
			
		||||
#define SNUMOPT		4
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue