Merge branch 'bulldozer' into develop

This commit is contained in:
Zhang Xianyi 2013-02-10 01:19:42 +08:00
commit 5c8bf6ae0e
55 changed files with 2188 additions and 138 deletions

View File

@ -277,14 +277,14 @@ ifeq ($(ARCH), x86)
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER
endif
endif
ifeq ($(ARCH), x86_64)
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO
ifneq ($(NO_AVX), 1)
DYNAMIC_CORE += SANDYBRIDGE
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER
endif
endif

View File

@ -29,6 +29,7 @@ BARCELONA
SHANGHAI
ISTANBUL
BOBCAT
BULLDOZER
c)VIA CPU:
SSE_GENERIC

View File

@ -125,7 +125,8 @@
#define HAVE_MISALIGNSSE (1 << 15)
#define HAVE_128BITFPU (1 << 16)
#define HAVE_FASTMOVU (1 << 17)
#define HAVE_AVX (1 << 18)
#define HAVE_AVX (1 << 18)
#define HAVE_FMA4 (1 << 19)
#define CACHE_INFO_L1_I 1
#define CACHE_INFO_L1_D 2

View File

@ -43,6 +43,8 @@
#ifdef NO_AVX
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM
#define CORE_SANDYBRIDGE CORE_NEHALEM
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA
#define CORE_BULLDOZER CORE_BARCELONA
#endif
#ifndef CPUIDEMU
@ -229,6 +231,9 @@ int get_cputype(int gettype){
cpuid(0x80000001, &eax, &ebx, &ecx, &edx);
if ((ecx & (1 << 6)) != 0) feature |= HAVE_SSE4A;
if ((ecx & (1 << 7)) != 0) feature |= HAVE_MISALIGNSSE;
#ifndef NO_AVX
if ((ecx & (1 << 16)) != 0) feature |= HAVE_FMA4;
#endif
if ((edx & (1 << 30)) != 0) feature |= HAVE_3DNOWEX;
if ((edx & (1 << 31)) != 0) feature |= HAVE_3DNOW;
}
@ -1078,8 +1083,12 @@ int get_cpuname(void){
return CPUTYPE_OPTERON;
case 1:
case 10:
case 6: //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
return CPUTYPE_BARCELONA;
case 6: //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
if(support_avx())
return CPUTYPE_BULLDOZER;
else
return CPUTYPE_BARCELONA; //OS doesn't support AVX.
case 5:
return CPUTYPE_BOBCAT;
}
@ -1432,8 +1441,13 @@ int get_coretype(void){
if (family == 0xf){
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
else if (exfamily == 5) return CORE_BOBCAT;
else if (exfamily == 6) return CORE_BARCELONA; //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
else return CORE_BARCELONA;
else if (exfamily == 6) {
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
if(support_avx())
return CORE_BULLDOZER;
else
return CORE_BARCELONA; //OS doesn't support AVX. Use old kernels.
}else return CORE_BARCELONA;
}
}
@ -1519,6 +1533,7 @@ void get_cpuconfig(void){
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
if (features & HAVE_CFLUSH) printf("#define HAVE_CFLUSH\n");
if (features & HAVE_HIT) printf("#define HAVE_HIT 1\n");
if (features & HAVE_MISALIGNSSE) printf("#define HAVE_MISALIGNSSE\n");
@ -1585,5 +1600,6 @@ void get_sse(void){
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");
}

View File

@ -63,9 +63,11 @@ extern gotoblas_t gotoblas_BARCELONA;
extern gotoblas_t gotoblas_BOBCAT;
#ifndef NO_AVX
extern gotoblas_t gotoblas_SANDYBRIDGE;
extern gotoblas_t gotoblas_BULLDOZER;
#else
//Use NEHALEM kernels for sandy bridge
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM
#define gotoblas_BULLDOZER gotoblas_BARCELONA
#endif
@ -204,6 +206,14 @@ static gotoblas_t *get_coretype(void){
else return &gotoblas_OPTERON;
} else if (exfamily == 5) {
return &gotoblas_BOBCAT;
} else if (exfamily == 6) {
//AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
if(support_avx())
return &gotoblas_BULLDOZER;
else{
fprintf(stderr, "OpenBLAS : Your OS doesn't support AVX. Use Barcelona kernels.\n");
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
}
} else {
return &gotoblas_BARCELONA;
}
@ -240,6 +250,7 @@ static char *corename[] = {
"Nano",
"Sandybridge",
"Bobcat",
"Bulldozer",
};
char *gotoblas_corename(void) {
@ -261,6 +272,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_NANO) return corename[15];
if (gotoblas == &gotoblas_SANDYBRIDGE) return corename[16];
if (gotoblas == &gotoblas_BOBCAT) return corename[17];
if (gotoblas == &gotoblas_BULLDOZER) return corename[18];
return corename[0];
}

View File

@ -163,7 +163,7 @@ int get_L2_size(void){
int eax, ebx, ecx, edx;
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || \
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC)

View File

@ -350,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "OPTERON"
#endif
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_BULLDOZER)
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
@ -380,6 +380,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "BOBCAT"
#endif
/* Forced-target settings used when FORCE_BULLDOZER is defined.
 * Sets the architecture / sub-architecture strings, the library and core
 * names, and ARCHCONFIG: one concatenated string of -D flags carrying the
 * L1/L2/L3 cache parameters, the DTB parameters and the HAVE_* feature
 * macros (including HAVE_AVX and HAVE_FMA4).
 * NOTE(review): L2_SIZE is 1024000, not 1048576 (1 MiB) -- confirm that the
 * non-power-of-two value is intended. */
#if defined (FORCE_BULLDOZER)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
#define SUBARCHITECTURE "BULLDOZER"
#define ARCHCONFIG "-DBULLDOZER " \
"-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=1024000 -DL2_LINESIZE=64 -DL3_SIZE=16777216 " \
"-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \
"-DHAVE_AVX -DHAVE_FMA4"
#define LIBNAME "bulldozer"
#define CORENAME "BULLDOZER"
#endif
#ifdef FORCE_SSE_GENERIC
#define FORCE
#define FORCE_INTEL

View File

@ -810,6 +810,22 @@ static void init_parameter(void) {
#endif
#endif
#ifdef BULLDOZER
#ifdef DEBUG
fprintf(stderr, "Bulldozer\n");
#endif
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif
#endif
#ifdef NANO
#ifdef DEBUG

View File

@ -0,0 +1,59 @@
# Kernel source selection for this target.
#
# Every GEMM and GEMM3M kernel below names an existing *_barcelona.S source,
# the TRSM kernels name *_sse.S / *_sse2.S sources, and the copy routines name
# C files under ../generic. No Bulldozer-specific source is listed in this file.
#
# An empty *COPY / *COPYOBJ value means no source or object is named for that
# copy routine (SGEMM and CGEMM leave the IN/IT copies empty).
#
# Each *TRSMKERNEL_RN entry names the same source as the matching _LT entry.
# NOTE(review): ZTRSMKERNEL_LN also names the LT source, unlike the S/D/C LN
# entries, which name an _LN_ source -- confirm this is intentional.
#
# Keep comments on their own lines: a trailing comment after a value would
# leave trailing whitespace in the Make variable.
SGEMMKERNEL = gemm_kernel_4x4_barcelona.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = gemm_kernel_2x4_barcelona.S
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = zgemm_kernel_2x2_barcelona.S
CGEMMINCOPY =
CGEMMITCOPY =
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMINCOPYOBJ =
CGEMMITCOPYOBJ =
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = zgemm_kernel_1x2_barcelona.S
ZGEMMINCOPY = ../generic/zgemm_ncopy_1.c
ZGEMMITCOPY = ../generic/zgemm_tcopy_1.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRSMKERNEL_LN = trsm_kernel_LN_4x4_sse.S
STRSMKERNEL_LT = trsm_kernel_LT_4x4_sse.S
STRSMKERNEL_RN = trsm_kernel_LT_4x4_sse.S
STRSMKERNEL_RT = trsm_kernel_RT_4x4_sse.S
DTRSMKERNEL_LN = trsm_kernel_LN_2x4_sse2.S
DTRSMKERNEL_LT = trsm_kernel_LT_2x4_sse2.S
DTRSMKERNEL_RN = trsm_kernel_LT_2x4_sse2.S
DTRSMKERNEL_RT = trsm_kernel_RT_2x4_sse2.S
CTRSMKERNEL_LN = ztrsm_kernel_LN_2x2_sse.S
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x2_sse.S
CTRSMKERNEL_RN = ztrsm_kernel_LT_2x2_sse.S
CTRSMKERNEL_RT = ztrsm_kernel_RT_2x2_sse.S
ZTRSMKERNEL_LN = ztrsm_kernel_LT_1x2_sse2.S
ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x2_sse2.S
ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x2_sse2.S
ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x2_sse2.S
CGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S
ZGEMM3MKERNEL = zgemm3m_kernel_2x4_barcelona.S

View File

@ -596,7 +596,7 @@
.L22:
mulps %xmm0, %xmm2
addps %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movsd 4 * SIZE(BB), %xmm2
@ -842,7 +842,7 @@
.L32:
mulss %xmm0, %xmm2
addss %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movss 4 * SIZE(BB), %xmm2
@ -1168,7 +1168,7 @@
.L52:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulps 4 * SIZE(BB), %xmm0
@ -1198,7 +1198,7 @@
addps %xmm0, %xmm5
movaps 32 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
mulps %xmm1, %xmm2
@ -1347,7 +1347,7 @@
ALIGN_4
.L62:
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
@ -1531,7 +1531,7 @@
.L72:
mulss %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulss 4 * SIZE(BB), %xmm0
@ -1778,7 +1778,7 @@
.L92:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movaps 4 * SIZE(AA), %xmm0
@ -1793,7 +1793,7 @@
mulps 12 * SIZE(BB), %xmm0
addps %xmm0, %xmm7
movaps 32 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
mulps %xmm1, %xmm3
@ -1924,7 +1924,7 @@
.L102:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movsd 2 * SIZE(AA), %xmm0
@ -2069,7 +2069,7 @@
.L112:
mulss %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movss 1 * SIZE(AA), %xmm0

View File

@ -269,7 +269,7 @@
sarl $5, I
jle .L113
#if defined(BARCELONA)
#if defined(BARCELONA) || defined(BULLDOZER)
movaps %xmm0, %xmm1
mulps -32 * SIZE(X), %xmm1

View File

@ -253,7 +253,7 @@
sarl $4, I
jle .L113
#if defined(BARCELONA)
#if defined(BARCELONA) || defined(BULLDOZER)
movaps %xmm0, %xmm1
mulpd -16 * SIZE(X), %xmm1

View File

@ -69,7 +69,7 @@
#define STACK_ALIGN 4096
#define STACK_OFFSET 1024
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHSIZE (8 * 10 + 4)
#endif
@ -439,7 +439,7 @@
.L22:
mulsd %xmm0, %xmm2
addsd %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movlpd 2 * SIZE(BB), %xmm2
@ -488,7 +488,7 @@
movlpd 40 * SIZE(BB), %xmm3
addsd %xmm0, %xmm7
movlpd 8 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 8) * SIZE(AA)
#endif
mulsd %xmm1, %xmm2
@ -1697,7 +1697,7 @@
.L42:
mulpd %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulpd 2 * SIZE(BB), %xmm0
@ -1727,7 +1727,7 @@
addpd %xmm0, %xmm7
movapd 16 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA)
#endif
mulpd %xmm1, %xmm2

View File

@ -64,7 +64,7 @@
#define BORIG 60(%esp)
#define BUFFER 128(%esp)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 10 + 8)
@ -437,7 +437,7 @@
.L32:
mulss %xmm0, %xmm2
addss %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movss 4 * SIZE(BB), %xmm2
@ -833,7 +833,7 @@
.L22:
mulps %xmm0, %xmm2
addps %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movaps 4 * SIZE(BB), %xmm2
@ -1848,7 +1848,7 @@
.L72:
mulss %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulss 4 * SIZE(BB), %xmm0
@ -2109,7 +2109,7 @@
ALIGN_4
.L62:
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
@ -2429,7 +2429,7 @@
.L52:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulps 4 * SIZE(BB), %xmm0
@ -2459,7 +2459,7 @@
addps %xmm0, %xmm5
movaps 32 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
mulps %xmm1, %xmm2
@ -2952,7 +2952,7 @@
.L112:
mulss %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movss 1 * SIZE(AA), %xmm0
@ -3148,7 +3148,7 @@
.L102:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movsd 2 * SIZE(AA), %xmm0
@ -3389,7 +3389,7 @@
.L92:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movaps 4 * SIZE(AA), %xmm0
@ -3404,7 +3404,7 @@
mulps 12 * SIZE(BB), %xmm0
addps %xmm0, %xmm7
movaps 32 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
mulps %xmm1, %xmm3

View File

@ -69,7 +69,7 @@
#define STACK_ALIGN 4096
#define STACK_OFFSET 1024
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHSIZE (8 * 10 + 4)
#endif
@ -910,7 +910,7 @@
.L22:
mulsd %xmm0, %xmm2
addsd %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movlpd 2 * SIZE(BB), %xmm2
@ -959,7 +959,7 @@
movlpd 40 * SIZE(BB), %xmm3
addsd %xmm0, %xmm7
movlpd 8 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 8) * SIZE(AA)
#endif
mulsd %xmm1, %xmm2
@ -1439,7 +1439,7 @@
.L42:
mulpd %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulpd 2 * SIZE(BB), %xmm0
@ -1469,7 +1469,7 @@
addpd %xmm0, %xmm7
movapd 16 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA)
#endif
mulpd %xmm1, %xmm2

View File

@ -64,7 +64,7 @@
#define BORIG 60(%esp)
#define BUFFER 128(%esp)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 10 + 8)
@ -872,7 +872,7 @@
.L22:
mulps %xmm0, %xmm2
addps %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movaps 4 * SIZE(BB), %xmm2
@ -1316,7 +1316,7 @@
.L32:
mulss %xmm0, %xmm2
addss %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movss 4 * SIZE(BB), %xmm2
@ -1855,7 +1855,7 @@
.L52:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulps 4 * SIZE(BB), %xmm0
@ -1885,7 +1885,7 @@
addps %xmm0, %xmm5
movaps 32 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
mulps %xmm1, %xmm2
@ -2249,7 +2249,7 @@
ALIGN_4
.L62:
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
@ -2562,7 +2562,7 @@
.L72:
mulss %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulss 4 * SIZE(BB), %xmm0
@ -2957,7 +2957,7 @@
.L92:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movaps 4 * SIZE(AA), %xmm0
@ -2972,7 +2972,7 @@
mulps 12 * SIZE(BB), %xmm0
addps %xmm0, %xmm7
movaps 32 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
mulps %xmm1, %xmm3
@ -3280,7 +3280,7 @@
.L102:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movsd 2 * SIZE(AA), %xmm0
@ -3515,7 +3515,7 @@
.L112:
mulss %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movss 1 * SIZE(AA), %xmm0

View File

@ -69,7 +69,7 @@
#define STACK_ALIGN 4096
#define STACK_OFFSET 1024
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHSIZE (8 * 10 + 4)
#endif
@ -1036,7 +1036,7 @@
.L42:
mulpd %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulpd 2 * SIZE(BB), %xmm0
@ -1066,7 +1066,7 @@
addpd %xmm0, %xmm7
movapd 16 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA)
#endif
mulpd %xmm1, %xmm2
@ -2224,7 +2224,7 @@
.L22:
mulsd %xmm0, %xmm2
addsd %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movlpd 2 * SIZE(BB), %xmm2
@ -2273,7 +2273,7 @@
movlpd 40 * SIZE(BB), %xmm3
addsd %xmm0, %xmm7
movlpd 8 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 8) * SIZE(AA)
#endif
mulsd %xmm1, %xmm2

View File

@ -64,7 +64,7 @@
#define BORIG 60(%esp)
#define BUFFER 128(%esp)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 10 + 8)
@ -439,7 +439,7 @@
.L92:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movaps 4 * SIZE(AA), %xmm0
@ -454,7 +454,7 @@
mulps 12 * SIZE(BB), %xmm0
addps %xmm0, %xmm7
movaps 32 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
mulps %xmm1, %xmm3
@ -758,7 +758,7 @@
.L102:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movsd 2 * SIZE(AA), %xmm0
@ -993,7 +993,7 @@
.L112:
mulss %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movss 1 * SIZE(AA), %xmm0
@ -1324,7 +1324,7 @@
.L52:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulps 4 * SIZE(BB), %xmm0
@ -1354,7 +1354,7 @@
addps %xmm0, %xmm5
movaps 32 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
mulps %xmm1, %xmm2
@ -1718,7 +1718,7 @@
ALIGN_4
.L62:
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
@ -2031,7 +2031,7 @@
.L72:
mulss %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulss 4 * SIZE(BB), %xmm0
@ -2859,7 +2859,7 @@
.L22:
mulps %xmm0, %xmm2
addps %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movaps 4 * SIZE(BB), %xmm2
@ -3303,7 +3303,7 @@
.L32:
mulss %xmm0, %xmm2
addss %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movss 4 * SIZE(BB), %xmm2

View File

@ -74,7 +74,7 @@
#define BB %ecx
#define LDC %ebp
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
#define movsd movlps
#endif
@ -625,7 +625,7 @@
.L22:
mulps %xmm0, %xmm2
addps %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movsd 4 * SIZE(BB), %xmm2
@ -870,7 +870,7 @@
.L32:
mulss %xmm0, %xmm2
addss %xmm2, %xmm4
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movss 4 * SIZE(BB), %xmm2
@ -1173,7 +1173,7 @@
.L52:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulps 4 * SIZE(BB), %xmm0
@ -1203,7 +1203,7 @@
addps %xmm0, %xmm5
movaps 32 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
mulps %xmm1, %xmm2
@ -1359,7 +1359,7 @@
ALIGN_4
.L62:
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
@ -1536,7 +1536,7 @@
.L72:
mulss %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
mulss 4 * SIZE(BB), %xmm0
@ -1794,7 +1794,7 @@
.L92:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movaps 4 * SIZE(AA), %xmm0
@ -1809,7 +1809,7 @@
mulps 12 * SIZE(BB), %xmm0
addps %xmm0, %xmm7
movaps 32 * SIZE(AA), %xmm0
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
mulps %xmm1, %xmm3
@ -1936,7 +1936,7 @@
.L102:
mulps %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movsd 2 * SIZE(AA), %xmm0
@ -2069,7 +2069,7 @@
.L112:
mulss %xmm0, %xmm2
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA)
#endif
movss 1 * SIZE(AA), %xmm0

View File

@ -71,7 +71,7 @@
#define movsd movlps
#endif
#ifdef BARCELONA
#if defined(BARCELONA) || defined(BULLDOZER)
#define PREFETCH prefetchnta
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 5)

View File

@ -58,7 +58,7 @@
#define movsd movlps
#endif
#ifdef BARCELONA
#if defined(BARCELONA) || defined(BULLDOZER)
#define PREFETCH prefetchnta
#define PREFETCHW prefetchw
#define PREFETCHSIZE (8 * 5)

View File

@ -71,7 +71,7 @@
#define movsd movlps
#endif
#ifdef BARCELONA
#if defined(BARCELONA) || defined(BULLDOZER)
#define PREFETCH prefetchnta
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 5)

View File

@ -58,7 +58,7 @@
#define movsd movlps
#endif
#ifdef BARCELONA
#if defined(BARCELONA) || defined(BULLDOZER)
#define PREFETCH prefetchnta
#define PREFETCHW prefetchw
#define PREFETCHSIZE (8 * 5)

View File

@ -75,7 +75,7 @@
#define STACK_ALIGN 4096
#define STACK_OFFSET 1024
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCHSIZE (16 * 10 + 8)
#define WPREFETCHSIZE 112
#define PREFETCH prefetch
@ -533,7 +533,7 @@
addps %xmm0, %xmm7
movsd 16 * SIZE(AA), %xmm0
mulps %xmm1, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
addps %xmm2, %xmm4

View File

@ -75,7 +75,7 @@
#define STACK_ALIGN 4096
#define STACK_OFFSET 1024
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCHSIZE (16 * 10 + 8)
#define WPREFETCHSIZE 112
#define PREFETCH prefetch
@ -994,7 +994,7 @@
addps %xmm0, %xmm7
movsd 16 * SIZE(AA), %xmm0
mulps %xmm1, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
addps %xmm2, %xmm4

View File

@ -75,7 +75,7 @@
#define STACK_ALIGN 4096
#define STACK_OFFSET 1024
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCHSIZE (16 * 10 + 8)
#define WPREFETCHSIZE 112
#define PREFETCH prefetch
@ -1820,7 +1820,7 @@
addps %xmm0, %xmm7
movsd 16 * SIZE(AA), %xmm0
mulps %xmm1, %xmm2
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER)
prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA)
#endif
addps %xmm2, %xmm4

View File

@ -0,0 +1,62 @@
# Kernel source selection for this target.
#
# DGEMMKERNEL is the only entry naming a Bulldozer-specific source
# (dgemm_kernel_4x4_bulldozer.S). The SGEMM/CGEMM/ZGEMM and GEMM3M kernels and
# the DTRSM kernels name *_barcelona.S sources; the STRSM/CTRSM/ZTRSM kernels
# name *_sse.S / *_sse2.S sources.
#
# Copy routines come from *_opteron.S, zgemm_*copy_2.S or ../generic C files.
# An empty *COPY / *COPYOBJ value means no source or object is named for that
# copy routine (DGEMM and ZGEMM leave the IN/IT copies empty).
#
# Each *TRSMKERNEL_RN entry names the same source as the matching _LT entry.
#
# Keep comments on their own lines: a trailing comment after a value would
# leave trailing whitespace in the Make variable.
ZGEMVNKERNEL = zgemv_n_dup.S
ZGEMVTKERNEL = zgemv_t_dup.S
SGEMMKERNEL = gemm_kernel_8x4_barcelona.S
SGEMMINCOPY = ../generic/gemm_ncopy_8.c
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
SGEMMONCOPY = gemm_ncopy_4_opteron.S
SGEMMOTCOPY = gemm_tcopy_4_opteron.S
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = dgemm_kernel_4x4_bulldozer.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = gemm_ncopy_4_opteron.S
DGEMMOTCOPY = gemm_tcopy_4_opteron.S
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = zgemm_kernel_4x2_barcelona.S
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPY = zgemm_ncopy_2.S
CGEMMOTCOPY = zgemm_tcopy_2.S
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = zgemm_kernel_2x2_barcelona.S
ZGEMMINCOPY =
ZGEMMITCOPY =
ZGEMMONCOPY = zgemm_ncopy_2.S
ZGEMMOTCOPY = zgemm_tcopy_2.S
ZGEMMINCOPYOBJ =
ZGEMMITCOPYOBJ =
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRSMKERNEL_LN = trsm_kernel_LN_8x4_sse.S
STRSMKERNEL_LT = trsm_kernel_LT_8x4_sse.S
STRSMKERNEL_RN = trsm_kernel_LT_8x4_sse.S
STRSMKERNEL_RT = trsm_kernel_RT_8x4_sse.S
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_barcelona.S
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_barcelona.S
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_barcelona.S
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_barcelona.S
CTRSMKERNEL_LN = ztrsm_kernel_LN_4x2_sse.S
CTRSMKERNEL_LT = ztrsm_kernel_LT_4x2_sse.S
CTRSMKERNEL_RN = ztrsm_kernel_LT_4x2_sse.S
CTRSMKERNEL_RT = ztrsm_kernel_RT_4x2_sse.S
ZTRSMKERNEL_LN = ztrsm_kernel_LN_2x2_sse2.S
ZTRSMKERNEL_LT = ztrsm_kernel_LT_2x2_sse2.S
ZTRSMKERNEL_RN = ztrsm_kernel_LT_2x2_sse2.S
ZTRSMKERNEL_RT = ztrsm_kernel_RT_2x2_sse2.S
CGEMM3MKERNEL = zgemm3m_kernel_8x4_barcelona.S
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_barcelona.S

File diff suppressed because it is too large Load Diff

View File

@ -930,7 +930,7 @@
.L22:
mulps %xmm8, %xmm9
addps %xmm9, %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
movaps 4 * SIZE(BO), %xmm9
@ -983,7 +983,7 @@
addps %xmm8, %xmm3
movaps 0 * SIZE(AO), %xmm8
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO)
#endif
mulps %xmm10, %xmm9
@ -1178,7 +1178,7 @@
.L32:
mulps %xmm8, %xmm9
addps %xmm9, %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
movsd 4 * SIZE(BO), %xmm9
@ -1423,7 +1423,7 @@
.L42:
mulss %xmm8, %xmm9
addss %xmm9, %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
movss 4 * SIZE(BO), %xmm9
@ -1765,7 +1765,7 @@
.L62:
mulps %xmm8, %xmm9
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
mulps 4 * SIZE(BO), %xmm8
@ -1793,7 +1793,7 @@
addps %xmm8, %xmm5
movaps 32 * SIZE(AO), %xmm8
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO)
#endif
mulps %xmm10, %xmm11
@ -1822,7 +1822,7 @@
addps %xmm10, %xmm5
movaps 48 * SIZE(AO), %xmm10
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 32) * SIZE(AO)
#endif
mulps %xmm12, %xmm13
@ -1851,7 +1851,7 @@
addps %xmm12, %xmm5
movaps 64 * SIZE(AO), %xmm12
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 48) * SIZE(AO)
#endif
mulps %xmm14, %xmm15
@ -2024,7 +2024,7 @@
.L72:
mulps %xmm8, %xmm9
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
@ -2208,7 +2208,7 @@
.L82:
mulps %xmm8, %xmm9
addps %xmm9, %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
movsd 4 * SIZE(BO), %xmm9
@ -2395,7 +2395,7 @@
.L92:
mulps %xmm8, %xmm9
addps %xmm9, %xmm0
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
movss 4 * SIZE(BO), %xmm9
@ -2670,7 +2670,7 @@
.L112:
mulps %xmm9, %xmm8
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
@ -2687,7 +2687,7 @@
addps %xmm9, %xmm4
movaps 8 * SIZE(BO), %xmm9
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO)
#endif
mulps %xmm9, %xmm10
@ -2704,7 +2704,7 @@
addps %xmm9, %xmm4
movaps 32 * SIZE(BO), %xmm9
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 32) * SIZE(AO)
#endif
mulps %xmm11, %xmm12
@ -2721,7 +2721,7 @@
addps %xmm11, %xmm4
movaps 24 * SIZE(BO), %xmm11
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 48) * SIZE(AO)
#endif
mulps %xmm11, %xmm14
@ -2857,7 +2857,7 @@
.L122:
mulps %xmm8, %xmm9
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
movaps -28 * SIZE(AO), %xmm8
@ -2873,7 +2873,7 @@
addps %xmm8, %xmm3
movaps 0 * SIZE(AO), %xmm8
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO)
#endif
mulps %xmm10, %xmm11
@ -3003,7 +3003,7 @@
.L132:
mulps %xmm8, %xmm9
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
movsd -30 * SIZE(AO), %xmm8
@ -3150,7 +3150,7 @@
.L142:
mulss %xmm8, %xmm9
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
#endif
movss -31 * SIZE(AO), %xmm8

View File

@ -39,7 +39,7 @@
#define ASSEMBLER
#include "common.h"
#if defined(BARCELONA) || defined(SHANGHAI)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
#define RPREFETCHSIZE (12 + 4)
#define WPREFETCHSIZE (48 + 4)
#define MOVNTQ MOVQ
@ -79,7 +79,7 @@
#define AO3 %r13
#define AO4 %rax
#if defined(BARCELONA) || defined(SHANGHAI)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
#define RPREFETCH prefetch
#else
#define RPREFETCH prefetch

View File

@ -39,7 +39,7 @@
#define ASSEMBLER
#include "common.h"
#if defined(BARCELONA) || defined(SHANGHAI)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
#define RPREFETCHSIZE (12 + 4)
#define WPREFETCHSIZE (12 + 4)
#define MOVNTQ MOVQ
@ -96,7 +96,7 @@
#endif
#if defined(BARCELONA) || defined(SHANGHAI)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
#define RPREFETCH prefetch
#else
#define RPREFETCH prefetch

View File

@ -469,7 +469,7 @@
ALIGN_4
.L71:
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
prefetch PREFETCHSIZE * SIZE(X)
#endif

View File

@ -266,7 +266,7 @@
sarq $5, I
jle .L113
#if defined(BARCELONA) || defined(SHANGHAI)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
movaps %xmm0, %xmm1
mulps -32 * SIZE(X), %xmm1

View File

@ -251,7 +251,7 @@
sarq $4, I
jle .L113
#if defined(BARCELONA) || defined(SHANGHAI)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
movaps %xmm0, %xmm1
mulpd -16 * SIZE(X), %xmm1

View File

@ -76,7 +76,7 @@
#define movsd movlps
#endif
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 16)

View File

@ -76,7 +76,7 @@
#define movsd movlpd
#endif
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 16)

View File

@ -76,7 +76,7 @@
#define movsd movlps
#endif
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 16)

View File

@ -76,7 +76,7 @@
#define movsd movlpd
#endif
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 16)

View File

@ -86,7 +86,7 @@
#define PREFETCHW prefetcht0
#endif
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define movsd movlps

View File

@ -86,7 +86,7 @@
#define PREFETCHW prefetcht0
#endif
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define movsd movlps

View File

@ -86,7 +86,7 @@
#define PREFETCHW prefetcht0
#endif
#if defined(OPTERON) || defined(BARCELONA)
#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define movsd movlps

View File

@ -85,7 +85,7 @@
#define movsd movlpd
#endif
#if defined(BARCELONA) || defined(SHANGHAI)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
#define RPREFETCHSIZE 32
#define WPREFETCHSIZE 48
#endif

View File

@ -160,7 +160,7 @@
#define a3 %xmm14
#define xt1 %xmm15
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define MOVDDUP(a, b, c) movddup a(b), c
#define MOVDDUP2(a, b, c) movddup a##b, c
#else

View File

@ -76,7 +76,7 @@
#define movsd movlpd
#endif
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 16)
@ -167,7 +167,7 @@
#define a3 %xmm14
#define xt1 %xmm15
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI)
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
#define MOVDDUP(a, b, c) movddup a(b), c
#define MOVDDUP2(a, b, c) movddup a##b, c
#else

View File

@ -76,7 +76,7 @@
#define movsd movlpd
#endif
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 16)
@ -166,7 +166,7 @@
#define xt1 %xmm14
#define xt2 %xmm15
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI)
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
#define MOVDDUP(a, b, c) movddup a(b), c
#define MOVDDUP2(a, b, c) movddup a##b, c
#else

View File

@ -76,7 +76,7 @@
#define movsd movlpd
#endif
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (16 * 16)
@ -166,7 +166,7 @@
#define a3 %xmm14
#define xt1 %xmm15
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI)
#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER)
#define MOVDDUP(a, b, c) movddup a(b), c
#define MOVDDUP2(a, b, c) movddup a##b, c
#else

View File

@ -86,7 +86,7 @@
#define BORIG 72(%rsp)
#define BUFFER 128(%rsp)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHNTA prefetchnta

View File

@ -95,7 +95,7 @@
#define PREFETCHSIZE (8 * 6 + 4)
#endif
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHNTA prefetchnta

View File

@ -86,7 +86,7 @@
#define BORIG 72(%rsp)
#define BUFFER 128(%rsp)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHNTA prefetchnta

View File

@ -95,7 +95,7 @@
#define PREFETCHSIZE (8 * 6 + 4)
#endif
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHNTA prefetchnta

View File

@ -86,7 +86,7 @@
#define BORIG 72(%rsp)
#define BUFFER 128(%rsp)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHNTA prefetchnta

View File

@ -95,7 +95,7 @@
#define PREFETCHSIZE (8 * 6 + 4)
#endif
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHNTA prefetchnta

View File

@ -74,6 +74,13 @@
#define ALIGNED_ACCESS
#endif
#ifdef BULLDOZER
#define PREFETCH prefetch
#define PREFETCHW prefetchw
#define PREFETCHSIZE (128 * 5)
#define ALIGNED_ACCESS
#endif
#ifdef NANO
#define PREFETCH prefetcht0
#define PREFETCHW prefetcht0

View File

@ -85,7 +85,7 @@
#define movsd movlps
#endif
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define ALIGNED_ACCESS
#define MOVUPS_A movaps
#define MOVUPS_XL movaps

View File

@ -143,7 +143,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT)
#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER)
#define SNUMOPT 8
#define DNUMOPT 4