From bfaaa975e6789acbce20384d01bd34b122832d18 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Fri, 7 Dec 2012 00:53:31 +0800 Subject: [PATCH] Added BULLDOZER target. So far it uses barcelona kernels. --- TargetList.txt | 1 + driver/others/parameter.c | 2 +- getarch.c | 4 +-- kernel/setparam-ref.c | 16 ++++++++++ kernel/x86/gemm_kernel_4x4_barcelona.S | 20 ++++++------ kernel/x86/scal_sse.S | 2 +- kernel/x86/scal_sse2.S | 2 +- kernel/x86/trsm_kernel_LN_2x4_sse2.S | 10 +++--- kernel/x86/trsm_kernel_LN_4x4_sse.S | 22 ++++++------- kernel/x86/trsm_kernel_LT_2x4_sse2.S | 10 +++--- kernel/x86/trsm_kernel_LT_4x4_sse.S | 22 ++++++------- kernel/x86/trsm_kernel_RT_2x4_sse2.S | 10 +++--- kernel/x86/trsm_kernel_RT_4x4_sse.S | 22 ++++++------- kernel/x86/zgemm3m_kernel_4x4_barcelona.S | 22 ++++++------- kernel/x86/zgemv_n_sse.S | 2 +- kernel/x86/zgemv_n_sse2.S | 2 +- kernel/x86/zgemv_t_sse.S | 2 +- kernel/x86/zgemv_t_sse2.S | 2 +- kernel/x86/ztrsm_kernel_LN_2x2_sse.S | 4 +-- kernel/x86/ztrsm_kernel_LT_2x2_sse.S | 4 +-- kernel/x86/ztrsm_kernel_RT_2x2_sse.S | 4 +-- kernel/x86_64/gemm_kernel_8x4_barcelona.S | 38 +++++++++++------------ kernel/x86_64/gemm_ncopy_4_opteron.S | 4 +-- kernel/x86_64/gemm_tcopy_4_opteron.S | 4 +-- kernel/x86_64/izamax_sse2.S | 2 +- kernel/x86_64/scal_sse.S | 2 +- kernel/x86_64/scal_sse2.S | 2 +- kernel/x86_64/symv_L_sse.S | 2 +- kernel/x86_64/symv_L_sse2.S | 2 +- kernel/x86_64/symv_U_sse.S | 2 +- kernel/x86_64/symv_U_sse2.S | 2 +- kernel/x86_64/trsm_kernel_LN_8x4_sse.S | 2 +- kernel/x86_64/trsm_kernel_LT_8x4_sse.S | 2 +- kernel/x86_64/trsm_kernel_RT_8x4_sse.S | 2 +- kernel/x86_64/zgemm_ncopy_2.S | 2 +- kernel/x86_64/zsymv_L_sse.S | 2 +- kernel/x86_64/zsymv_L_sse2.S | 4 +-- kernel/x86_64/zsymv_U_sse.S | 4 +-- kernel/x86_64/zsymv_U_sse2.S | 4 +-- kernel/x86_64/ztrsm_kernel_LN_2x2_sse2.S | 2 +- kernel/x86_64/ztrsm_kernel_LN_4x2_sse.S | 2 +- kernel/x86_64/ztrsm_kernel_LT_2x2_sse2.S | 2 +- kernel/x86_64/ztrsm_kernel_LT_4x2_sse.S | 2 +- kernel/x86_64/ztrsm_kernel_RT_2x2_sse2.S | 2 +- kernel/x86_64/ztrsm_kernel_RT_4x2_sse.S | 2 +- l1param.h | 7 +++++ l2param.h | 2 +- 47 files changed, 156 insertions(+), 132 deletions(-) diff --git a/TargetList.txt b/TargetList.txt index 1a212e6ca..c859db082 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -29,6 +29,7 @@ BARCELONA SHANGHAI ISTANBUL BOBCAT +BULLDOZER c)VIA CPU: SSE_GENERIC diff --git a/driver/others/parameter.c b/driver/others/parameter.c index d261e5a4e..58e5fb11d 100644 --- a/driver/others/parameter.c +++ b/driver/others/parameter.c @@ -163,7 +163,7 @@ int get_L2_size(void){ int eax, ebx, ecx, edx; -#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || \ +#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) diff --git a/getarch.c b/getarch.c index 4daf260f0..2b9856338 100644 --- a/getarch.c +++ b/getarch.c @@ -385,12 +385,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define FORCE_INTEL #define ARCHITECTURE "X86" #define SUBARCHITECTURE "BULLDOZER" -#define ARCHCONFIG "-DBARCELONA " \ +#define ARCHCONFIG "-DBULLDOZER " \ "-DL1_DATA_SIZE=49152 -DL1_DATA_LINESIZE=64 " \ "-DL2_SIZE=1024000 -DL2_LINESIZE=64 -DL3_SIZE=16777216 " \ "-DDTB_DEFAULT_ENTRIES=32 -DDTB_SIZE=4096 " \ "-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \ - "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU" \ + "-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU " \ "-DHAVE_AVX -DHAVE_FMA4" #define LIBNAME "bulldozer" #define CORENAME "BULLDOZER" diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index f57b425e6..e8db76871 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -810,6 +810,22 @@ static void init_parameter(void) { #endif #endif +#ifdef BULLDOZER + +#ifdef DEBUG + fprintf(stderr, "Bulldozer\n"); +#endif + + TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; + TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; + TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; + TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#ifdef EXPRECISION + TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; + TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; +#endif +#endif + #ifdef NANO #ifdef DEBUG diff --git a/kernel/x86/gemm_kernel_4x4_barcelona.S b/kernel/x86/gemm_kernel_4x4_barcelona.S index 18b9a43bd..f081aec2a 100644 --- a/kernel/x86/gemm_kernel_4x4_barcelona.S +++ b/kernel/x86/gemm_kernel_4x4_barcelona.S @@ -596,7 +596,7 @@ .L22: mulps %xmm0, %xmm2 addps %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movsd 4 * SIZE(BB), %xmm2 @@ -842,7 +842,7 @@ .L32: mulss %xmm0, %xmm2 addss %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movss 4 * SIZE(BB), %xmm2 @@ -1168,7 +1168,7 @@ .L52: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulps 4 * SIZE(BB), %xmm0 @@ -1198,7 +1198,7 @@ addps %xmm0, %xmm5 movaps 32 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) #endif mulps %xmm1, %xmm2 @@ -1347,7 +1347,7 @@ ALIGN_4 .L62: -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif @@ -1531,7 +1531,7 @@ .L72: mulss %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulss 4 * SIZE(BB), %xmm0 @@ -1778,7 +1778,7 @@ .L92: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movaps 4 * SIZE(AA), %xmm0 @@ -1793,7 +1793,7 @@ mulps 12 * SIZE(BB), %xmm0 addps %xmm0, %xmm7 movaps 32 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) #endif mulps %xmm1, %xmm3 @@ -1924,7 +1924,7 @@ .L102: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movsd 2 * SIZE(AA), %xmm0 @@ -2069,7 +2069,7 @@ .L112: mulss %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movss 1 * SIZE(AA), %xmm0 diff --git a/kernel/x86/scal_sse.S b/kernel/x86/scal_sse.S index aa5ab760e..48edfc585 100644 --- a/kernel/x86/scal_sse.S +++ b/kernel/x86/scal_sse.S @@ -269,7 +269,7 @@ sarl $5, I jle .L113 -#if defined(BARCELONA) +#if defined(BARCELONA) || defined(BULLDOZER) movaps %xmm0, %xmm1 mulps -32 * SIZE(X), %xmm1 diff --git a/kernel/x86/scal_sse2.S b/kernel/x86/scal_sse2.S index 67c1f437b..35b79132c 100644 --- a/kernel/x86/scal_sse2.S +++ b/kernel/x86/scal_sse2.S @@ -253,7 +253,7 @@ sarl $4, I jle .L113 -#if defined(BARCELONA) +#if defined(BARCELONA) || defined(BULLDOZER) movaps %xmm0, %xmm1 mulpd -16 * SIZE(X), %xmm1 diff --git a/kernel/x86/trsm_kernel_LN_2x4_sse2.S b/kernel/x86/trsm_kernel_LN_2x4_sse2.S index 2b6877a31..036e17338 100644 --- a/kernel/x86/trsm_kernel_LN_2x4_sse2.S +++ b/kernel/x86/trsm_kernel_LN_2x4_sse2.S @@ -69,7 +69,7 @@ #define STACK_ALIGN 4096 #define STACK_OFFSET 1024 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHSIZE (8 * 10 + 4) #endif @@ -439,7 +439,7 @@ .L22: mulsd %xmm0, %xmm2 addsd %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) #endif movlpd 2 * SIZE(BB), %xmm2 @@ -488,7 +488,7 @@ movlpd 40 * SIZE(BB), %xmm3 addsd %xmm0, %xmm7 movlpd 8 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) #endif mulsd %xmm1, %xmm2 @@ -1697,7 +1697,7 @@ .L42: mulpd %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulpd 2 * SIZE(BB), %xmm0 @@ -1727,7 +1727,7 @@ addpd %xmm0, %xmm7 movapd 16 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) #endif mulpd %xmm1, %xmm2 diff --git a/kernel/x86/trsm_kernel_LN_4x4_sse.S b/kernel/x86/trsm_kernel_LN_4x4_sse.S index 82bb1d3ec..84da443a8 100644 --- a/kernel/x86/trsm_kernel_LN_4x4_sse.S +++ b/kernel/x86/trsm_kernel_LN_4x4_sse.S @@ -64,7 +64,7 @@ #define BORIG 60(%esp) #define BUFFER 128(%esp) -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 10 + 8) @@ -437,7 +437,7 @@ .L32: mulss %xmm0, %xmm2 addss %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movss 4 * SIZE(BB), %xmm2 @@ -833,7 +833,7 @@ .L22: mulps %xmm0, %xmm2 addps %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movaps 4 * SIZE(BB), %xmm2 @@ -1848,7 +1848,7 @@ .L72: mulss %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulss 4 * SIZE(BB), %xmm0 @@ -2109,7 +2109,7 @@ ALIGN_4 .L62: -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif @@ -2429,7 +2429,7 @@ .L52: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulps 4 * SIZE(BB), %xmm0 @@ -2459,7 +2459,7 @@ addps %xmm0, %xmm5 movaps 32 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) #endif mulps %xmm1, %xmm2 @@ -2952,7 +2952,7 @@ .L112: mulss %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movss 1 * SIZE(AA), %xmm0 @@ -3148,7 +3148,7 @@ .L102: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movsd 2 * SIZE(AA), %xmm0 @@ -3389,7 +3389,7 @@ .L92: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movaps 4 * SIZE(AA), %xmm0 @@ -3404,7 +3404,7 @@ mulps 12 * SIZE(BB), %xmm0 addps %xmm0, %xmm7 movaps 32 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) #endif mulps %xmm1, %xmm3 diff --git a/kernel/x86/trsm_kernel_LT_2x4_sse2.S b/kernel/x86/trsm_kernel_LT_2x4_sse2.S index d81177b7e..0bd924cba 100644 --- a/kernel/x86/trsm_kernel_LT_2x4_sse2.S +++ b/kernel/x86/trsm_kernel_LT_2x4_sse2.S @@ -69,7 +69,7 @@ #define STACK_ALIGN 4096 #define STACK_OFFSET 1024 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHSIZE (8 * 10 + 4) #endif @@ -910,7 +910,7 @@ .L22: mulsd %xmm0, %xmm2 addsd %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) #endif movlpd 2 * SIZE(BB), %xmm2 @@ -959,7 +959,7 @@ movlpd 40 * SIZE(BB), %xmm3 addsd %xmm0, %xmm7 movlpd 8 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) #endif mulsd %xmm1, %xmm2 @@ -1439,7 +1439,7 @@ .L42: mulpd %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulpd 2 * SIZE(BB), %xmm0 @@ -1469,7 +1469,7 @@ addpd %xmm0, %xmm7 movapd 16 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) #endif mulpd %xmm1, %xmm2 diff --git a/kernel/x86/trsm_kernel_LT_4x4_sse.S b/kernel/x86/trsm_kernel_LT_4x4_sse.S index 854c44e7a..de7c04593 100644 --- a/kernel/x86/trsm_kernel_LT_4x4_sse.S +++ b/kernel/x86/trsm_kernel_LT_4x4_sse.S @@ -64,7 +64,7 @@ #define BORIG 60(%esp) #define BUFFER 128(%esp) -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 10 + 8) @@ -872,7 +872,7 @@ .L22: mulps %xmm0, %xmm2 addps %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movaps 4 * SIZE(BB), %xmm2 @@ -1316,7 +1316,7 @@ .L32: mulss %xmm0, %xmm2 addss %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movss 4 * SIZE(BB), %xmm2 @@ -1855,7 +1855,7 @@ .L52: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulps 4 * SIZE(BB), %xmm0 @@ -1885,7 +1885,7 @@ addps %xmm0, %xmm5 movaps 32 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) #endif mulps %xmm1, %xmm2 @@ -2249,7 +2249,7 @@ ALIGN_4 .L62: -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif @@ -2562,7 +2562,7 @@ .L72: mulss %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulss 4 * SIZE(BB), %xmm0 @@ -2957,7 +2957,7 @@ .L92: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movaps 4 * SIZE(AA), %xmm0 @@ -2972,7 +2972,7 @@ mulps 12 * SIZE(BB), %xmm0 addps %xmm0, %xmm7 movaps 32 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) #endif mulps %xmm1, %xmm3 @@ -3280,7 +3280,7 @@ .L102: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movsd 2 * SIZE(AA), %xmm0 @@ -3515,7 +3515,7 @@ .L112: mulss %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movss 1 * SIZE(AA), %xmm0 diff --git a/kernel/x86/trsm_kernel_RT_2x4_sse2.S b/kernel/x86/trsm_kernel_RT_2x4_sse2.S index f7a08c699..f5d5ad465 100644 --- a/kernel/x86/trsm_kernel_RT_2x4_sse2.S +++ b/kernel/x86/trsm_kernel_RT_2x4_sse2.S @@ -69,7 +69,7 @@ #define STACK_ALIGN 4096 #define STACK_OFFSET 1024 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHSIZE (8 * 10 + 4) #endif @@ -1036,7 +1036,7 @@ .L42: mulpd %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulpd 2 * SIZE(BB), %xmm0 @@ -1066,7 +1066,7 @@ addpd %xmm0, %xmm7 movapd 16 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 8) * SIZE(AA) #endif mulpd %xmm1, %xmm2 @@ -2224,7 +2224,7 @@ .L22: mulsd %xmm0, %xmm2 addsd %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AA) #endif movlpd 2 * SIZE(BB), %xmm2 @@ -2273,7 +2273,7 @@ movlpd 40 * SIZE(BB), %xmm3 addsd %xmm0, %xmm7 movlpd 8 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 8) * SIZE(AA) #endif mulsd %xmm1, %xmm2 diff --git a/kernel/x86/trsm_kernel_RT_4x4_sse.S b/kernel/x86/trsm_kernel_RT_4x4_sse.S index 80dc2451c..5c2dcd0d6 100644 --- a/kernel/x86/trsm_kernel_RT_4x4_sse.S +++ b/kernel/x86/trsm_kernel_RT_4x4_sse.S @@ -64,7 +64,7 @@ #define BORIG 60(%esp) #define BUFFER 128(%esp) -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 10 + 8) @@ -439,7 +439,7 @@ .L92: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movaps 4 * SIZE(AA), %xmm0 @@ -454,7 +454,7 @@ mulps 12 * SIZE(BB), %xmm0 addps %xmm0, %xmm7 movaps 32 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) #endif mulps %xmm1, %xmm3 @@ -758,7 +758,7 @@ .L102: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movsd 2 * SIZE(AA), %xmm0 @@ -993,7 +993,7 @@ .L112: mulss %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movss 1 * SIZE(AA), %xmm0 @@ -1324,7 +1324,7 @@ .L52: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulps 4 * SIZE(BB), %xmm0 @@ -1354,7 +1354,7 @@ addps %xmm0, %xmm5 movaps 32 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) #endif mulps %xmm1, %xmm2 @@ -1718,7 +1718,7 @@ ALIGN_4 .L62: -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif @@ -2031,7 +2031,7 @@ .L72: mulss %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulss 4 * SIZE(BB), %xmm0 @@ -2859,7 +2859,7 @@ .L22: mulps %xmm0, %xmm2 addps %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movaps 4 * SIZE(BB), %xmm2 @@ -3303,7 +3303,7 @@ .L32: mulss %xmm0, %xmm2 addss %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movss 4 * SIZE(BB), %xmm2 diff --git a/kernel/x86/zgemm3m_kernel_4x4_barcelona.S b/kernel/x86/zgemm3m_kernel_4x4_barcelona.S index 29158df25..623f0beec 100644 --- a/kernel/x86/zgemm3m_kernel_4x4_barcelona.S +++ b/kernel/x86/zgemm3m_kernel_4x4_barcelona.S @@ -74,7 +74,7 @@ #define BB %ecx #define LDC %ebp -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) #define movsd movlps #endif @@ -625,7 +625,7 @@ .L22: mulps %xmm0, %xmm2 addps %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movsd 4 * SIZE(BB), %xmm2 @@ -870,7 +870,7 @@ .L32: mulss %xmm0, %xmm2 addss %xmm2, %xmm4 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movss 4 * SIZE(BB), %xmm2 @@ -1173,7 +1173,7 @@ .L52: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulps 4 * SIZE(BB), %xmm0 @@ -1203,7 +1203,7 @@ addps %xmm0, %xmm5 movaps 32 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) #endif mulps %xmm1, %xmm2 @@ -1359,7 +1359,7 @@ ALIGN_4 .L62: -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif @@ -1536,7 +1536,7 @@ .L72: mulss %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif mulss 4 * SIZE(BB), %xmm0 @@ -1794,7 +1794,7 @@ .L92: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movaps 4 * SIZE(AA), %xmm0 @@ -1809,7 +1809,7 @@ mulps 12 * SIZE(BB), %xmm0 addps %xmm0, %xmm7 movaps 32 * SIZE(AA), %xmm0 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 16) * SIZE(AA) #endif mulps %xmm1, %xmm3 @@ -1936,7 +1936,7 @@ .L102: mulps %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movsd 2 * SIZE(AA), %xmm0 @@ -2069,7 +2069,7 @@ .L112: mulss %xmm0, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) prefetcht0 (PREFETCHSIZE + 0) * SIZE(AA) #endif movss 1 * SIZE(AA), %xmm0 diff --git a/kernel/x86/zgemv_n_sse.S b/kernel/x86/zgemv_n_sse.S index 8e28bb8e6..0087ac6f4 100644 --- a/kernel/x86/zgemv_n_sse.S +++ b/kernel/x86/zgemv_n_sse.S @@ -71,7 +71,7 @@ #define movsd movlps #endif -#ifdef BARCELONA +#if defined(BARCELONA) || defined(BULLDOZER) #define PREFETCH prefetchnta #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 5) diff --git a/kernel/x86/zgemv_n_sse2.S b/kernel/x86/zgemv_n_sse2.S index 607c51de0..f0f2dc0ec 100644 --- a/kernel/x86/zgemv_n_sse2.S +++ b/kernel/x86/zgemv_n_sse2.S @@ -58,7 +58,7 @@ #define movsd movlps #endif -#ifdef BARCELONA +#if defined(BARCELONA) || defined(BULLDOZER) #define PREFETCH prefetchnta #define PREFETCHW prefetchw #define PREFETCHSIZE (8 * 5) diff --git a/kernel/x86/zgemv_t_sse.S b/kernel/x86/zgemv_t_sse.S index fb98226ee..c7ad91235 100644 --- a/kernel/x86/zgemv_t_sse.S +++ b/kernel/x86/zgemv_t_sse.S @@ -71,7 +71,7 @@ #define movsd movlps #endif -#ifdef BARCELONA +#if defined(BARCELONA) || defined(BULLDOZER) #define PREFETCH prefetchnta #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 5) diff --git a/kernel/x86/zgemv_t_sse2.S b/kernel/x86/zgemv_t_sse2.S index e2f391a82..6c4842893 100644 --- a/kernel/x86/zgemv_t_sse2.S +++ b/kernel/x86/zgemv_t_sse2.S @@ -58,7 +58,7 @@ #define movsd movlps #endif -#ifdef BARCELONA +#if defined(BARCELONA) || defined(BULLDOZER) #define PREFETCH prefetchnta #define PREFETCHW prefetchw #define PREFETCHSIZE (8 * 5) diff --git a/kernel/x86/ztrsm_kernel_LN_2x2_sse.S b/kernel/x86/ztrsm_kernel_LN_2x2_sse.S index ee9eb9d25..d32451574 100644 --- a/kernel/x86/ztrsm_kernel_LN_2x2_sse.S +++ b/kernel/x86/ztrsm_kernel_LN_2x2_sse.S @@ -75,7 +75,7 @@ #define STACK_ALIGN 4096 #define STACK_OFFSET 1024 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCHSIZE (16 * 10 + 8) #define WPREFETCHSIZE 112 #define PREFETCH prefetch @@ -533,7 +533,7 @@ addps %xmm0, %xmm7 movsd 16 * SIZE(AA), %xmm0 mulps %xmm1, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) #endif addps %xmm2, %xmm4 diff --git a/kernel/x86/ztrsm_kernel_LT_2x2_sse.S b/kernel/x86/ztrsm_kernel_LT_2x2_sse.S index 9ef572470..9f9449852 100644 --- a/kernel/x86/ztrsm_kernel_LT_2x2_sse.S +++ b/kernel/x86/ztrsm_kernel_LT_2x2_sse.S @@ -75,7 +75,7 @@ #define STACK_ALIGN 4096 #define STACK_OFFSET 1024 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCHSIZE (16 * 10 + 8) #define WPREFETCHSIZE 112 #define PREFETCH prefetch @@ -994,7 +994,7 @@ addps %xmm0, %xmm7 movsd 16 * SIZE(AA), %xmm0 mulps %xmm1, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) #endif addps %xmm2, %xmm4 diff --git a/kernel/x86/ztrsm_kernel_RT_2x2_sse.S b/kernel/x86/ztrsm_kernel_RT_2x2_sse.S index cd1bf2f53..dd0c5ab21 100644 --- a/kernel/x86/ztrsm_kernel_RT_2x2_sse.S +++ b/kernel/x86/ztrsm_kernel_RT_2x2_sse.S @@ -75,7 +75,7 @@ #define STACK_ALIGN 4096 #define STACK_OFFSET 1024 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCHSIZE (16 * 10 + 8) #define WPREFETCHSIZE 112 #define PREFETCH prefetch @@ -1820,7 +1820,7 @@ addps %xmm0, %xmm7 movsd 16 * SIZE(AA), %xmm0 mulps %xmm1, %xmm2 -#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) prefetcht1 (PREFETCHSIZE + 16) * SIZE(AA) #endif addps %xmm2, %xmm4 diff --git a/kernel/x86_64/gemm_kernel_8x4_barcelona.S b/kernel/x86_64/gemm_kernel_8x4_barcelona.S index b40c8bac7..becd19544 100644 --- a/kernel/x86_64/gemm_kernel_8x4_barcelona.S +++ b/kernel/x86_64/gemm_kernel_8x4_barcelona.S @@ -930,7 +930,7 @@ .L22: mulps %xmm8, %xmm9 addps %xmm9, %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif movaps 4 * SIZE(BO), %xmm9 @@ -983,7 +983,7 @@ addps %xmm8, %xmm3 movaps 0 * SIZE(AO), %xmm8 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) #endif mulps %xmm10, %xmm9 @@ -1178,7 +1178,7 @@ .L32: mulps %xmm8, %xmm9 addps %xmm9, %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif movsd 4 * SIZE(BO), %xmm9 @@ -1423,7 +1423,7 @@ .L42: mulss %xmm8, %xmm9 addss %xmm9, %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif movss 4 * SIZE(BO), %xmm9 @@ -1765,7 +1765,7 @@ .L62: mulps %xmm8, %xmm9 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif mulps 4 * SIZE(BO), %xmm8 @@ -1793,7 +1793,7 @@ addps %xmm8, %xmm5 movaps 32 * SIZE(AO), %xmm8 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) #endif mulps %xmm10, %xmm11 @@ -1822,7 +1822,7 @@ addps %xmm10, %xmm5 movaps 48 * SIZE(AO), %xmm10 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 32) * SIZE(AO) #endif mulps %xmm12, %xmm13 @@ -1851,7 +1851,7 @@ addps %xmm12, %xmm5 movaps 64 * SIZE(AO), %xmm12 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 48) * SIZE(AO) #endif mulps %xmm14, %xmm15 @@ -2024,7 +2024,7 @@ .L72: mulps %xmm8, %xmm9 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif @@ -2208,7 +2208,7 @@ .L82: mulps %xmm8, %xmm9 addps %xmm9, %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif movsd 4 * SIZE(BO), %xmm9 @@ -2395,7 +2395,7 @@ .L92: mulps %xmm8, %xmm9 addps %xmm9, %xmm0 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif movss 4 * SIZE(BO), %xmm9 @@ -2670,7 +2670,7 @@ .L112: mulps %xmm9, %xmm8 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif @@ -2687,7 +2687,7 @@ addps %xmm9, %xmm4 movaps 8 * SIZE(BO), %xmm9 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) #endif mulps %xmm9, %xmm10 @@ -2704,7 +2704,7 @@ addps %xmm9, %xmm4 movaps 32 * SIZE(BO), %xmm9 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 32) * SIZE(AO) #endif mulps %xmm11, %xmm12 @@ -2721,7 +2721,7 @@ addps %xmm11, %xmm4 movaps 24 * SIZE(BO), %xmm11 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 48) * SIZE(AO) #endif mulps %xmm11, %xmm14 @@ -2857,7 +2857,7 @@ .L122: mulps %xmm8, %xmm9 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif movaps -28 * SIZE(AO), %xmm8 @@ -2873,7 +2873,7 @@ addps %xmm8, %xmm3 movaps 0 * SIZE(AO), %xmm8 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 16) * SIZE(AO) #endif mulps %xmm10, %xmm11 @@ -3003,7 +3003,7 @@ .L132: mulps %xmm8, %xmm9 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif movsd -30 * SIZE(AO), %xmm8 @@ -3150,7 +3150,7 @@ .L142: mulss %xmm8, %xmm9 -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) PREFETCH (PREFETCHSIZE + 0) * SIZE(AO) #endif movss -31 * SIZE(AO), %xmm8 diff --git a/kernel/x86_64/gemm_ncopy_4_opteron.S b/kernel/x86_64/gemm_ncopy_4_opteron.S index edde7e2c1..e5cbd62eb 100644 --- a/kernel/x86_64/gemm_ncopy_4_opteron.S +++ b/kernel/x86_64/gemm_ncopy_4_opteron.S @@ -39,7 +39,7 @@ #define ASSEMBLER #include "common.h" -#if defined(BARCELONA) || defined(SHANGHAI) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) #define RPREFETCHSIZE (12 + 4) #define WPREFETCHSIZE (48 + 4) #define MOVNTQ MOVQ @@ -79,7 +79,7 @@ #define AO3 %r13 #define AO4 %rax -#if defined(BARCELONA) || defined(SHANGHAI) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) #define RPREFETCH prefetch #else #define RPREFETCH prefetch diff --git a/kernel/x86_64/gemm_tcopy_4_opteron.S b/kernel/x86_64/gemm_tcopy_4_opteron.S index 459eeb8c5..105fe3b47 100644 --- a/kernel/x86_64/gemm_tcopy_4_opteron.S +++ b/kernel/x86_64/gemm_tcopy_4_opteron.S @@ -39,7 +39,7 @@ #define ASSEMBLER #include "common.h" -#if defined(BARCELONA) || defined(SHANGHAI) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) #define RPREFETCHSIZE (12 + 4) #define WPREFETCHSIZE (12 + 4) #define MOVNTQ MOVQ @@ -96,7 +96,7 @@ #endif -#if defined(BARCELONA) || defined(SHANGHAI) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) #define RPREFETCH prefetch #else #define RPREFETCH prefetch diff --git a/kernel/x86_64/izamax_sse2.S b/kernel/x86_64/izamax_sse2.S index 4e66e5338..404608256 100644 --- a/kernel/x86_64/izamax_sse2.S +++ b/kernel/x86_64/izamax_sse2.S @@ -469,7 +469,7 @@ ALIGN_4 .L71: -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) prefetch PREFETCHSIZE * SIZE(X) #endif diff --git a/kernel/x86_64/scal_sse.S b/kernel/x86_64/scal_sse.S index 323e8b9dd..9c8dd9dc2 100644 --- a/kernel/x86_64/scal_sse.S +++ b/kernel/x86_64/scal_sse.S @@ -266,7 +266,7 @@ sarq $5, I jle .L113 -#if defined(BARCELONA) || defined(SHANGHAI) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) movaps %xmm0, %xmm1 mulps -32 * SIZE(X), %xmm1 diff --git a/kernel/x86_64/scal_sse2.S b/kernel/x86_64/scal_sse2.S index 8f5612081..3823b1fc9 100644 --- a/kernel/x86_64/scal_sse2.S +++ b/kernel/x86_64/scal_sse2.S @@ -251,7 +251,7 @@ sarq $4, I jle .L113 -#if defined(BARCELONA) || defined(SHANGHAI) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) movaps %xmm0, %xmm1 mulpd -16 * SIZE(X), %xmm1 diff --git a/kernel/x86_64/symv_L_sse.S b/kernel/x86_64/symv_L_sse.S index 5a123d7f6..0f1ebd564 100644 --- a/kernel/x86_64/symv_L_sse.S +++ b/kernel/x86_64/symv_L_sse.S @@ -76,7 +76,7 @@ #define movsd movlps #endif -#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 16) diff --git a/kernel/x86_64/symv_L_sse2.S b/kernel/x86_64/symv_L_sse2.S index 8afdc87db..9dd123c52 100644 --- a/kernel/x86_64/symv_L_sse2.S +++ b/kernel/x86_64/symv_L_sse2.S @@ -76,7 +76,7 @@ #define movsd movlpd #endif -#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 16) diff --git a/kernel/x86_64/symv_U_sse.S b/kernel/x86_64/symv_U_sse.S index 5aef6b461..93a66aaa7 100644 --- a/kernel/x86_64/symv_U_sse.S +++ b/kernel/x86_64/symv_U_sse.S @@ -76,7 +76,7 @@ #define movsd movlps #endif -#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 16) diff --git a/kernel/x86_64/symv_U_sse2.S b/kernel/x86_64/symv_U_sse2.S index fa1bfba85..f412b3e2f 100644 --- a/kernel/x86_64/symv_U_sse2.S +++ b/kernel/x86_64/symv_U_sse2.S @@ -76,7 +76,7 @@ #define movsd movlpd #endif -#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 16) diff --git a/kernel/x86_64/trsm_kernel_LN_8x4_sse.S b/kernel/x86_64/trsm_kernel_LN_8x4_sse.S index 513572ee9..552dbacdc 100644 --- a/kernel/x86_64/trsm_kernel_LN_8x4_sse.S +++ b/kernel/x86_64/trsm_kernel_LN_8x4_sse.S @@ -86,7 +86,7 @@ #define PREFETCHW prefetcht0 #endif -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define movsd movlps diff --git a/kernel/x86_64/trsm_kernel_LT_8x4_sse.S b/kernel/x86_64/trsm_kernel_LT_8x4_sse.S index 526a78c57..7727fd591 100644 --- a/kernel/x86_64/trsm_kernel_LT_8x4_sse.S +++ b/kernel/x86_64/trsm_kernel_LT_8x4_sse.S @@ -86,7 +86,7 @@ #define PREFETCHW prefetcht0 #endif -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define movsd movlps diff --git a/kernel/x86_64/trsm_kernel_RT_8x4_sse.S b/kernel/x86_64/trsm_kernel_RT_8x4_sse.S index e96496fd6..699364941 100644 --- a/kernel/x86_64/trsm_kernel_RT_8x4_sse.S +++ b/kernel/x86_64/trsm_kernel_RT_8x4_sse.S @@ -86,7 +86,7 @@ #define PREFETCHW prefetcht0 #endif -#if defined(OPTERON) || defined(BARCELONA) +#if defined(OPTERON) || defined(BARCELONA) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define movsd movlps diff --git a/kernel/x86_64/zgemm_ncopy_2.S b/kernel/x86_64/zgemm_ncopy_2.S index bf318b7ff..8876b61ff 100644 --- a/kernel/x86_64/zgemm_ncopy_2.S +++ b/kernel/x86_64/zgemm_ncopy_2.S @@ -85,7 +85,7 @@ #define movsd movlpd #endif -#if defined(BARCELONA) || defined(SHANGHAI) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) #define RPREFETCHSIZE 32 #define WPREFETCHSIZE 48 #endif diff --git a/kernel/x86_64/zsymv_L_sse.S b/kernel/x86_64/zsymv_L_sse.S index 6af65a4ba..dcfe83189 100644 --- a/kernel/x86_64/zsymv_L_sse.S +++ b/kernel/x86_64/zsymv_L_sse.S @@ -160,7 +160,7 @@ #define a3 %xmm14 #define xt1 %xmm15 -#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define MOVDDUP(a, b, c) movddup a(b), c #define MOVDDUP2(a, b, c) movddup a##b, c #else diff --git a/kernel/x86_64/zsymv_L_sse2.S b/kernel/x86_64/zsymv_L_sse2.S index 71aca0198..04605e3cb 100644 --- a/kernel/x86_64/zsymv_L_sse2.S +++ b/kernel/x86_64/zsymv_L_sse2.S @@ -76,7 +76,7 @@ #define movsd movlpd #endif -#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 16) @@ -167,7 +167,7 @@ #define a3 %xmm14 #define xt1 %xmm15 -#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) +#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) #define MOVDDUP(a, b, c) movddup a(b), c #define MOVDDUP2(a, b, c) movddup a##b, c #else diff --git a/kernel/x86_64/zsymv_U_sse.S b/kernel/x86_64/zsymv_U_sse.S index 4b8422d82..e8b01ad7a 100644 --- a/kernel/x86_64/zsymv_U_sse.S +++ b/kernel/x86_64/zsymv_U_sse.S @@ -76,7 +76,7 @@ #define movsd movlpd #endif -#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 16) @@ -166,7 +166,7 @@ #define xt1 %xmm14 #define xt2 %xmm15 -#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) +#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) #define MOVDDUP(a, b, c) movddup a(b), c #define MOVDDUP2(a, b, c) movddup a##b, c #else diff --git a/kernel/x86_64/zsymv_U_sse2.S b/kernel/x86_64/zsymv_U_sse2.S index 33667f79e..40246e52e 100644 --- a/kernel/x86_64/zsymv_U_sse2.S +++ b/kernel/x86_64/zsymv_U_sse2.S @@ -76,7 +76,7 @@ #define movsd movlpd #endif -#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHSIZE (16 * 16) @@ -166,7 +166,7 @@ #define a3 %xmm14 #define xt1 %xmm15 -#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) +#if (defined(HAVE_SSE3) && !defined(CORE_OPTERON)) || defined(BARCELONA) || defined(SHANGHAI) || defined(BULLDOZER) #define MOVDDUP(a, b, c) movddup a(b), c #define MOVDDUP2(a, b, c) movddup a##b, c #else diff --git a/kernel/x86_64/ztrsm_kernel_LN_2x2_sse2.S b/kernel/x86_64/ztrsm_kernel_LN_2x2_sse2.S index b8caa9a44..79f20b641 100644 --- a/kernel/x86_64/ztrsm_kernel_LN_2x2_sse2.S +++ b/kernel/x86_64/ztrsm_kernel_LN_2x2_sse2.S @@ -86,7 +86,7 @@ #define BORIG 72(%rsp) #define BUFFER 128(%rsp) -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHNTA prefetchnta diff --git a/kernel/x86_64/ztrsm_kernel_LN_4x2_sse.S b/kernel/x86_64/ztrsm_kernel_LN_4x2_sse.S index 2db8cbc5d..f5c100ec1 100644 --- a/kernel/x86_64/ztrsm_kernel_LN_4x2_sse.S +++ b/kernel/x86_64/ztrsm_kernel_LN_4x2_sse.S @@ -95,7 +95,7 @@ #define PREFETCHSIZE (8 * 6 + 4) #endif -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHNTA prefetchnta diff --git a/kernel/x86_64/ztrsm_kernel_LT_2x2_sse2.S b/kernel/x86_64/ztrsm_kernel_LT_2x2_sse2.S index 16c9ca828..18edeed57 100644 --- a/kernel/x86_64/ztrsm_kernel_LT_2x2_sse2.S +++ b/kernel/x86_64/ztrsm_kernel_LT_2x2_sse2.S @@ -86,7 +86,7 @@ #define BORIG 72(%rsp) #define BUFFER 128(%rsp) -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHNTA prefetchnta diff --git a/kernel/x86_64/ztrsm_kernel_LT_4x2_sse.S b/kernel/x86_64/ztrsm_kernel_LT_4x2_sse.S index dbdbfe2e1..f58cecdf5 100644 --- a/kernel/x86_64/ztrsm_kernel_LT_4x2_sse.S +++ b/kernel/x86_64/ztrsm_kernel_LT_4x2_sse.S @@ -95,7 +95,7 @@ #define PREFETCHSIZE (8 * 6 + 4) #endif -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHNTA prefetchnta diff --git a/kernel/x86_64/ztrsm_kernel_RT_2x2_sse2.S b/kernel/x86_64/ztrsm_kernel_RT_2x2_sse2.S index 181cdd29c..1b589e0cf 100644 --- a/kernel/x86_64/ztrsm_kernel_RT_2x2_sse2.S +++ b/kernel/x86_64/ztrsm_kernel_RT_2x2_sse2.S @@ -86,7 +86,7 @@ #define BORIG 72(%rsp) #define BUFFER 128(%rsp) -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHNTA prefetchnta diff --git a/kernel/x86_64/ztrsm_kernel_RT_4x2_sse.S b/kernel/x86_64/ztrsm_kernel_RT_4x2_sse.S index c28d02927..2c47ce3fd 100644 --- a/kernel/x86_64/ztrsm_kernel_RT_4x2_sse.S +++ b/kernel/x86_64/ztrsm_kernel_RT_4x2_sse.S @@ -95,7 +95,7 @@ #define PREFETCHSIZE (8 * 6 + 4) #endif -#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(OPTERON) || defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define PREFETCH prefetch #define PREFETCHW prefetchw #define PREFETCHNTA prefetchnta diff --git a/l1param.h b/l1param.h index 6fe756f17..0b216c7c5 100644 --- a/l1param.h +++ b/l1param.h @@ -74,6 +74,13 @@ #define ALIGNED_ACCESS #endif +#ifdef BULLDOZER +#define PREFETCH prefetch +#define PREFETCHW prefetchw +#define PREFETCHSIZE (128 * 5) +#define ALIGNED_ACCESS +#endif + #ifdef NANO #define PREFETCH prefetcht0 #define PREFETCHW prefetcht0 diff --git a/l2param.h b/l2param.h index cdbd8805e..01fe7943d 100644 --- a/l2param.h +++ b/l2param.h @@ -85,7 +85,7 @@ #define movsd movlps #endif -#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) +#if defined(BARCELONA) || defined(SHANGHAI) || defined(BOBCAT) || defined(BULLDOZER) #define ALIGNED_ACCESS #define MOVUPS_A movaps #define MOVUPS_XL movaps