From 44d23881b5e68fe7fcdfa9f8d0e9a9d9fac78090 Mon Sep 17 00:00:00 2001 From: wernsaar Date: Mon, 5 Aug 2013 11:27:16 +0200 Subject: [PATCH] dtrsm_kernel_LT_8x2_bulldozer.S performance optimization --- kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S b/kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S index 12bca72b2..374f45096 100644 --- a/kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S +++ b/kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S @@ -84,6 +84,9 @@ #endif +#define A_PR1 384 +#define B_PR1 192 + .macro KERNEL8x2_SUB vmovddup -16*SIZE(BO,%rax,2), %xmm1 @@ -708,9 +711,14 @@ ALIGN_4 .L52: + prefetcht0 A_PR1(AO,%rax,8) + prefetcht0 B_PR1(BO,%rax,2) KERNEL8x2_SUB + prefetcht0 A_PR1(AO,%rax,8) KERNEL8x2_SUB + prefetcht0 A_PR1(AO,%rax,8) KERNEL8x2_SUB + prefetcht0 A_PR1(AO,%rax,8) KERNEL8x2_SUB jl .L52