diff --git a/kernel/x86_64/dgemm_kernel_4x8_haswell.S b/kernel/x86_64/dgemm_kernel_4x8_haswell.S index 3f7f9a98e..5242e3efe 100644 --- a/kernel/x86_64/dgemm_kernel_4x8_haswell.S +++ b/kernel/x86_64/dgemm_kernel_4x8_haswell.S @@ -267,24 +267,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro SAVE4x12 - prefetcht0 128(%rsp) /*BUFFER 1*/ + prefetcht0 BUFFER1 vbroadcastsd ALPHA, %ymm0 vmulpd %ymm0 , %ymm4 , %ymm4 vmulpd %ymm0 , %ymm5 , %ymm5 vmulpd %ymm0 , %ymm6 , %ymm6 vmulpd %ymm0 , %ymm7 , %ymm7 - prefetcht0 192(%rsp) + prefetcht0 64 + BUFFER1 vmulpd %ymm0 , %ymm8 , %ymm8 vmulpd %ymm0 , %ymm9 , %ymm9 vmulpd %ymm0 , %ymm10, %ymm10 vmulpd %ymm0 , %ymm11, %ymm11 - prefetcht0 256(%rsp) + prefetcht0 128 + BUFFER1 vmulpd %ymm0 , %ymm12, %ymm12 vmulpd %ymm0 , %ymm13, %ymm13 vmulpd %ymm0 , %ymm14, %ymm14 vmulpd %ymm0 , %ymm15, %ymm15 - prefetcht0 320(%rsp) + prefetcht0 192 + BUFFER1 vpermilpd $ 0x05 , %ymm5, %ymm5 vpermilpd $ 0x05 , %ymm7, %ymm7 @@ -1606,6 +1606,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .endm +.macro PREFETCHT0_C /* Issues prefetcht0 at byte offsets 0 and 24 from CO1 + k*LDC for k = 0..11 (visited as 0,4,8, 1,5,9, 3,7,11, 2,6,10). CO1 is moved by +1, +2, -1, -1, -1 times LDC, so it ends at its entry value; addq/subq overwrite the flags. NOTE(review): offset 24 presumably covers the last 8 bytes of a 32-byte span that may cross a cache line - confirm. */ + prefetcht0 (CO1) + prefetcht0 24(CO1) + prefetcht0 (CO1,LDC,4) + prefetcht0 24(CO1,LDC,4) + prefetcht0 (CO1,LDC,8) + prefetcht0 24(CO1,LDC,8) + addq LDC,CO1 /* CO1 = entry + 1*LDC: k = 1, 5, 9 */ + prefetcht0 (CO1) + prefetcht0 24(CO1) + prefetcht0 (CO1,LDC,4) + prefetcht0 24(CO1,LDC,4) + prefetcht0 (CO1,LDC,8) + prefetcht0 24(CO1,LDC,8) + leaq (CO1,LDC,2),CO1 /* CO1 = entry + 3*LDC: k = 3, 7, 11 */ + prefetcht0 (CO1) + prefetcht0 24(CO1) + prefetcht0 (CO1,LDC,4) + prefetcht0 24(CO1,LDC,4) + prefetcht0 (CO1,LDC,8) + prefetcht0 24(CO1,LDC,8) + subq LDC,CO1 /* CO1 = entry + 2*LDC: k = 2, 6, 10 */ + prefetcht0 (CO1) + prefetcht0 24(CO1) + prefetcht0 (CO1,LDC,4) + prefetcht0 24(CO1,LDC,4) + prefetcht0 (CO1,LDC,8) + prefetcht0 24(CO1,LDC,8) + subq LDC,CO1 + subq LDC,CO1 /* two more steps back: CO1 is at its entry value again */ +.endm /*******************************************************************************************/ #if !defined(TRMMKERNEL) @@ -1773,7 +1804,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
dec %rax jne .L12_12 - + + PREFETCHT0_C /* reached when the jne above is not taken; sits immediately before label .L12_12a */ .L12_12a: KERNEL4x12_M1