From 94db259e5b432a7f1769c1d61071b9dd727778db Mon Sep 17 00:00:00 2001 From: wjc404 <52632443+wjc404@users.noreply.github.com> Date: Sat, 20 Jul 2019 22:04:41 +0800 Subject: [PATCH] Add files via upload --- kernel/x86_64/dgemm_kernel_4x8_haswell.S | 45 ++++++++++-------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/kernel/x86_64/dgemm_kernel_4x8_haswell.S b/kernel/x86_64/dgemm_kernel_4x8_haswell.S index 6d1460bb2..6a8619e32 100644 --- a/kernel/x86_64/dgemm_kernel_4x8_haswell.S +++ b/kernel/x86_64/dgemm_kernel_4x8_haswell.S @@ -1622,35 +1622,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro PREFETCHT0_C + prefetcht0 ALPHA prefetcht0 (CO1) prefetcht0 24(CO1) prefetcht0 (CO1,LDC,4) prefetcht0 24(CO1,LDC,4) prefetcht0 (CO1,LDC,8) prefetcht0 24(CO1,LDC,8) - addq LDC,CO1 - prefetcht0 (CO1) - prefetcht0 24(CO1) - prefetcht0 (CO1,LDC,4) - prefetcht0 24(CO1,LDC,4) - prefetcht0 (CO1,LDC,8) - prefetcht0 24(CO1,LDC,8) - leaq (CO1,LDC,2),CO1 - prefetcht0 (CO1) - prefetcht0 24(CO1) - prefetcht0 (CO1,LDC,4) - prefetcht0 24(CO1,LDC,4) - prefetcht0 (CO1,LDC,8) - prefetcht0 24(CO1,LDC,8) - subq LDC,CO1 - prefetcht0 (CO1) - prefetcht0 24(CO1) - prefetcht0 (CO1,LDC,4) - prefetcht0 24(CO1,LDC,4) - prefetcht0 (CO1,LDC,8) - prefetcht0 24(CO1,LDC,8) - subq LDC,CO1 - subq LDC,CO1 .endm /*******************************************************************************************/ @@ -1820,12 +1798,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. dec %rax jne .L12_12 - PREFETCHT0_C .L12_12a: - + PREFETCHT0_C + addq LDC,CO1 KERNEL4x12_M1 + PREFETCHT0_C + leaq (CO1,LDC,2),CO1 KERNEL4x12_M2 + PREFETCHT0_C + subq LDC,CO1 KERNEL4x12_M1 + PREFETCHT0_C + subq LDC,CO1 + subq LDC,CO1 KERNEL4x12_M2 KERNEL4x12_M1 @@ -2133,9 +2118,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .L13_12a: + PREFETCHT0_C + addq LDC,CO1 KERNEL4x12_M1 + PREFETCHT0_C + leaq (CO1,LDC,2),CO1 KERNEL4x12_M2 + PREFETCHT0_C + subq LDC,CO1 KERNEL4x12_M1 + PREFETCHT0_C + subq LDC,CO1 + subq LDC,CO1 KERNEL4x12_M2 KERNEL4x12_M1 @@ -2145,7 +2139,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. jmp .L13_16 - PREFETCHT0_C .L13_13: test $1, %rax