Update dgemm_kernel_4x8_haswell.S

This commit is contained in:
wjc404 2019-07-17 23:47:30 +08:00 committed by GitHub
parent 211ab03b14
commit 8a074b3965
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 37 additions and 5 deletions

View File

@ -267,24 +267,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro SAVE4x12 .macro SAVE4x12
prefetcht0 128(%rsp) /*BUFFER 1*/ prefetcht0 BUFFER1
vbroadcastsd ALPHA, %ymm0 vbroadcastsd ALPHA, %ymm0
vmulpd %ymm0 , %ymm4 , %ymm4 vmulpd %ymm0 , %ymm4 , %ymm4
vmulpd %ymm0 , %ymm5 , %ymm5 vmulpd %ymm0 , %ymm5 , %ymm5
vmulpd %ymm0 , %ymm6 , %ymm6 vmulpd %ymm0 , %ymm6 , %ymm6
vmulpd %ymm0 , %ymm7 , %ymm7 vmulpd %ymm0 , %ymm7 , %ymm7
prefetcht0 192(%rsp) prefetcht0 64 + BUFFER1
vmulpd %ymm0 , %ymm8 , %ymm8 vmulpd %ymm0 , %ymm8 , %ymm8
vmulpd %ymm0 , %ymm9 , %ymm9 vmulpd %ymm0 , %ymm9 , %ymm9
vmulpd %ymm0 , %ymm10, %ymm10 vmulpd %ymm0 , %ymm10, %ymm10
vmulpd %ymm0 , %ymm11, %ymm11 vmulpd %ymm0 , %ymm11, %ymm11
prefetcht0 256(%rsp) prefetcht0 128 + BUFFER1
vmulpd %ymm0 , %ymm12, %ymm12 vmulpd %ymm0 , %ymm12, %ymm12
vmulpd %ymm0 , %ymm13, %ymm13 vmulpd %ymm0 , %ymm13, %ymm13
vmulpd %ymm0 , %ymm14, %ymm14 vmulpd %ymm0 , %ymm14, %ymm14
vmulpd %ymm0 , %ymm15, %ymm15 vmulpd %ymm0 , %ymm15, %ymm15
prefetcht0 320(%rsp) prefetcht0 192 + BUFFER1
vpermilpd $ 0x05 , %ymm5, %ymm5 vpermilpd $ 0x05 , %ymm5, %ymm5
vpermilpd $ 0x05 , %ymm7, %ymm7 vpermilpd $ 0x05 , %ymm7, %ymm7
@ -1606,6 +1606,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.endm .endm
/*
 * PREFETCHT0_C_STRIDE4 -- private helper for PREFETCHT0_C.
 *
 * Issues prefetcht0 hints for byte offsets 0 and 24 at each of the three
 * addresses CO1, CO1 + 4*LDC and CO1 + 8*LDC.
 * Reads CO1 and LDC; modifies no register and no flag.
 */
.macro PREFETCHT0_C_STRIDE4
	prefetcht0	(CO1)
	prefetcht0	24(CO1)
	prefetcht0	(CO1,LDC,4)
	prefetcht0	24(CO1,LDC,4)
	prefetcht0	(CO1,LDC,8)
	prefetcht0	24(CO1,LDC,8)
.endm

/*
 * PREFETCHT0_C -- prefetch (T0 hint) twelve LDC-strided locations
 * starting at CO1.
 *
 * For every k in 0..11 the addresses CO1 + k*LDC and CO1 + k*LDC + 24 are
 * hinted.  CO1 is stepped temporarily so that each pass of the helper
 * covers k, k+4 and k+8; the passes run in the order k = 0, 1, 3, 2.
 *
 * In:      CO1 = base pointer, LDC = stride added between locations
 *          (presumably the C tile pointer and its leading dimension in
 *          bytes -- both are defined outside this macro, confirm there).
 * Out:     CO1 is back at its entry value when the macro ends.
 * Clobber: RFLAGS (written by addq/subq).
 */
.macro PREFETCHT0_C
	/* k = 0, 4, 8 */
	PREFETCHT0_C_STRIDE4

	/* CO1 = base + 1*LDC  ->  k = 1, 5, 9 */
	addq	LDC, CO1
	PREFETCHT0_C_STRIDE4

	/* CO1 = base + 3*LDC  ->  k = 3, 7, 11 */
	leaq	(CO1,LDC,2), CO1
	PREFETCHT0_C_STRIDE4

	/* CO1 = base + 2*LDC  ->  k = 2, 6, 10 */
	subq	LDC, CO1
	PREFETCHT0_C_STRIDE4

	/* Step back by 2*LDC so CO1 returns to its entry value. */
	subq	LDC, CO1
	subq	LDC, CO1
.endm
/*******************************************************************************************/ /*******************************************************************************************/
#if !defined(TRMMKERNEL) #if !defined(TRMMKERNEL)
@ -1773,7 +1804,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dec %rax dec %rax
jne .L12_12 jne .L12_12
PREFETCHT0_C
.L12_12a: .L12_12a:
KERNEL4x12_M1 KERNEL4x12_M1