Fixed #276. Merge branch 'wernsaar-develop' into bulldozer
This commit is contained in:
commit
49faee1a51
|
@ -54,9 +54,8 @@ STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
#DTRSMKERNEL_LT = dtrsm_kernel_LT_8x2_bulldozer.S
|
DTRSMKERNEL_LT = dtrsm_kernel_LT_8x2_bulldozer.S
|
||||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
DTRSMKERNEL_RN = dtrsm_kernel_RN_8x2_bulldozer.S
|
||||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
|
||||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
@ -69,21 +68,4 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
#STRMMKERNEL = ../generic/trmmkernel_16x2.c
|
|
||||||
STRMMKERNEL = sgemm_kernel_16x2_bulldozer.S
|
|
||||||
#STRMMKERNEL_RT = ../generic/trmmkernel_16x2.c
|
|
||||||
#STRMMKERNEL_RN = ../generic/trmmkernel_16x2.c
|
|
||||||
|
|
||||||
DTRMMKERNEL = dgemm_kernel_8x2_bulldozer.S
|
|
||||||
#DTRMMKERNEL_RT = ../generic/trmmkernel_8x2.c
|
|
||||||
#DTRMMKERNEL_RN = ../generic/trmmkernel_8x2.c
|
|
||||||
|
|
||||||
CTRMMKERNEL = cgemm_kernel_4x2_bulldozer.S
|
|
||||||
|
|
||||||
ZTRMMKERNEL = zgemm_kernel_2x2_bulldozer.S
|
|
||||||
#ZTRMMKERNEL = ../generic/ztrmmkernel_4x2.c
|
|
||||||
#ZTRMMKERNEL_RR = ../generic/ztrmmkernel_2x2.c
|
|
||||||
#ZTRMMKERNEL_RC = ../generic/ztrmmkernel_2x2.c
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -84,6 +84,9 @@
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define A_PR1 384
|
||||||
|
#define B_PR1 192
|
||||||
|
|
||||||
|
|
||||||
.macro KERNEL8x2_SUB
|
.macro KERNEL8x2_SUB
|
||||||
vmovddup -16*SIZE(BO,%rax,2), %xmm1
|
vmovddup -16*SIZE(BO,%rax,2), %xmm1
|
||||||
|
@ -708,9 +711,14 @@
|
||||||
ALIGN_4
|
ALIGN_4
|
||||||
|
|
||||||
.L52:
|
.L52:
|
||||||
|
prefetcht0 A_PR1(AO,%rax,8)
|
||||||
|
prefetcht0 B_PR1(BO,%rax,2)
|
||||||
KERNEL8x2_SUB
|
KERNEL8x2_SUB
|
||||||
|
prefetcht0 A_PR1(AO,%rax,8)
|
||||||
KERNEL8x2_SUB
|
KERNEL8x2_SUB
|
||||||
|
prefetcht0 A_PR1(AO,%rax,8)
|
||||||
KERNEL8x2_SUB
|
KERNEL8x2_SUB
|
||||||
|
prefetcht0 A_PR1(AO,%rax,8)
|
||||||
KERNEL8x2_SUB
|
KERNEL8x2_SUB
|
||||||
|
|
||||||
jl .L52
|
jl .L52
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue