Fixed #276. Merge branch 'wernsaar-develop' into bulldozer
This commit is contained in: commit 49faee1a51
					
				| 
						 | 
				
			
			@@ -54,9 +54,8 @@ STRSMKERNEL_RN  =  ../generic/trsm_kernel_RN.c
 | 
			
		|||
STRSMKERNEL_RT  =  ../generic/trsm_kernel_RT.c
 | 
			
		||||
 | 
			
		||||
DTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
 | 
			
		||||
#DTRSMKERNEL_LT  = dtrsm_kernel_LT_8x2_bulldozer.S
 | 
			
		||||
DTRSMKERNEL_LT  = ../generic/trsm_kernel_LT.c
 | 
			
		||||
DTRSMKERNEL_RN  = ../generic/trsm_kernel_RN.c
 | 
			
		||||
DTRSMKERNEL_LT  = dtrsm_kernel_LT_8x2_bulldozer.S
 | 
			
		||||
DTRSMKERNEL_RN  = dtrsm_kernel_RN_8x2_bulldozer.S
 | 
			
		||||
DTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
 | 
			
		||||
 | 
			
		||||
CTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c
 | 
			
		||||
| 
						 | 
				
			
			@@ -69,21 +68,4 @@ ZTRSMKERNEL_LT  = ../generic/trsm_kernel_LT.c
 | 
			
		|||
ZTRSMKERNEL_RN  = ../generic/trsm_kernel_RN.c
 | 
			
		||||
ZTRSMKERNEL_RT  = ../generic/trsm_kernel_RT.c
 | 
			
		||||
 | 
			
		||||
#STRMMKERNEL	= ../generic/trmmkernel_16x2.c
 | 
			
		||||
STRMMKERNEL	= sgemm_kernel_16x2_bulldozer.S
 | 
			
		||||
#STRMMKERNEL_RT	= ../generic/trmmkernel_16x2.c
 | 
			
		||||
#STRMMKERNEL_RN	= ../generic/trmmkernel_16x2.c
 | 
			
		||||
 | 
			
		||||
DTRMMKERNEL	= dgemm_kernel_8x2_bulldozer.S
 | 
			
		||||
#DTRMMKERNEL_RT	= ../generic/trmmkernel_8x2.c
 | 
			
		||||
#DTRMMKERNEL_RN	= ../generic/trmmkernel_8x2.c
 | 
			
		||||
 | 
			
		||||
CTRMMKERNEL	= cgemm_kernel_4x2_bulldozer.S
 | 
			
		||||
 | 
			
		||||
ZTRMMKERNEL	= zgemm_kernel_2x2_bulldozer.S
 | 
			
		||||
#ZTRMMKERNEL	= ../generic/ztrmmkernel_4x2.c
 | 
			
		||||
#ZTRMMKERNEL_RR	= ../generic/ztrmmkernel_2x2.c
 | 
			
		||||
#ZTRMMKERNEL_RC	= ../generic/ztrmmkernel_2x2.c
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@@ -84,6 +84,9 @@
 | 
			
		|||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#define A_PR1   384
 | 
			
		||||
#define B_PR1   192
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
.macro KERNEL8x2_SUB
 | 
			
		||||
	vmovddup	-16*SIZE(BO,%rax,2), %xmm1
 | 
			
		||||
| 
						 | 
				
			
			@@ -708,9 +711,14 @@
 | 
			
		|||
	ALIGN_4
 | 
			
		||||
 | 
			
		||||
.L52:
 | 
			
		||||
	prefetcht0      A_PR1(AO,%rax,8)
 | 
			
		||||
	prefetcht0      B_PR1(BO,%rax,2)
 | 
			
		||||
	KERNEL8x2_SUB
 | 
			
		||||
	prefetcht0      A_PR1(AO,%rax,8)
 | 
			
		||||
	KERNEL8x2_SUB
 | 
			
		||||
	prefetcht0      A_PR1(AO,%rax,8)
 | 
			
		||||
	KERNEL8x2_SUB
 | 
			
		||||
	prefetcht0      A_PR1(AO,%rax,8)
 | 
			
		||||
	KERNEL8x2_SUB
 | 
			
		||||
 | 
			
		||||
	jl	.L52
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
										
											
												File diff suppressed because it is too large; use "Load Diff" to display it.
											
										
									
								
							
		Loading…
	
		Reference in New Issue