1d33547222 
								
							 
						 
						
							
							
								
								optimized zgemm kernel for haswell  
							
							
							
						 
						
							2014-07-27 11:51:42 +02:00  
				
					
						
							
							
								 
						
							
								3ea4dadd30 
								
							 
						 
						
							
							
								
								optimizations for trsm  
							
							
							
						 
						
							2014-07-25 11:59:17 +02:00  
				
					
						
							
							
								 
						
							
								1b10ff129a 
								
							 
						 
						
							
							
								
								optimizations for trmm  
							
							
							
						 
						
							2014-07-25 10:00:23 +02:00  
				
					
						
							
							
								 
						
							
								125610d23b 
								
							 
						 
						
							
							
								
								allow to set custom value for ?GEMM_DEFAULT_UNROLL_MN, optimizations for syrk  
							
							
							
						 
						
							2014-07-24 18:43:31 +02:00  
				
					
						
							
							
								 
						
							
								be94db096c 
								
							 
						 
						
							
							
								
								disabled *3M functions for x86_64 platforms  
							
							
							
						 
						
							2014-07-01 16:18:05 +02:00  
				
					
						
							
							
								 
						
							
								6c2ead30f0 
								
							 
						 
						
							
							
								
								Remove all trailing whitespace except lapack-netlib  
							
							... 
							
							
							
							Signed-off-by: Timothy Gu <timothygu99@gmail.com> 
							
						 
						
							2014-06-27 12:05:18 -07:00  
				
					
						
							
							
								 
						
							
								c947ab85dc 
								
							 
						 
						
							
							
								
								changed level3.c  
							
							
							
						 
						
							2013-12-01 13:46:30 +01:00  
				
					
						
							
							
								 
						
							
								2840d56aeb 
								
							 
						 
						
							
							
								
								added dgemm_kernel for Piledriver  
							
							
							
						 
						
							2013-10-19 09:47:15 +02:00  
				
					
						
							
							
								 
						
							
								77b572fa0b 
								
							 
						 
						
							
							
								
								Merge branch 'loongson3a' into develop  
							
							... 
							
							
							
							Conflicts:
	Makefile.system 
							
						 
						
							2013-07-20 22:33:17 +08:00  
				
					
						
							
							
								 
						
							
								32d2ca3035 
								
							 
						 
						
							
							
								
								Refs  #214 ,  #221 ,  #246 . Fixed the getrf overflow bug on Windows.  
							
							... 
							
							
							
							I used a smaller threshold since the stack size is 1MB on windows. 
							
						 
						
							2013-07-11 03:20:02 +08:00  
				
					
						
							
							
								 
						
							
								6f008abcef 
								
							 
						 
						
							
							
								
								replaced defined(DOUBLE) by !defined(XDOUBLE)  
							
							
							
						 
						
							2013-07-09 18:17:50 +02:00  
				
					
						
							
							
								 
						
							
								5d3312142a 
								
							 
						 
						
							
							
								
								Refs  #221   #246 . Fixed the overflowing stack bug in multithreading BLAS3.  
							
							... 
							
							
							
							When NUM_THREADS (MAX_CPU_NUMBER) is very large, e.g. 256:
typedef struct {
  volatile BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
} job_t;
job_t          job[MAX_CPU_NUMBER];
The job array is equal to 8MB in size.
Thus, we use malloc instead of stack allocation. 
							
						 
						
							2013-07-08 01:07:05 +08:00  
				
					
						
							
							
								 
						
							
								25491e42f9 
								
							 
						 
						
							
							
								
								New dgemm kernel for BULLDOZER: dgemm_kernel_8x2_bulldozer.S  
							
							
							
						 
						
							2013-06-08 09:40:17 +02:00  
				
					
						
							
							
								 
						
							
								6b01d58712 
								
							 
						 
						
							
							
								
								Disable the optimization of multi-threading gemm on the Loongson3A.  
							
							
							
						 
						
							2013-03-30 20:12:43 +00:00  
				
					
						
							
							
								 
						
							
								8163ab7e55 
								
							 
						 
						
							
							
								
								Change the block size on Loongson 3B.  
							
							
							
						 
						
							2011-11-23 18:41:49 +00:00  
				
					
						
							
							
								 
						
							
								9fe3049de6 
								
							 
						 
						
							
							
								
								Adding conditional compilation(#if defined(LOONGSON3A)) to avoid affecting the performance of other platforms.  
							
							
							
						 
						
							2011-09-26 15:21:45 +00:00  
				
					
						
							
							
								 
						
							
								831858b883 
								
							 
						 
						
							
							
								
								Modify aligned address of sa and sb to improve the performance of multi-threads.  
							
							
							
						 
						
							2011-09-23 20:59:48 +00:00  
				
					
						
							
							
								 
						
							
								1b97ec1a7c 
								
							 
						 
						
							
							
								
								Added DEBUG option in Makefile.rule. Fixed DEBUG typo mistakes.  
							
							
							
						 
						
							2011-02-26 11:19:54 +08:00  
				
					
						
							
							
								 
						
							
								342bbc3871 
								
							 
						 
						
							
							
								
								Import GotoBLAS2 1.13 BSD version codes.  
							
							
							
						 
						
							2011-01-24 14:54:24 +00:00