3716267124 
								
							 
						 
						
							
							
								
								Change _STDC_VERSION__ to __STDC_VERSION__  
							
							... 
							
							
							
							Change-Id: Id3fa4e8d9eedd4ef7230df69b611e7f397301a42 
							
						 
						
							2018-05-11 12:15:08 +08:00  
				
					
						
							
							
								 
						
							
								6a99fcce94 
								
							 
						 
						
							
							
								
								Use _Atomic instead of volatile for thread safety where C11 is supported  
							
							... 
							
							
							
							Suggested by dodomorandi in #660  
							
						 
						
							2018-03-10 00:03:49 +01:00  
				
					
						
							
							
								 
						
							
								11a627c54e 
								
							 
						 
						
							
							
								
								remove surplus parentheses to silence clang5  
							
							
							
						 
						
							2018-01-01 20:56:26 +01:00  
				
					
						
							
							
								 
						
							
								30486a356c 
								
							 
						 
						
							
							
								
								Reduce number of data partitions in n.  
							
							
							
						 
						
							2017-10-04 12:37:49 -07:00  
				
					
						
							
							
								 
						
							
								9de52b489a 
								
							 
						 
						
							
							
								
								Cleaning up and documenting multi-threaded GEMM code.  
							
							
							
						 
						
							2017-10-03 16:32:08 -07:00  
				
					
						
							
							
								 
						
							
								860dcfc703 
								
							 
						 
						
							
							
								
								Use 2D thread distribution for small GEMMs.  
							
							... 
							
							
							
							Allows maximum use of available cores if one of M and N is small and the other is large. 
							
						 
						
							2017-10-03 13:43:39 -07:00  
				
					
						
							
							
								 
						
							
								6aaa107865 
								
							 
						 
						
							
							
								
								Reducing threads for multi-threaded GEMMs on small matrices.  
							
							
							
						 
						
							2017-09-27 19:25:33 -07:00  
				
					
						
							
							
								 
						
							
								a2672d5589 
								
							 
						 
						
							
							
								
								prepared driver/level3 functions for UNROLL values, that are not a power of two  
							
							
							
						 
						
							2017-01-09 10:38:15 +01:00  
				
					
						
							
							
								 
						
							
								b07d733a71 
								
							 
						 
						
							
							
								
								added updates for syrk and syr2k  
							
							
							
						 
						
							2016-01-21 13:16:44 +01:00  
				
					
						
							
							
								 
						
							
								fbc21266e6 
								
							 
						 
						
							
							
								
								Minor C code fixes in driver/  
							
							
							
						 
						
							2015-11-09 14:15:49 +05:30  
				
					
						
							
							
								 
						
							
								1d33547222 
								
							 
						 
						
							
							
								
								optimized zgemm kernel for haswell  
							
							
							
						 
						
							2014-07-27 11:51:42 +02:00  
				
					
						
							
							
								 
						
							
								6c2ead30f0 
								
							 
						 
						
							
							
								
								Remove all trailing whitespace except lapack-netlib  
							
							... 
							
							
							
							Signed-off-by: Timothy Gu <timothygu99@gmail.com> 
							
						 
						
							2014-06-27 12:05:18 -07:00  
				
					
						
							
							
								 
						
							
								c947ab85dc 
								
							 
						 
						
							
							
								
								changed level3.c  
							
							
							
						 
						
							2013-12-01 13:46:30 +01:00  
				
					
						
							
							
								 
						
							
								2840d56aeb 
								
							 
						 
						
							
							
								
								added dgemm_kernel for Piledriver  
							
							
							
						 
						
							2013-10-19 09:47:15 +02:00  
				
					
						
							
							
								 
						
							
								32d2ca3035 
								
							 
						 
						
							
							
								
								Refs  #214 ,  #221 ,  #246 . Fixed the getrf overflow bug on Windows.  
							
							... 
							
							
							
							I used a smaller threshold since the stack size is 1MB on Windows. 
							
						 
						
							2013-07-11 03:20:02 +08:00  
				
					
						
							
							
								 
						
							
								6f008abcef 
								
							 
						 
						
							
							
								
								replaced defined(DOUBLE) by !defined(XDOUBLE)  
							
							
							
						 
						
							2013-07-09 18:17:50 +02:00  
				
					
						
							
							
								 
						
							
								5d3312142a 
								
							 
						 
						
							
							
								
								Refs  #221   #246 . Fixed the overflowing stack bug in multithreading BLAS3.  
							
							... 
							
							
							
							When NUM_THREADS (MAX_CPU_NUMBER) is very large, e.g. 256:
typedef struct {
  volatile BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
} job_t;
job_t          job[MAX_CPU_NUMBER];
The job array then occupies 8MB.
Thus, we use malloc instead of stack allocation. 
							
						 
						
							2013-07-08 01:07:05 +08:00  
				
					
						
							
							
								 
						
							
								25491e42f9 
								
							 
						 
						
							
							
								
								New dgemm kernel for BULLDOZER: dgemm_kernel_8x2_bulldozer.S  
							
							
							
						 
						
							2013-06-08 09:40:17 +02:00  
				
					
						
							
							
								 
						
							
								342bbc3871 
								
							 
						 
						
							
							
								
								Import GotoBLAS2 1.13 BSD version codes.  
							
							
							
						 
						
							2011-01-24 14:54:24 +00:00