4de545aa7d 
								
							 
						 
						
							
							
								
								address minor warnings from gcc7  
							
							
							
						 
						
							2019-09-07 10:21:08 +03:00  
				
					
						
							
							
								 
						
							
								575a84398a 
								
							 
						 
						
							
							
								
								remove redundant code  #2113  
							
							
							
						 
						
							2019-05-07 23:46:54 +03:00  
				
					
						
							
							
								 
						
							
								e882b239aa 
								
							 
						 
						
							
							
								
								Correct naming of getrf_parallel object  
							
							... 
							
							
							
							fixes  #1984  
						
							2019-01-26 00:45:45 +01:00  
				
					
						
							
							
								 
						
							
								3716267124 
								
							 
						 
						
							
							
								
								Change _STDC_VERSION__ to __STDC_VERSION__  
							
							... 
							
							
							
							Change-Id: Id3fa4e8d9eedd4ef7230df69b611e7f397301a42 
							
						 
						
							2018-05-11 12:15:08 +08:00  
				
					
						
							
							
								 
						
							
								c167a3d6f4 
								
							 
						 
						
							
							
								
								Added RISCV build  
							
							
							
						 
						
							2018-04-16 14:08:31 -07:00  
				
					
						
							
							
								 
						
							
								20c6c38e51 
								
							 
						 
						
							
							
								
								Merge branch 'develop' into atomic  
							
							
							
						 
						
							2018-04-07 12:09:39 +02:00  
				
					
						
							
							
								 
						
							
								8ec28ff461 
								
							 
						 
						
							
							
								
								Remove unguarded use of _Atomic and fix tabbing  
							
							
							
						 
						
							2018-04-04 22:40:30 +02:00  
				
					
						
							
							
								 
						
							
								bb9876db33 
								
							 
						 
						
							
							
								
								Fix thread races and infinite looping on systems with many cpus  
							
							... 
							
							
							
							On systems with more than 64 cpus, blas_quickdivide will sometimes return zero which creates bogus workloads when used for the stride calculation. This then leads to threads spinning incessantly waiting for a status change that never happens, as seen in #1497 .
This patch also fixes several data races that were found by helgrind and/or tsan while debugging the issue. 
							
						 
						
							2018-04-04 18:16:52 +02:00  
				
					
						
							
							
								 
						
							
								40160ff3c1 
								
							 
						 
						
							
							
								
								Use _Atomic instead of volatile for thread safety where C11 is supported  
							
							
							
						 
						
							2018-03-10 00:15:44 +01:00  
				
					
						
							
							
								 
						
							
								9fa986337d 
								
							 
						 
						
							
							
								
								add missing brackets to silence indentation warnings gcc721  
							
							
							
						 
						
							2018-01-19 23:11:12 +01:00  
				
					
						
							
							
								 
						
							
								d602b99386 
								
							 
						 
						
							
							
								
								LAPACK helpers in C that need care too  
							
							
							
						 
						
							2018-01-02 14:38:50 +01:00  
				
					
						
							
							
								 
						
							
								c7a8512d12 
								
							 
						 
						
							
							
								
								Cmake fixes for DYNAMIC_ARCH builds and whitespace in path names ( #1323 )  
							
							... 
							
							
							
							* prebuild.cmake: Put quotes around path names that may contain whitespace
(Copied from alexkaratakis' PR #1295 )
* kernel/CMakeLists.txt: Fix common_lapack header inclusion and DYNAMIC_ARCH generation of ?neg_tcopy and ?laswp_ncopy files
* lapack/CMakeLists.txt: Use correct template for ?laswp_(plus,minus) functions 
							
						 
						
							2017-10-09 23:34:18 +02:00  
				
					
						
							
							
								 
						
							
								37858d1146 
								
							 
						 
						
							
							
								
								Fix threading usage in CMake: s/SMP/USE_THREAD/  
							
							
							
						 
						
							2017-08-19 15:07:42 +10:00  
				
					
						
							
							
								 
						
							
								d245caa49a 
								
							 
						 
						
							
							
								
								Support out-of-source build  
							
							
							
						 
						
							2017-08-01 15:16:14 +05:30  
				
					
						
							
							
								 
						
							
								56762d5e4c 
								
							 
						 
						
							
							
								
								add lapack laswp for zarch  
							
							
							
						 
						
							2017-04-13 15:38:59 +02:00  
				
					
						
							
							
								 
						
							
								3918d17025 
								
							 
						 
						
							
							
								
								LAPACK: Fix lapack-test errors in ARM64 threaded version  
							
							
							
						 
						
							2017-01-31 23:36:23 +05:30  
				
					
						
							
							
								 
						
							
								209b63197e 
								
							 
						 
						
							
							
								
								prepared lapack/lauum for UNROLL values, that are not a power of two  
							
							
							
						 
						
							2017-01-11 07:29:17 +01:00  
				
					
						
							
							
								 
						
							
								c81dc6322f 
								
							 
						 
						
							
							
								
								prepared lapack/potrf functions for UNROLL values, that are not a power of two  
							
							
							
						 
						
							2017-01-10 10:50:28 +01:00  
				
					
						
							
							
								 
						
							
								3e1bbd6b5f 
								
							 
						 
						
							
							
								
								prepared lapack/getrf functions for UNROLL values, that are not a power of two  
							
							
							
						 
						
							2017-01-09 12:57:26 +01:00  
				
					
						
							
							
								 
						
							
								053044ae4d 
								
							 
						 
						
							
							
								
								Replace CMAKE_SOURCE_DIR/CMAKE_BINARY_DIR with PROJECT_SOURCE_DIR/PROJECT_BINARY_DIR  
							
							... 
							
							
							
							If OpenBLAS is built using add_subdirectory(OpenBlas) as part of another project
then the paths set by CMAKE_XXX_DIR are relative to the parent project
and not the OpenBLAS project. 
							
						 
						
							2016-05-25 09:13:28 +02:00  
				
					
						
							
							
								 
						
							
								956be69e1d 
								
							 
						 
						
							
							
								
								optimized getrf_single.c for POWER8  
							
							
							
						 
						
							2016-05-17 16:19:53 +02:00  
				
					
						
							
							
								 
						
							
								6a2bde7a2d 
								
							 
						 
						
							
							
								
								optimized dgemm and dgetrf for POWER8  
							
							
							
						 
						
							2016-05-17 14:45:27 +02:00  
				
					
						
							
							
								 
						
							
								2c3dfe2bf3 
								
							 
						 
						
							
							
								
								MIPS P5600(32 bit) and I6400(64 bit) cores support added.  
							
							... 
							
							
							
							Separated mips and mips64 files.
Configurations support for mips 32 bit.
Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com> 
							
						 
						
							2016-04-22 14:03:18 +05:30  
				
					
						
							
							
								 
						
							
								4e1b521e27 
								
							 
						 
						
							
							
								
								Fix lapack complex implementation of lauu2 and potf2 for Android (use FLOAT instead of FLOAT[2] as imaginary part is not used).  
							
							
							
						 
						
							2016-02-04 16:59:56 -05:00  
				
					
						
							
							
								 
						
							
								13f0f8c10e 
								
							 
						 
						
							
							
								
								Refs  #723 . Avoid out of boundary for getf2.  
							
							
							
						 
						
							2016-01-26 09:14:57 -06:00  
				
					
						
							
							
								 
						
							
								0553476fba 
								
							 
						 
						
							
							
								
								Added TRANS defines for complex sources in lapack.  
							
							
							
						 
						
							2015-02-24 14:30:35 -06:00  
				
					
						
							
							
								 
						
							
								0d8e227ea7 
								
							 
						 
						
							
							
								
								Changed strategy for setting preprocessor definitions.  
							
							... 
							
							
							
							Instead of generating separate object files for each permutation of
defines for a source file, GenerateNamedObjects now writes an entirely
new source file and inserts the defines as #define c statements.
This solves a problem I ran into with ar.exe where it was refusing to
link objects that had the same filename despite having different paths. 
							
						 
						
							2015-02-24 12:26:33 -06:00  
				
					
						
							
							
								 
						
							
								f3f2b3d768 
								
							 
						 
						
							
							
								
								Added complex and single netlib-lapack fortran sources to lapack.cmake.  
							
							
							
						 
						
							2015-02-19 12:26:11 -06:00  
				
					
						
							
							
								 
						
							
								67e39bd8fb 
								
							 
						 
						
							
							
								
								Added mangled complex filenames to interface and lapack CMakeLists.txt.  
							
							
							
						 
						
							2015-02-17 13:12:30 -06:00  
				
					
						
							
							
								 
						
							
								4662a0b13a 
								
							 
						 
						
							
							
								
								Changed generate functions to iterate through a list of float types.  
							
							... 
							
							
							
							This will generate obj files for SINGLE/DOUBLE/COMPLEX/DOUBLE COMPLEX. 
							
						 
						
							2015-02-15 17:44:37 -06:00  
				
					
						
							
							
								 
						
							
								e74462a3f5 
								
							 
						 
						
							
							
								
								Moved declarations to start of functions to satisfy MSVC C89 implementation.  
							
							
							
						 
						
							2015-02-11 11:16:57 -06:00  
				
					
						
							
							
								 
						
							
								056ba26755 
								
							 
						 
						
							
							
								
								Changed a number of inline calls to use __inline.  
							
							... 
							
							
							
							MSVC doesn't implement C99, so we can't use the inline keyword. __inline
appears to work in MSVC and GCC. 
							
						 
						
							2015-02-11 11:13:17 -06:00  
				
					
						
							
							
								 
						
							
								3b20b62423 
								
							 
						 
						
							
							
								
								Fixed trti2 name.  
							
							
							
						 
						
							2015-02-09 15:29:28 -06:00  
				
					
						
							
							
								 
						
							
								6ddbfea700 
								
							 
						 
						
							
							
								
								Added generic laswp object.  
							
							
							
						 
						
							2015-02-09 15:15:58 -06:00  
				
					
						
							
							
								 
						
							
								e8c39138c6 
								
							 
						 
						
							
							
								
								Removed return value from GenerateNamedObjects.  
							
							... 
							
							
							
							It sets DBLAS_OBJS directly to save a bunch of list appending in the
CMakeLists.txt files. 
							
						 
						
							2015-02-09 12:28:09 -06:00  
				
					
						
							
							
								 
						
							
								13d2d48e67 
								
							 
						 
						
							
							
								
								Added yet another naming scheme for lapack functions.  
							
							
							
						 
						
							2015-02-06 13:42:20 -06:00  
				
					
						
							
							
								 
						
							
								373a1bdadb 
								
							 
						 
						
							
							
								
								Converted lapack/Makefile to cmake.  
							
							
							
						 
						
							2015-02-04 15:47:10 -06:00  
				
					
						
							
							
								 
						
							
								6c2ead30f0 
								
							 
						 
						
							
							
								
								Remove all trailing whitespace except lapack-netlib  
							
							... 
							
							
							
							Signed-off-by: Timothy Gu <timothygu99@gmail.com> 
							
						 
						
							2014-06-27 12:05:18 -07:00  
				
					
						
							
							
								 
						
							
								c26bbee489 
								
							 
						 
						
							
							
								
								enabled and tested optimized trtri lapack functions  
							
							
							
						 
						
							2014-05-23 10:55:39 +02:00  
				
					
						
							
							
								 
						
							
								c4ccb3fbb2 
								
							 
						 
						
							
							
								
								removed lapack/getri because it was never used  
							
							
							
						 
						
							2014-05-21 14:21:19 +02:00  
				
					
						
							
							
								 
						
							
								a748d3a75d 
								
							 
						 
						
							
							
								
								enabled optimized trti2 lapack functions again  
							
							
							
						 
						
							2014-05-21 11:02:07 +02:00  
				
					
						
							
							
								 
						
							
								dbaeea7b59 
								
							 
						 
						
							
							
								
								enabled lauu2 and lauum lapack functions again  
							
							
							
						 
						
							2014-05-21 09:49:18 +02:00  
				
					
						
							
							
								 
						
							
								4f98f8c9b3 
								
							 
						 
						
							
							
								
								enabled and tested optimized potrf lapack functions  
							
							
							
						 
						
							2014-05-18 21:42:37 +02:00  
				
					
						
							
							
								 
						
							
								536875d463 
								
							 
						 
						
							
							
								
								enabled and tested optimized getrs lapack functions  
							
							
							
						 
						
							2014-05-18 21:13:56 +02:00  
				
					
						
							
							
								 
						
							
								ac029f81b3 
								
							 
						 
						
							
							
								
								enabled and tested optimized dgetrf function  
							
							
							
						 
						
							2014-05-18 19:07:51 +02:00  
				
					
						
							
							
								 
						
							
								a35a1a9ae7 
								
							 
						 
						
							
							
								
								changed makefiles for lapack development  
							
							
							
						 
						
							2014-05-07 11:33:02 +02:00  
				
					
						
							
							
								 
						
							
								4be4db590c 
								
							 
						 
						
							
							
								
								Merge remote branch 'origin/develop' into armv7  
							
							
							
						 
						
							2013-12-01 13:16:41 +01:00  
				
					
						
							
							
								 
						
							
								fe5f46c330 
								
							 
						 
						
							
							
								
								added experimental support for ARMV8  
							
							
							
						 
						
							2013-11-24 15:47:00 +01:00  
				
					
						
							
							
								 
						
							
								5048a80032 
								
							 
						 
						
							
							
								
								Refs  #283 . Fixed the incorrect usage of long data type for Windows 64.  
							
							
							
						 
						
							2013-11-14 13:46:42 +08:00  
				
					
						
							
							
								 
						
							
								73770e60b8 
								
							 
						 
						
							
							
								
								Refs  #309 . Fixed trtri_U single thread computational bug.  
							
							
							
						 
						
							2013-11-07 01:08:39 +08:00  
				
					
						
							
							
								 
						
							
								95aedfa0ff 
								
							 
						 
						
							
							
								
								added missing file arm/Makefile in lapack/laswp  
							
							
							
						 
						
							2013-11-03 11:19:32 +01:00  
				
					
						
							
							
								 
						
							
								a07cc39571 
								
							 
						 
						
							
							
								
								Refs  #266 . Fixed the compiling bug with Open64 5.0.  
							
							
							
						 
						
							2013-07-31 14:41:39 +08:00  
				
					
						
							
							
								 
						
							
								fd0c388681 
								
							 
						 
						
							
							
								
								Refs  #191 . A workaround for dtrtri_U single thread bug.  
							
							... 
							
							
							
							This function caused the failure of ERKALE serial test.
I replaced it with LAPACK source code. 
							
						 
						
							2013-07-14 22:16:30 +08:00  
				
					
						
							
							
								 
						
							
								32d2ca3035 
								
							 
						 
						
							
							
								
								Refs  #214 ,  #221 ,  #246 . Fixed the getrf overflow bug on Windows.  
							
							... 
							
							
							
							I used a smaller threshold since the stack size is 1MB on windows. 
							
						 
						
							2013-07-11 03:20:02 +08:00  
				
					
						
							
							
								 
						
							
								5d3312142a 
								
							 
						 
						
							
							
								
								Refs  #221   #246 . Fixed the overflowing stack bug in multithreading BLAS3.  
							
							... 
							
							
							
							When NUM_THREADS (MAX_CPU_NUMBER) is very large, e.g. 256, and the job array is declared as:
typedef struct {
  volatile BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE];
} job_t;
job_t          job[MAX_CPU_NUMBER];
The job array is equal to 8MB.
Thus, we use malloc instead of stack allocation. 
							
						 
						
							2013-07-08 01:07:05 +08:00  
				
					
						
							
							
								 
						
							
								4c2123c334 
								
							 
						 
						
							
							
								
								Fixed the overflowing bug in single thread cholesky factorization.  
							
							
							
						 
						
							2013-02-23 13:00:52 +08:00  
				
					
						
							
							
								 
						
							
								7bd1834d59 
								
							 
						 
						
							
							
								
								Refs  #130  Fixed laswp building bug with DYNAMIC_ARCH=1.  
							
							
							
						 
						
							2012-08-09 20:36:29 +08:00  
				
					
						
							
							
								 
						
							
								1b056c5328 
								
							 
						 
						
							
							
								
								Refs  #130  Prevent reading ipiv array beyond the bound in ?laswp. Use laswp instead of laswp_oncopy in getrf.  
							
							
							
						 
						
							2012-08-09 20:06:51 +08:00  
				
					
						
							
							
								 
						
							
								342bbc3871 
								
							 
						 
						
							
							
								
								Import GotoBLAS2 1.13 BSD version codes.  
							
							
							
						 
						
							2011-01-24 14:54:24 +00:00