Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								aa2a2d9c01 
								
							 
						 
						
							
							
								
								Conditionally compile files that may get replaced by ReLAPACK  
							
							 
							
							
							
						 
						
							2022-11-08 12:04:46 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								1b77764182 
								
							 
						 
						
							
							
								
								Conditionally leave out bits of LAPACK to be overridden by ReLAPACK  
							
							 
							
							
							
						 
						
							2022-11-08 12:02:59 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								fcda11c1ae 
								
							 
						 
						
							
							
								
								Revert special handling of GEMMT  
							
							 
							
							
							
						 
						
							2022-11-05 23:48:50 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								4743d80c22 
								
							 
						 
						
							
							
								
								Merge pull request  #3800  from thrasibule/raptorlake  
							
							 
							
							... 
							
							
							
							add raptor lake ids 
							
						 
						
							2022-11-05 18:05:48 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								5d02f2e83e 
								
							 
						 
						
							
							
								
								Merge pull request  #3806  from martin-frbg/dyn_coop  
							
							 
							
							... 
							
							
							
							Fix OPENBLAS_CORETYPE=COOPERLAKE not working in DYNAMIC_ARCH builds 
							
						 
						
							2022-11-03 21:37:39 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								da6e426b13 
								
							 
						 
						
							
							
								
								fix Cooperlake not selectable via environment variable  
							
							 
							
							
							
						 
						
							2022-11-03 18:13:35 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								c970717157 
								
							 
						 
						
							
							
								
								fix missing t in xgemmt rule  
							
							 
							
							... 
							
							
							
							Co-authored-by: Alexis <35051714+amontoison@users.noreply.github.com> 
							
						 
						
							2022-11-01 13:51:20 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								62a44c9c5d 
								
							 
						 
						
							
							
								
								Merge pull request  #3804  from martin-frbg/issue3803  
							
							 
							
							... 
							
							
							
							Remove excess initializer (leftover from rework of PR 3793) 
							
						 
						
							2022-10-31 20:42:33 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								c9d78dc3b2 
								
							 
						 
						
							
							
								
								Remove excess initializer (leftover from rework of PR 3793)  
							
							 
							
							
							
						 
						
							2022-10-31 16:57:03 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								65338a9493 
								
							 
						 
						
							
							
								
								Merge pull request  #3799  from bartoldeman/cscal-zscal-no-fma  
							
							 
							
							... 
							
							
							
							x86_64: prevent GCC and Clang from generating FMAs in cscal/zscal. 
							
						 
						
							2022-10-30 18:56:10 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								ea6c5f3cf5 
								
							 
						 
						
							
							
								
								Add option RELAPACK_REPLACE  
							
							 
							
							
							
						 
						
							2022-10-30 12:55:23 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								d39978cd7f 
								
							 
						 
						
							
							
								
								Fix includes  
							
							 
							
							
							
						 
						
							2022-10-30 12:53:19 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								ce7ea72de1 
								
							 
						 
						
							
							
								
								Fix include paths  
							
							 
							
							
							
						 
						
							2022-10-30 12:50:51 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								3ebf5d219d 
								
							 
						 
						
							
							
								
								handle INCLUDE_ALL and optional function prefixes  
							
							 
							
							
							
						 
						
							2022-10-30 12:49:07 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								a082d54035 
								
							 
						 
						
							
							
								
								Rename to avoid conflict with OpenBLAS' toplevel config.h  
							
							 
							
							
							
						 
						
							2022-10-30 12:47:01 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								eeebaf2294 
								
							 
						 
						
							
							
								
								move INCLUDE_ALL to (c)make options  
							
							 
							
							
							
						 
						
							2022-10-30 12:45:54 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								06b022b139 
								
							 
						 
						
							
							
								
								Fix ReLAPACK source selection  
							
							 
							
							
							
						 
						
							2022-10-30 12:42:36 +01:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								03bd1157d8 
								
							 
						 
						
							
							
								
								Merge pull request  #3793  from imzhuhl/new_sbgemm  
							
							 
							
							... 
							
							
							
							New sbgemm implementation for Neoverse N2 
							
						 
						
							2022-10-30 12:09:46 +01:00  
						
					 
				
					
						
							
							
								 
								Guillaume Horel
							
						 
						
							 
							
							
							
							
								
							
							
								e27ad3a6cc 
								
							 
						 
						
							
							
								
								add raptor lake ids  
							
							 
							
							
							
						 
						
							2022-10-28 11:45:43 -04:00  
						
					 
				
					
						
							
							
								 
								Honglin Zhu
							
						 
						
							 
							
							
							
							
								
							
							
								79066b6bf3 
								
							 
						 
						
							
							
								
								Rename file to match the naming convention and delete unused code.  
							
							 
							
							
							
						 
						
							2022-10-28 17:09:39 +08:00  
						
					 
				
					
						
							
							
								 
								Bart Oldeman
							
						 
						
							 
							
							
							
							
								
							
							
								e7e3aa2948 
								
							 
						 
						
							
							
								
								x86_64: prevent GCC and Clang from generating FMAs in cscal/zscal.  
							
							 
							
							... 
							
							
							
							If e.g. -march=haswell is set in CFLAGS, GCC generates FMAs by default, which
is inconsistent with the microkernels, none of which use FMAs. These
inconsistencies cause a few failures in the LAPACK testcases, where
eigenvalue results with/without eigenvectors are compared.
Moreover, using FMAs for multiplication of complex numbers can give surprising
results; see 22aa81f for more information.
This uses the same syntax as used in 22aa81f for zarch (s390x). 
							
						 
						
							2022-10-27 18:16:43 -04:00  
						
					 
				
					
						
							
							
								 
								Honglin Zhu
							
						 
						
							 
							
							
							
							
								
							
							
								4989e039a5 
								
							 
						 
						
							
							
								
								Define SBGEMM_ALIGN_K for DYNAMIC_ARCH build  
							
							 
							
							
							
						 
						
							2022-10-27 14:10:26 +08:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								e7fd8d21a6 
								
							 
						 
						
							
							
								
								Add GEMMT based on looped GEMV  
							
							 
							
							
							
						 
						
							2022-10-26 15:33:58 +02:00  
						
					 
				
					
						
							
							
								 
								Honglin Zhu
							
						 
						
							 
							
							
							
							
								
							
							
								843e9fd0b9 
								
							 
						 
						
							
							
								
								Fix typo  
							
							 
							
							
							
						 
						
							2022-10-26 17:06:33 +08:00  
						
					 
				
					
						
							
							
								 
								Honglin Zhu
							
						 
						
							 
							
							
							
							
								
							
							
								b00d5b9746 
								
							 
						 
						
							
							
								
								New sbgemm implementation for Neoverse N2  
							
							 
							
							... 
							
							
							
							1. Use UZP instructions instead of gather-load and scatter-store instructions to get lower latency.
    2. Pad k to a power of 4. 
							
						 
						
							2022-10-26 15:09:41 +08:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								8c10f0abba 
								
							 
						 
						
							
							
								
								Merge pull request  #3794  from bartoldeman/benchmark-align-malloc  
							
							 
							
							... 
							
							
							
							Benchmarks: align malloc'ed buffers. 
							
						 
						
							2022-10-21 16:13:58 +02:00  
						
					 
				
					
						
							
							
								 
								Bart Oldeman
							
						 
						
							 
							
							
							
							
								
							
							
								9e6b060bf3 
								
							 
						 
						
							
							
								
								Fix comment.  
							
							 
							
							... 
							
							
							
							It stores the pointer, not an offset (that would be an alternative approach). 
							
						 
						
							2022-10-20 20:11:09 -04:00  
						
					 
				
					
						
							
							
								 
								Bart Oldeman
							
						 
						
							 
							
							
							
							
								
							
							
								9959a60873 
								
							 
						 
						
							
							
								
								Benchmarks: align malloc'ed buffers.  
							
							 
							
							... 
							
							
							
							Benchmarks should allocate with cacheline (often 64 bytes) alignment
to avoid unreliable timings. This technique, storing the offset in the
byte before the pointer, doesn't require C11's aligned_alloc for
compatibility with older compilers.
For example, Glibc's x86_64 malloc returns 16-byte aligned buffers, which is
not sufficient for AVX/AVX2 (32-byte preferred) or AVX512 (64-byte). 
							
						 
						
							2022-10-20 13:28:20 -04:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								ad424fce08 
								
							 
						 
						
							
							
								
								Merge pull request  #3791  from martin-frbg/issue3790  
							
							 
							
							... 
							
							
							
							Fix pkgconfig file generation for INTERFACE64 builds 
							
						 
						
							2022-10-19 07:11:33 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								5f72415f10 
								
							 
						 
						
							
							
								
								Suffix the pkgconfig file itself in INTERFACE64 builds  
							
							 
							
							
							
						 
						
							2022-10-18 20:29:24 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								747ade5adf 
								
							 
						 
						
							
							
								
								fix INTERFACE64/USE64BITINT reporting  
							
							 
							
							
							
						 
						
							2022-10-18 17:28:07 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								8bacea1254 
								
							 
						 
						
							
							
								
								Pass libsuffix to openblas.pc and fix passing of INTERFACE64/USE64BITINT flag  
							
							 
							
							
							
						 
						
							2022-10-18 16:18:29 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								b2523471c9 
								
							 
						 
						
							
							
								
								Add libsuffix support  
							
							 
							
							
							
						 
						
							2022-10-18 16:16:26 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								11b2570c13 
								
							 
						 
						
							
							
								
								Merge pull request  #3786  from martin-frbg/issue3784  
							
							 
							
							... 
							
							
							
							Disable the gfortran tree vectorizer for lapack-netlib 
							
						 
						
							2022-10-13 18:34:28 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								ab6009b0b6 
								
							 
						 
						
							
							
								
								Merge pull request  #3773  from staticfloat/sf/openblas_default_num_threads  
							
							 
							
							... 
							
							
							
							Add `OPENBLAS_DEFAULT_NUM_THREADS` 
							
						 
						
							2022-10-13 14:15:14 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								32566bfb44 
								
							 
						 
						
							
							
								
								Disable the gfortran tree vectorizer for netlib LAPACK  
							
							 
							
							
							
						 
						
							2022-10-13 14:04:25 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								57809526c4 
								
							 
						 
						
							
							
								
								Disable the gfortran tree vectorizer for lapack-netlib  
							
							 
							
							
							
						 
						
							2022-10-13 09:12:23 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								eece0dfd14 
								
							 
						 
						
							
							
								
								Merge pull request  #3781  from martin-frbg/issue3779  
							
							 
							
							... 
							
							
							
							Fix building with only a subset of variable types on Windows 
							
						 
						
							2022-10-01 19:26:09 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								db50ab4a72 
								
							 
						 
						
							
							
								
								Add BUILD_vartype defines  
							
							 
							
							
							
						 
						
							2022-10-01 15:14:51 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								a84a8a7096 
								
							 
						 
						
							
							
								
								Merge pull request  #3778  from martin-frbg/issue3775  
							
							 
							
							... 
							
							
							
							Fix misdetection of gfortran on Cray systems 
							
						 
						
							2022-10-01 15:12:40 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								79d842047a 
								
							 
						 
						
							
							
								
								Move Cray case after GNU as Cray builds of gfortran have both names in the version string  
							
							 
							
							
							
						 
						
							2022-09-30 11:58:15 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								5e78493d95 
								
							 
						 
						
							
							
								
								Move Cray case after GNU as Cray builds of gfortran have both names in the version string  
							
							 
							
							
							
						 
						
							2022-09-30 11:55:56 +02:00  
						
					 
				
					
						
							
							
								 
								Elliot Saba
							
						 
						
							 
							
							
							
							
								
							
							
								d2ce93179f 
								
							 
						 
						
							
							
								
								Add `OPENBLAS_DEFAULT_NUM_THREADS`  
							
							 
							
							... 
							
							
							
							This allows Julia to set a default number of threads (usually `1`) to be
used when no other thread counts are specified [0], to short-circuit the
default OpenBLAS thread initialization routine that spins up a different
number of threads than Julia would otherwise choose.
The reason to add a new environment variable is that we want to be able
to configure OpenBLAS to avoid performing its initial memory
allocation/thread startup, as that can consume significant amounts of
memory, but we still want to be sensitive to legacy codebases that set
things like `OMP_NUM_THREADS` or `GOTOBLAS_NUM_THREADS`.  Creating a new
environment variable that is openblas-specific and is not already
publicly used to control the overall number of threads of programs like
Julia seems to be the best way forward.
[0] https://github.com/JuliaLang/julia/pull/46844  
							
						 
						
							2022-09-30 01:21:44 +00:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								8e851160d7 
								
							 
						 
						
							
							
								
								Merge pull request  #3772  from siko1056/develop  
							
							 
							
							... 
							
							
							
							Support CONSISTENT_FPCSR on aarch64 systems 
							
						 
						
							2022-09-29 20:22:50 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								cf132deb14 
								
							 
						 
						
							
							
								
								Merge pull request  #3774  from sashashura/patch-1  
							
							 
							
							... 
							
							
							
							GitHub Workflows security hardening 
							
						 
						
							2022-09-29 18:49:50 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								6077d81161 
								
							 
						 
						
							
							
								
								Merge pull request  #3777  from martin-frbg/fixmips64generic2  
							
							 
							
							... 
							
							
							
							Fix MIPS64_GENERIC copyobj declarations for DYNAMIC_ARCH 
							
						 
						
							2022-09-29 13:50:59 +02:00  
						
					 
				
					
						
							
							
								 
								Martin Kroeker
							
						 
						
							 
							
							
								
								
							
							
							
								
							
							
								f6f35a4288 
								
							 
						 
						
							
							
								
								fix copyobj declarations to work with DYNAMIC_ARCH  
							
							 
							
							
							
						 
						
							2022-09-29 08:47:14 +02:00  
						
					 
				
					
						
							
							
								 
								Alex
							
						 
						
							 
							
							
							
							
								
							
							
								c726604319 
								
							 
						 
						
							
							
								
								build: harden dynamic_arch.yml permissions  
							
							 
							
							... 
							
							
							
							Signed-off-by: Alex <aleksandrosansan@gmail.com> 
							
						 
						
							2022-09-26 13:48:11 +02:00  
						
					 
				
					
						
							
							
								 
								Alex
							
						 
						
							 
							
							
							
							
								
							
							
								4de8e1b8f9 
								
							 
						 
						
							
							
								
								build: harden mips64.yml permissions  
							
							 
							
							... 
							
							
							
							Signed-off-by: Alex <aleksandrosansan@gmail.com> 
							
						 
						
							2022-09-26 13:47:15 +02:00  
						
					 
				
					
						
							
							
								 
								Alex
							
						 
						
							 
							
							
							
							
								
							
							
								11cd108095 
								
							 
						 
						
							
							
								
								build: harden nightly-Homebrew-build.yml permissions  
							
							 
							
							... 
							
							
							
							Signed-off-by: Alex <aleksandrosansan@gmail.com> 
							
						 
						
							2022-09-26 13:46:34 +02:00