b4495a8fb8 
								
							 
						 
						
							
							
								
								Merge branch 'develop' into arm64_cmake_small_matrix_opt  
							
							
							
						 
						
							2024-10-03 20:04:52 +02:00  
				
					
						
							
							
								 
						
							
								7087b0a7d0 
								
							 
						 
						
							
							
								
								ARM64: Enable SMALL_MATRIX_OPT when compiling with CMake  
							
							
							
						 
						
							2024-09-29 10:31:26 +08:00  
				
					
						
							
							
								 
						
							
								30af9278dc 
								
							 
						 
						
							
							
								
								LoongArch64: Enable cmake cross-compilation  
							
							
							
						 
						
							2024-09-29 10:13:30 +08:00  
				
					
						
							
							
								 
						
							
								1265eee85c 
								
							 
						 
						
							
							
								
								fix cmake typo for power10 cc version check  
							
							... 
							
							
							
							fixes 668f48f4fc 
							
						 
						
							2024-08-09 20:38:58 +02:00  
				
					
						
							
							
								 
						
							
								cc36db643e 
								
							 
						 
						
							
							
								
								Support new LAPACK build option LAPACK_STRLEN  
							
							
							
						 
						
							2024-08-06 17:31:03 +02:00  
				
					
						
							
							
								 
						
							
								e8bd97ab4b 
								
							 
						 
						
							
							
								
								add RISCV64 entries for DYNAMIC_ARCH  
							
							
							
						 
						
							2024-08-03 23:56:59 +02:00  
				
					
						
							
							
								 
						
							
								9eecd0d33b 
								
							 
						 
						
							
							
								
								enable GEMM/GEMV forwarding for riscv and ppc  
							
							
							
						 
						
							2024-07-31 23:29:12 +02:00  
				
					
						
							
							
								 
						
							
								b26424c6a2 
								
							 
						 
						
							
							
								
								Allow opt into GEMM -> GEMV forwarding  
							
							
							
						 
						
							2024-07-31 13:09:14 +01:00  
				
					
						
							
							
								 
						
							
								821ef34635 
								
							 
						 
						
							
							
								
								Add A64FX to the list of CPUs supported by DYNAMIC_ARCH  
							
							
							
						 
						
							2024-07-23 20:44:39 +09:00  
				
					
						
							
							
								 
						
							
								cea4abcac0 
								
							 
						 
						
							
							
								
								Fix compiling on mingw  
							
							
							
						 
						
							2024-07-04 14:56:16 +02:00  
				
					
						
							
							
								 
						
							
								a3f6b13bc9 
								
							 
						 
						
							
							
								
								remove spurious brace  
							
							
							
						 
						
							2024-05-16 09:25:53 +02:00  
				
					
						
							
							
								 
						
							
								668f48f4fc 
								
							 
						 
						
							
							
								
								Use CMAKE_C_COMPILER_VERSION instead of dumpversion calls ( #4698 )  
							
							... 
							
							
							
							* Use CMAKE_C_COMPILER_VERSION throughout 
							
						 
						
							2024-05-15 23:58:14 +02:00  
				
					
						
							
							
								 
						
							
								3d26837a35 
								
							 
						 
						
							
							
								
								Suppress GCC14 error exit in the f2c-converted LAPACK  
							
							
							
						 
						
							2024-04-30 19:05:18 +02:00  
				
					
						
							
							
								 
						
							
								cda55f2fd2 
								
							 
						 
						
							
							
								
								Don't pass `-exhaustive-register-search` directly to clang compiler  
							
							... 
							
							
							
							`-exhaustive-register-search` is an LLVM code generation flag that shouldn't be passed directly to clang compiler. 
							
						 
						
							2024-04-06 05:54:48 +01:00  
				
					
						
							
							
								 
						
							
								52b71a1673 
								
							 
						 
						
							
							
								
								Filter out FFLAGS that flang-new from LLVM18 no longer supports ( #4569 )  
							
							... 
							
							
							
							* Filter out FFLAGS that flang-new from LLVM18 no longer supports 
							
						 
						
							2024-03-22 17:02:39 +01:00  
				
					
						
							
							
								 
						
							
								a0e3f77e0b 
								
							 
						 
						
							
							
								
								add FIXED_LIBNAME, PREFIX and SUFFIX  
							
							
							
						 
						
							2024-02-15 12:17:38 +01:00  
				
					
						
							
							
								 
						
							
								49689fbef7 
								
							 
						 
						
							
							
								
								Add support for compiling SVE kernels with the NVIDIA HPC compiler  
							
							
							
						 
						
							2023-08-25 17:11:04 +02:00  
				
					
						
							
							
								 
						
							
								ac698cedad 
								
							 
						 
						
							
							
								
								Add compiler options for ARM64 SVE targets in DYNAMIC_ARCH builds  
							
							
							
						 
						
							2023-07-05 09:47:49 +02:00  
				
					
						
							
							
								 
						
							
								d2144b2981 
								
							 
						 
						
							
							
								
								Add NVHPC  
							
							
							
						 
						
							2023-06-09 19:01:15 +02:00  
				
					
						
							
							
								 
						
							
								de937b3194 
								
							 
						 
						
							
							
								
								Add clang option to avoid running out of registers in AVX512 assembly  
							
							
							
						 
						
							2023-03-17 21:22:37 +01:00  
				
					
						
							
							
								 
						
							
								e964ebd0d0 
								
							 
						 
						
							
							
								
								Add compiler option for AVX512-capable Ryzen(4)  
							
							
							
						 
						
							2023-02-02 19:04:05 +01:00  
				
					
						
							
							
								 
						
							
								a0a4f7c447 
								
							 
						 
						
							
							
								
								Add -mfma to -mavx2 for clang, and add AVX2 declaration for Zen in DYNAMIC_ARCH builds  
							
							
							
						 
						
							2022-09-13 22:47:00 +02:00  
				
					
						
							
							
								 
						
							
								85fd3c4279 
								
							 
						 
						
							
							
								
								Support compilation with the Cray C and Fortran compilers ( #3712 )  
							
							... 
							
							
							
							* Add support for the Cray Fortran compiler 
							
						 
						
							2022-08-04 20:42:18 +02:00  
				
					
						
							
							
								 
						
							
								18b19d135b 
								
							 
						 
						
							
							
								
								C_LAPACK: Fixes to make it compile with MSVC ( #3605 )  
							
							... 
							
							
							
							* Fix f2c-like support functions to compile with MSVC, and
re-enable C_LAPACK for MSVC in CMAKE
* Add MSVC&flang build to Azure CI in order to check C_LAPACK correctness 
							
						 
						
							2022-04-17 17:49:38 +02:00  
				
					
						
							
							
								 
						
							
								b7873605d4 
								
							 
						 
						
							
							
								
								Use f2c translations of LAPACK when no Fortran compiler is available ( #3539 )  
							
							... 
							
							
							
							* Add C equivalents of the Fortran routines from Reference-LAPACK as fallbacks, and C_LAPACK variable to trigger their use 
							
						 
						
							2022-04-09 22:38:58 +02:00  
				
					
						
							
							
								 
						
							
								d38110a5ce 
								
							 
						 
						
							
							
								
								Use CMake variables instead of as  
							
							
							
						 
						
							2021-12-10 17:46:53 -06:00  
				
					
						
							
							
								 
						
							
								214fbcee15 
								
							 
						 
						
							
							
								
								Fix cmake for power  
							
							
							
						 
						
							2021-12-09 08:28:17 -06:00  
				
					
						
							
							
								 
						
							
								de2ed66596 
								
							 
						 
						
							
							
								
								cmake: Set SUFFIX64 also for NOFORTRAN  
							
							
							
						 
						
							2021-11-15 08:53:52 +01:00  
				
					
						
							
							
								 
						
							
								3dc6052c7e 
								
							 
						 
						
							
							
								
								initial support for Sapphire Rapids platform  
							
							
							
						 
						
							2021-10-12 01:30:40 -07:00  
				
					
						
							
							
								 
						
							
								e02df9fc55 
								
							 
						 
						
							
							
								
								Propagate BUILD_BFLOAT16 to CFLAGS  
							
							
							
						 
						
							2021-09-14 16:12:27 +02:00  
				
					
						
							
							
								 
						
							
								76ea8db4da 
								
							 
						 
						
							
							
								
								Small Matrix: enable by default for x86_64 arch  
							
							... 
							
							
							
							If no customized GEMM_SMALL_M_PERMIT kernel defined, it will just by pass to normal path. 
							
						 
						
							2021-08-05 02:59:36 +00:00  
				
					
						
							
							
								 
						
							
								fee5abd84b 
								
							 
						 
						
							
							
								
								Small Matrix: support cmake build  
							
							
							
						 
						
							2021-08-04 08:50:15 +00:00  
				
					
						
							
							
								 
						
							
								30f23be0f9 
								
							 
						 
						
							
							
								
								Rework setting of -mfma to only apply it where necessary  
							
							
							
						 
						
							2021-07-22 12:00:03 +02:00  
				
					
						
							
							
								 
						
							
								91e2b11d3c 
								
							 
						 
						
							
							
								
								add to cmake listings too  
							
							
							
						 
						
							2021-06-20 15:32:42 +02:00  
				
					
						
							
							
								 
						
							
								725432efaa 
								
							 
						 
						
							
							
								
								pass NO_AVX512 macro def  
							
							
							
						 
						
							2021-04-07 00:10:41 +08:00  
				
					
						
							
							
								 
						
							
								33b5670122 
								
							 
						 
						
							
							
								
								Merge pull request  #3096  from martin-frbg/fixclangcmake  
							
							... 
							
							
							
							Fix Cooperlake/DYNAMIC_ARCH builds with clang on Windows 
							
						 
						
							2021-02-02 13:33:15 +01:00  
				
					
						
							
							
								 
						
							
								95e19e2e23 
								
							 
						 
						
							
							
								
								fix case in compiler name check  
							
							... 
							
							
							
							Co-authored-by: xoviat <49173759+xoviat@users.noreply.github.com> 
							
						 
						
							2021-02-02 10:53:46 +01:00  
				
					
						
							
							
								 
						
							
								99ac042702 
								
							 
						 
						
							
							
								
								remove spurious lines (probably editor malfunction)  
							
							
							
						 
						
							2021-02-01 21:02:53 +01:00  
				
					
						
							
							
								 
						
							
								774b9f8653 
								
							 
						 
						
							
							
								
								handle AppleClang in Cooperlake support condition  
							
							
							
						 
						
							2021-02-01 20:18:53 +01:00  
				
					
						
							
							
								 
						
							
								eb1d2344f7 
								
							 
						 
						
							
							
								
								Fix compiler version check for Intel Cooperlake support (clang-cl does not accept -dumpversion)  
							
							
							
						 
						
							2021-02-01 19:45:25 +01:00  
				
					
						
							
							
								 
						
							
								b60de4447a 
								
							 
						 
						
							
							
								
								add cortex-m platform  
							
							
							
						 
						
							2021-01-19 08:57:44 -06:00  
				
					
						
							
							
								 
						
							
								438a8e5624 
								
							 
						 
						
							
							
								
								Fix placement of getarch call and spurious cpu property accumulation in DYNAMIC_ARCH builds  
							
							
							
						 
						
							2020-11-07 20:26:12 +01:00  
				
					
						
							
							
								 
						
							
								0155cd53a3 
								
							 
						 
						
							
							
								
								Add -msse3 where needed for DYNAMIC_ARCH builds  
							
							
							
						 
						
							2020-11-03 23:45:49 +01:00  
				
					
						
							
							
								 
						
							
								b9bc76aec4 
								
							 
						 
						
							
							
								
								Add files via upload  
							
							
							
						 
						
							2020-11-02 22:43:50 +01:00  
				
					
						
							
							
								 
						
							
								f64243ff57 
								
							 
						 
						
							
							
								
								Add compiler options for sse/sse2/ssse3/sse4.1  
							
							
							
						 
						
							2020-10-16 10:47:06 +02:00  
				
					
						
							
							
								 
						
							
								e3a29f6b58 
								
							 
						 
						
							
							
								
								Change "HALF" and "sh" to "BFLOAT16" and "sb"  
							
							
							
						 
						
							2020-10-12 00:07:37 +02:00  
				
					
						
							
							
								 
						
							
								68e6823d36 
								
							 
						 
						
							
							
								
								Adapt for supporting only a subset of variable types  
							
							
							
						 
						
							2020-10-11 15:01:32 +02:00  
				
					
						
							
							
								 
						
							
								e1b7123bbe 
								
							 
						 
						
							
							
								
								Merge pull request  #2867  from Qiyu8/usimd-floatdot  
							
							... 
							
							
							
							Optimize the performance of dot by using universal intrinsics in X86/ARM 
							
						 
						
							2020-10-10 12:10:25 +02:00  
				
					
						
							
							
								 
						
							
								f32d34a015 
								
							 
						 
						
							
							
								
								add sse3 compiler flag  
							
							
							
						 
						
							2020-10-10 10:36:15 +08:00  
				
					
						
							
							
								 
						
							
								a5feea6611 
								
							 
						 
						
							
							
								
								make BLAS3_MEM_ALLOC_THRESHOLD configurable on non-Windows  
							
							
							
						 
						
							2020-10-04 23:01:06 +02:00