From d7a77091a3468354ac57ee76a682c04ac9c5ad03 Mon Sep 17 00:00:00 2001 From: Jake Arkinstall <65358059+jake-arkinstall@users.noreply.github.com> Date: Wed, 10 Feb 2021 12:11:17 +0000 Subject: [PATCH 01/12] Addressed issue #3100, removing an unnecessary write to the include directory --- cmake/lapacke.cmake | 1 - 1 file changed, 1 deletion(-) diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index f10905c4d..54a583887 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -2499,6 +2499,5 @@ foreach (Utils_FILE ${Utils_SRC}) endforeach () set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include") -configure_file("${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h" COPYONLY) include_directories(${lapacke_include_dir}) set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}") From ece3ce581e3ec530eaccfe7f284c52e115ec7aa9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 10 Feb 2021 14:22:59 +0100 Subject: [PATCH 02/12] Strip parenthesized (pkgversion) data from GCC version string to avoid misinterpretation --- f_check | 1 + 1 file changed, 1 insertion(+) diff --git a/f_check b/f_check index e9aca4ff9..ffe9c6b46 100644 --- a/f_check +++ b/f_check @@ -75,6 +75,7 @@ if ($compiler eq "") { } elsif ($data =~ /GNU/ || $data =~ /GCC/ ) { + $data =~ s/\(+.*?\)+//g; $data =~ /(\d+)\.(\d+).(\d+)/; $major = $1; $minor = $2; From db348dcff2b3267e40de634bda9173370dd6b001 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 09:23:05 +0100 Subject: [PATCH 03/12] Enable optimized srot/drot kernels from Haswell --- kernel/x86_64/KERNEL.ZEN | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/x86_64/KERNEL.ZEN b/kernel/x86_64/KERNEL.ZEN index 7bb308fea..a66394be3 100644 --- a/kernel/x86_64/KERNEL.ZEN +++ b/kernel/x86_64/KERNEL.ZEN @@ -97,3 +97,5 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c ZGEMM3MKERNEL = 
zgemm3m_kernel_4x4_haswell.c +SROTKERNEL = srot.c +DROTKERNEL = drot.c From 46509953a9dd1907f05465e2212d4477cb26b14c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 09:24:16 +0100 Subject: [PATCH 04/12] Use Haswell optimizations for Zen as well --- kernel/x86_64/drot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/drot.c b/kernel/x86_64/drot.c index 66e9ff907..ab5048bd1 100644 --- a/kernel/x86_64/drot.c +++ b/kernel/x86_64/drot.c @@ -2,7 +2,7 @@ #if defined(SKYLAKEX) #include "drot_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "drot_microk_haswell-2.c" #endif From 950c047b49c159fd8a8804ecae351cccc2865d02 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 09:24:51 +0100 Subject: [PATCH 05/12] Use Haswell optimizations for Zen as well --- kernel/x86_64/srot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/srot.c b/kernel/x86_64/srot.c index 3264d251a..587cf8e40 100644 --- a/kernel/x86_64/srot.c +++ b/kernel/x86_64/srot.c @@ -2,7 +2,7 @@ #if defined(SKYLAKEX) #include "srot_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "srot_microk_haswell-2.c" #endif From ce7ddd8921fa784079face668eab93c778623cac Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 09:25:36 +0100 Subject: [PATCH 06/12] Use Haswell optimizations for Zen as well --- kernel/x86_64/sasum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/sasum.c b/kernel/x86_64/sasum.c index d0cea9bee..a021741c7 100644 --- a/kernel/x86_64/sasum.c +++ b/kernel/x86_64/sasum.c @@ -11,7 +11,7 @@ #if defined(SKYLAKEX) #include "sasum_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "sasum_microk_haswell-2.c" #endif From 47691c031fa128ed65f630dd009a943465a2d92f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 09:26:15 
+0100 Subject: [PATCH 07/12] Use Haswell optimizations for Zen as well --- kernel/x86_64/dasum.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/dasum.c b/kernel/x86_64/dasum.c index 534f257d2..8af9e798b 100644 --- a/kernel/x86_64/dasum.c +++ b/kernel/x86_64/dasum.c @@ -6,7 +6,7 @@ #if defined(SKYLAKEX) #include "dasum_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "dasum_microk_haswell-2.c" #endif From ae53e3e23343739e61439e39cbcac1f0d684b134 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 20:16:27 +0100 Subject: [PATCH 08/12] Recognize Intel Tiger Lake as SkylakeX --- cpuid_x86.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/cpuid_x86.c b/cpuid_x86.c index aca37da45..44704fcd9 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1418,6 +1418,15 @@ int get_cpuname(void){ case 9: case 8: switch (model) { + case 12: // Tiger Lake + if(support_avx512()) + return CPUTYPE_SKYLAKEX; + if(support_avx2()) + return CPUTYPE_HASWELL; + if(support_avx()) + return CPUTYPE_SANDYBRIDGE; + else + return CPUTYPE_NEHALEM; case 14: // Kaby Lake and refreshes if(support_avx2()) return CPUTYPE_HASWELL; @@ -2124,6 +2133,16 @@ int get_coretype(void){ break; case 9: case 8: + if (model == 12) { // Tiger Lake + if(support_avx512()) + return CPUTYPE_SKYLAKEX; + if(support_avx2()) + return CPUTYPE_HASWELL; + if(support_avx()) + return CPUTYPE_SANDYBRIDGE; + else + return CPUTYPE_NEHALEM; + } if (model == 14) { // Kaby Lake if(support_avx()) #ifndef NO_AVX2 From e4e5042e3859583387eb43c143c57bab671002a9 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 11 Feb 2021 20:17:11 +0100 Subject: [PATCH 09/12] Recognize Intel Tiger Lake as SkylakeX --- driver/others/dynamic.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 7845d6951..158e1b3da 100644 --- a/driver/others/dynamic.c +++ 
b/driver/others/dynamic.c @@ -644,6 +644,21 @@ static gotoblas_t *get_coretype(void){ return NULL; case 9: case 8: + if (model == 12) { // Tiger Lake + if (support_avx512()) + return &gotoblas_SKYLAKEX; + if(support_avx2()){ + openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK); + return &gotoblas_HASWELL; + } + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { + openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); + return &gotoblas_NEHALEM; + } + } if (model == 14 ) { // Kaby Lake, Coffee Lake if(support_avx2()) return &gotoblas_HASWELL; From 63fa6c832ea142ecac3c61e2ce542949ae8ccdcb Mon Sep 17 00:00:00 2001 From: Rajalakshmi Srinivasaraghavan Date: Thu, 11 Feb 2021 21:28:03 -0600 Subject: [PATCH 10/12] Fix build issue on POWER8 with DYNAMIC_ARCH Running make DYNAMIC_ARCH=1 on big-endian POWER8 with GCC 10.2 fails with the following error because of the difference in UNROLL_M/N: 'No rule to make target 'dgemm_incopy_POWER10.o', needed by kernel' --- param.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/param.h b/param.h index 6a790ab61..9ba25de6a 100644 --- a/param.h +++ b/param.h @@ -2443,8 +2443,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 8 +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#define DGEMM_DEFAULT_UNROLL_M 16 +#define DGEMM_DEFAULT_UNROLL_N 4 +#else #define DGEMM_DEFAULT_UNROLL_M 8 #define DGEMM_DEFAULT_UNROLL_N 8 +#endif #define CGEMM_DEFAULT_UNROLL_M 8 #define CGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_M 8 From b0bded3f2f3da67a1e8ac1ab10a04a73838a13cd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 18 Feb 2021 11:14:05 +0100 Subject: [PATCH 11/12] Fix get_num_procs() in the USE_TLS branch for non-glibc systems --- driver/others/memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 0d4b2ff31..75203a7b0 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -222,11 +222,11 @@ int get_num_procs(void); #else int get_num_procs(void) { static int nums = 0; + +#if defined(__GLIBC_PREREQ) cpu_set_t cpuset,*cpusetp; size_t size; int ret; - -#if defined(__GLIBC_PREREQ) #if !__GLIBC_PREREQ(2, 7) int i; #if !__GLIBC_PREREQ(2, 6) From dbbf92c1d120c22c0ce7d5b8e1d7ec35f9bace34 Mon Sep 17 00:00:00 2001 From: Peter Hawkins Date: Thu, 18 Feb 2021 13:46:50 -0500 Subject: [PATCH 12/12] Fix race in blas_thread_shutdown. blas_server_avail was read without holding server_lock. If multiple threads call blas_thread_shutdown simultaneously, for example, by calling fork(), then they can attempt to shut down multiple times. This can lead to a segmentation fault. 
--- driver/others/blas_server.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index 5e0943c2e..fa07a1ea4 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -1024,38 +1024,39 @@ int BLASFUNC(blas_thread_shutdown)(void){ int i; - if (!blas_server_avail) return 0; - LOCK_COMMAND(&server_lock); - for (i = 0; i < blas_num_threads - 1; i++) { + if (blas_server_avail) { + + for (i = 0; i < blas_num_threads - 1; i++) { - pthread_mutex_lock (&thread_status[i].lock); + pthread_mutex_lock (&thread_status[i].lock); - atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1); - thread_status[i].status = THREAD_STATUS_WAKEUP; - pthread_cond_signal (&thread_status[i].wakeup); + atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1); + thread_status[i].status = THREAD_STATUS_WAKEUP; + pthread_cond_signal (&thread_status[i].wakeup); - pthread_mutex_unlock(&thread_status[i].lock); + pthread_mutex_unlock(&thread_status[i].lock); - } + } - for(i = 0; i < blas_num_threads - 1; i++){ - pthread_join(blas_threads[i], NULL); - } + for(i = 0; i < blas_num_threads - 1; i++){ + pthread_join(blas_threads[i], NULL); + } - for(i = 0; i < blas_num_threads - 1; i++){ - pthread_mutex_destroy(&thread_status[i].lock); - pthread_cond_destroy (&thread_status[i].wakeup); - } + for(i = 0; i < blas_num_threads - 1; i++){ + pthread_mutex_destroy(&thread_status[i].lock); + pthread_cond_destroy (&thread_status[i].wakeup); + } #ifdef NEED_STACKATTR - pthread_attr_destory(&attr); + pthread_attr_destroy(&attr); #endif - blas_server_avail = 0; + blas_server_avail = 0; + } UNLOCK_COMMAND(&server_lock); return 0;