diff --git a/cmake/lapacke.cmake b/cmake/lapacke.cmake index f10905c4d..54a583887 100644 --- a/cmake/lapacke.cmake +++ b/cmake/lapacke.cmake @@ -2499,6 +2499,5 @@ foreach (Utils_FILE ${Utils_SRC}) endforeach () set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include") -configure_file("${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h" COPYONLY) include_directories(${lapacke_include_dir}) set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}") diff --git a/cpuid_x86.c b/cpuid_x86.c index aca37da45..44704fcd9 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -1418,6 +1418,15 @@ int get_cpuname(void){ case 9: case 8: switch (model) { + case 12: // Tiger Lake + if(support_avx512()) + return CPUTYPE_SKYLAKEX; + if(support_avx2()) + return CPUTYPE_HASWELL; + if(support_avx()) + return CPUTYPE_SANDYBRIDGE; + else + return CPUTYPE_NEHALEM; case 14: // Kaby Lake and refreshes if(support_avx2()) return CPUTYPE_HASWELL; @@ -2124,6 +2133,16 @@ int get_coretype(void){ break; case 9: case 8: + if (model == 12) { // Tiger Lake + if(support_avx512()) + return CPUTYPE_SKYLAKEX; + if(support_avx2()) + return CPUTYPE_HASWELL; + if(support_avx()) + return CPUTYPE_SANDYBRIDGE; + else + return CPUTYPE_NEHALEM; + } if (model == 14) { // Kaby Lake if(support_avx()) #ifndef NO_AVX2 diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index 5e0943c2e..fa07a1ea4 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -1024,38 +1024,39 @@ int BLASFUNC(blas_thread_shutdown)(void){ int i; - if (!blas_server_avail) return 0; - LOCK_COMMAND(&server_lock); - for (i = 0; i < blas_num_threads - 1; i++) { + if (blas_server_avail) { + + for (i = 0; i < blas_num_threads - 1; i++) { - pthread_mutex_lock (&thread_status[i].lock); + pthread_mutex_lock (&thread_status[i].lock); - atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1); - thread_status[i].status = THREAD_STATUS_WAKEUP; - pthread_cond_signal (&thread_status[i].wakeup); + atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1); + thread_status[i].status = THREAD_STATUS_WAKEUP; + pthread_cond_signal (&thread_status[i].wakeup); - pthread_mutex_unlock(&thread_status[i].lock); + pthread_mutex_unlock(&thread_status[i].lock); - } + } - for(i = 0; i < blas_num_threads - 1; i++){ - pthread_join(blas_threads[i], NULL); - } + for(i = 0; i < blas_num_threads - 1; i++){ + pthread_join(blas_threads[i], NULL); + } - for(i = 0; i < blas_num_threads - 1; i++){ - pthread_mutex_destroy(&thread_status[i].lock); - pthread_cond_destroy (&thread_status[i].wakeup); - } + for(i = 0; i < blas_num_threads - 1; i++){ + pthread_mutex_destroy(&thread_status[i].lock); + pthread_cond_destroy (&thread_status[i].wakeup); + } #ifdef NEED_STACKATTR - pthread_attr_destory(&attr); + pthread_attr_destroy(&attr); #endif - blas_server_avail = 0; + blas_server_avail = 0; + } UNLOCK_COMMAND(&server_lock); return 0; diff --git a/driver/others/dynamic.c b/driver/others/dynamic.c index 7845d6951..158e1b3da 100644 --- a/driver/others/dynamic.c +++ b/driver/others/dynamic.c @@ -644,6 +644,21 @@ static gotoblas_t *get_coretype(void){ return NULL; case 9: case 8: + if (model == 12) { // Tiger Lake + if (support_avx512()) + return &gotoblas_SKYLAKEX; + if(support_avx2()){ + openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK); + return &gotoblas_HASWELL; + } + if(support_avx()) { + openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK); + return &gotoblas_SANDYBRIDGE; + } else { + openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK); + return &gotoblas_NEHALEM; + } + } if (model == 14 ) { // Kaby Lake, Coffee Lake if(support_avx2()) return &gotoblas_HASWELL; diff --git a/driver/others/memory.c b/driver/others/memory.c index 0d4b2ff31..75203a7b0 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -222,11 +222,11 @@ int get_num_procs(void); #else int get_num_procs(void) { static int nums = 0; + +#if defined(__GLIBC_PREREQ) cpu_set_t cpuset,*cpusetp; size_t size; int ret; - -#if defined(__GLIBC_PREREQ) #if !__GLIBC_PREREQ(2, 7) int i; #if !__GLIBC_PREREQ(2, 6) diff --git a/f_check b/f_check index e9aca4ff9..ffe9c6b46 100644 --- a/f_check +++ b/f_check @@ -75,6 +75,7 @@ if ($compiler eq "") { } elsif ($data =~ /GNU/ || $data =~ /GCC/ ) { + $data =~ s/\(+.*?\)+//g; $data =~ /(\d+)\.(\d+).(\d+)/; $major = $1; $minor = $2; diff --git a/kernel/x86_64/KERNEL.ZEN b/kernel/x86_64/KERNEL.ZEN index 7bb308fea..a66394be3 100644 --- a/kernel/x86_64/KERNEL.ZEN +++ b/kernel/x86_64/KERNEL.ZEN @@ -97,3 +97,5 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c +SROTKERNEL = srot.c +DROTKERNEL = drot.c diff --git a/kernel/x86_64/dasum.c b/kernel/x86_64/dasum.c index 534f257d2..8af9e798b 100644 --- a/kernel/x86_64/dasum.c +++ b/kernel/x86_64/dasum.c @@ -6,7 +6,7 @@ #if defined(SKYLAKEX) #include "dasum_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "dasum_microk_haswell-2.c" #endif diff --git a/kernel/x86_64/drot.c b/kernel/x86_64/drot.c index 66e9ff907..ab5048bd1 100644 --- a/kernel/x86_64/drot.c +++ b/kernel/x86_64/drot.c @@ -2,7 +2,7 @@ #if defined(SKYLAKEX) #include "drot_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "drot_microk_haswell-2.c" #endif diff --git a/kernel/x86_64/sasum.c b/kernel/x86_64/sasum.c index d0cea9bee..a021741c7 100644 --- a/kernel/x86_64/sasum.c +++ b/kernel/x86_64/sasum.c @@ -11,7 +11,7 @@ #if defined(SKYLAKEX) #include "sasum_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "sasum_microk_haswell-2.c" #endif diff --git a/kernel/x86_64/srot.c b/kernel/x86_64/srot.c index 3264d251a..587cf8e40 100644 --- a/kernel/x86_64/srot.c +++ b/kernel/x86_64/srot.c @@ -2,7 +2,7 @@ #if defined(SKYLAKEX) #include "srot_microk_skylakex-2.c" -#elif defined(HASWELL) +#elif defined(HASWELL) || defined(ZEN) #include "srot_microk_haswell-2.c" #endif diff --git a/param.h b/param.h index 6a790ab61..9ba25de6a 100644 --- a/param.h +++ b/param.h @@ -2443,8 +2443,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 8 +#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) +#define DGEMM_DEFAULT_UNROLL_M 16 +#define DGEMM_DEFAULT_UNROLL_N 4 +#else #define DGEMM_DEFAULT_UNROLL_M 8 #define DGEMM_DEFAULT_UNROLL_N 8 +#endif #define CGEMM_DEFAULT_UNROLL_M 8 #define CGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_M 8