Merge pull request #13 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2021-02-22 19:31:41 +01:00 committed by GitHub
commit 86a5f98e4a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 67 additions and 25 deletions

View File

@ -2499,6 +2499,5 @@ foreach (Utils_FILE ${Utils_SRC})
endforeach () endforeach ()
set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include") set(lapacke_include_dir "${NETLIB_LAPACK_DIR}/LAPACKE/include")
configure_file("${lapacke_include_dir}/lapacke_mangling_with_flags.h.in" "${lapacke_include_dir}/lapacke_mangling.h" COPYONLY)
include_directories(${lapacke_include_dir}) include_directories(${lapacke_include_dir})
set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}") set_source_files_properties(${LAPACKE_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_CFLAGS}")

View File

@ -1418,6 +1418,15 @@ int get_cpuname(void){
case 9: case 9:
case 8: case 8:
switch (model) { switch (model) {
case 12: // Tiger Lake
if(support_avx512())
return CPUTYPE_SKYLAKEX;
if(support_avx2())
return CPUTYPE_HASWELL;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
case 14: // Kaby Lake and refreshes case 14: // Kaby Lake and refreshes
if(support_avx2()) if(support_avx2())
return CPUTYPE_HASWELL; return CPUTYPE_HASWELL;
@ -2124,6 +2133,16 @@ int get_coretype(void){
break; break;
case 9: case 9:
case 8: case 8:
if (model == 12) { // Tiger Lake
if(support_avx512())
return CPUTYPE_SKYLAKEX;
if(support_avx2())
return CPUTYPE_HASWELL;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
}
if (model == 14) { // Kaby Lake if (model == 14) { // Kaby Lake
if(support_avx()) if(support_avx())
#ifndef NO_AVX2 #ifndef NO_AVX2

View File

@ -1024,38 +1024,39 @@ int BLASFUNC(blas_thread_shutdown)(void){
int i; int i;
if (!blas_server_avail) return 0;
LOCK_COMMAND(&server_lock); LOCK_COMMAND(&server_lock);
for (i = 0; i < blas_num_threads - 1; i++) { if (blas_server_avail) {
for (i = 0; i < blas_num_threads - 1; i++) {
pthread_mutex_lock (&thread_status[i].lock); pthread_mutex_lock (&thread_status[i].lock);
atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1); atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)-1);
thread_status[i].status = THREAD_STATUS_WAKEUP; thread_status[i].status = THREAD_STATUS_WAKEUP;
pthread_cond_signal (&thread_status[i].wakeup); pthread_cond_signal (&thread_status[i].wakeup);
pthread_mutex_unlock(&thread_status[i].lock); pthread_mutex_unlock(&thread_status[i].lock);
} }
for(i = 0; i < blas_num_threads - 1; i++){ for(i = 0; i < blas_num_threads - 1; i++){
pthread_join(blas_threads[i], NULL); pthread_join(blas_threads[i], NULL);
} }
for(i = 0; i < blas_num_threads - 1; i++){ for(i = 0; i < blas_num_threads - 1; i++){
pthread_mutex_destroy(&thread_status[i].lock); pthread_mutex_destroy(&thread_status[i].lock);
pthread_cond_destroy (&thread_status[i].wakeup); pthread_cond_destroy (&thread_status[i].wakeup);
} }
#ifdef NEED_STACKATTR #ifdef NEED_STACKATTR
pthread_attr_destory(&attr); pthread_attr_destroy(&attr);
#endif #endif
blas_server_avail = 0; blas_server_avail = 0;
}
UNLOCK_COMMAND(&server_lock); UNLOCK_COMMAND(&server_lock);
return 0; return 0;

View File

@ -644,6 +644,21 @@ static gotoblas_t *get_coretype(void){
return NULL; return NULL;
case 9: case 9:
case 8: case 8:
if (model == 12) { // Tiger Lake
if (support_avx512())
return &gotoblas_SKYLAKEX;
if(support_avx2()){
openblas_warning(FALLBACK_VERBOSE, HASWELL_FALLBACK);
return &gotoblas_HASWELL;
}
if(support_avx()) {
openblas_warning(FALLBACK_VERBOSE, SANDYBRIDGE_FALLBACK);
return &gotoblas_SANDYBRIDGE;
} else {
openblas_warning(FALLBACK_VERBOSE, NEHALEM_FALLBACK);
return &gotoblas_NEHALEM;
}
}
if (model == 14 ) { // Kaby Lake, Coffee Lake if (model == 14 ) { // Kaby Lake, Coffee Lake
if(support_avx2()) if(support_avx2())
return &gotoblas_HASWELL; return &gotoblas_HASWELL;

View File

@ -222,11 +222,11 @@ int get_num_procs(void);
#else #else
int get_num_procs(void) { int get_num_procs(void) {
static int nums = 0; static int nums = 0;
#if defined(__GLIBC_PREREQ)
cpu_set_t cpuset,*cpusetp; cpu_set_t cpuset,*cpusetp;
size_t size; size_t size;
int ret; int ret;
#if defined(__GLIBC_PREREQ)
#if !__GLIBC_PREREQ(2, 7) #if !__GLIBC_PREREQ(2, 7)
int i; int i;
#if !__GLIBC_PREREQ(2, 6) #if !__GLIBC_PREREQ(2, 6)

View File

@ -75,6 +75,7 @@ if ($compiler eq "") {
} elsif ($data =~ /GNU/ || $data =~ /GCC/ ) { } elsif ($data =~ /GNU/ || $data =~ /GCC/ ) {
$data =~ s/\(+.*?\)+//g;
$data =~ /(\d+)\.(\d+).(\d+)/; $data =~ /(\d+)\.(\d+).(\d+)/;
$major = $1; $major = $1;
$minor = $2; $minor = $2;

View File

@ -97,3 +97,5 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c
SROTKERNEL = srot.c
DROTKERNEL = drot.c

View File

@ -6,7 +6,7 @@
#if defined(SKYLAKEX) #if defined(SKYLAKEX)
#include "dasum_microk_skylakex-2.c" #include "dasum_microk_skylakex-2.c"
#elif defined(HASWELL) #elif defined(HASWELL) || defined(ZEN)
#include "dasum_microk_haswell-2.c" #include "dasum_microk_haswell-2.c"
#endif #endif

View File

@ -2,7 +2,7 @@
#if defined(SKYLAKEX) #if defined(SKYLAKEX)
#include "drot_microk_skylakex-2.c" #include "drot_microk_skylakex-2.c"
#elif defined(HASWELL) #elif defined(HASWELL) || defined(ZEN)
#include "drot_microk_haswell-2.c" #include "drot_microk_haswell-2.c"
#endif #endif

View File

@ -11,7 +11,7 @@
#if defined(SKYLAKEX) #if defined(SKYLAKEX)
#include "sasum_microk_skylakex-2.c" #include "sasum_microk_skylakex-2.c"
#elif defined(HASWELL) #elif defined(HASWELL) || defined(ZEN)
#include "sasum_microk_haswell-2.c" #include "sasum_microk_haswell-2.c"
#endif #endif

View File

@ -2,7 +2,7 @@
#if defined(SKYLAKEX) #if defined(SKYLAKEX)
#include "srot_microk_skylakex-2.c" #include "srot_microk_skylakex-2.c"
#elif defined(HASWELL) #elif defined(HASWELL) || defined(ZEN)
#include "srot_microk_haswell-2.c" #include "srot_microk_haswell-2.c"
#endif #endif

View File

@ -2443,8 +2443,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 8 #define SGEMM_DEFAULT_UNROLL_N 8
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define DGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_N 4
#else
#define DGEMM_DEFAULT_UNROLL_M 8 #define DGEMM_DEFAULT_UNROLL_M 8
#define DGEMM_DEFAULT_UNROLL_N 8 #define DGEMM_DEFAULT_UNROLL_N 8
#endif
#define CGEMM_DEFAULT_UNROLL_M 8 #define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_N 4 #define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_M 8 #define ZGEMM_DEFAULT_UNROLL_M 8