From 579be3aa9d0e196fc9cc91f6e1f2372e87638f78 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 9 Feb 2020 23:28:04 +0100 Subject: [PATCH 1/9] Add configuration option for BUFFER_SIZE --- Makefile.rule | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Makefile.rule b/Makefile.rule index 21b7e138a..724a60ec4 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -97,6 +97,15 @@ VERSION = 0.3.9.dev # they need to wait for the preceding API calls to finish or risk data corruption. # NUM_PARALLEL = 2 +# When multithreading, OpenBLAS needs to use a memory buffer for communicating +# and collating results for individual subranges of the original matrix. Since +# the original GotoBLAS of the early 2000s, the default size of this buffer has +# been set at a value of 32<<20 (which is 32MB) on x86_64 , twice that on PPC. +# If you expect to handle large problem sizes (beyond about 30000x30000) uncomment +# this line and adjust the (32< Date: Sun, 9 Feb 2020 23:30:22 +0100 Subject: [PATCH 2/9] Make BUFFER_SIZE configurable --- common_x86_64.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/common_x86_64.h b/common_x86_64.h index c05998d58..fe5539abe 100644 --- a/common_x86_64.h +++ b/common_x86_64.h @@ -225,7 +225,11 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ #endif #define HUGE_PAGESIZE ( 2 << 20) +#ifndef BUFFERSIZE #define BUFFER_SIZE (32 << 20) +#else +#define BUFFER_SIZE (32 << BUFFERSIZE) +#endif #define SEEK_ADDRESS From 7f0d523b42feb70e7b8ad299d8005d73f620f219 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 9 Feb 2020 23:32:57 +0100 Subject: [PATCH 3/9] Make BUFFER_SIZE configurable --- cmake/system.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index 4f8011603..ce980a7b9 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -289,6 +289,10 @@ set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}") set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_PARALLEL_NUMBER=${NUM_PARALLEL}") +if (BUFFERSIZE) +set(CCOMMON_OPT "${CCOMMON_OPT} -DBUFFERSIZE=${BUFFERSIZE}") +endif () + if (USE_SIMPLE_THREADED_LEVEL3) set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3") endif () From 754433f4208832d14119b398e8dd46fbc448a828 Mon Sep 17 00:00:00 2001 From: gxw Date: Mon, 10 Feb 2020 19:11:45 +0800 Subject: [PATCH 4/9] =?UTF-8?q?Avoid=20printing=20the=20following=20inform?= =?UTF-8?q?ation=20on=20mips=20and=20mips64=20when=20check=20msa:=20"unrec?= =?UTF-8?q?ognized=20command=20line=20option=20=E2=80=98-mmsa=E2=80=99"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- c_check | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/c_check b/c_check index fbd1838aa..555b2eccf 100644 --- a/c_check +++ b/c_check @@ -195,7 +195,7 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) { print $tmpf "void main(void){ __asm__ volatile($code); }\n"; $args = "$msa_flags -o $tmpf.o $tmpf"; - my @cmd = ("$compiler_name $args"); + my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null"); system(@cmd) == 0; if ($? != 0) { $have_msa = 0; From 303bdb673b8ef7b9e2ccdbb331827e00a1293951 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 10 Feb 2020 19:17:32 +0100 Subject: [PATCH 5/9] Fix coretype detection for Intel extended models 6 and 7 affecting Goldmont, Cannon Lake, Ice Lake autodetection --- cpuid_x86.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/cpuid_x86.c b/cpuid_x86.c index 9e1c8e752..e29adecae 100644 --- a/cpuid_x86.c +++ b/cpuid_x86.c @@ -2006,6 +2006,38 @@ int get_coretype(void){ return CORE_NEHALEM; } break; + case 6: + if (model == 6) +#ifndef NO_AVX512 + return CORE_SKYLAKEX; +#else + if(support_avx()) +#ifndef NO_AVX2 + return CORE_HASWELL; +#else + return CORE_SANDYBRIDGE; +#endif + else + return CORE_NEHALEM; +#endif + break; + case 7: + if (model == 10) + return CORE_NEHALEM; + if (model == 14) +#ifndef NO_AVX512 + return CORE_SKYLAKEX; +#else + if(support_avx()) +#ifndef NO_AVX2 + return CORE_HASWELL; +#else + return CORE_SANDYBRIDGE; +#endif + else + return CORE_NEHALEM; +#endif + break; case 9: case 8: if (model == 14) { // Kaby Lake From 7e5cbb6f3554342151c522ad4ab20111bf48f5d3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 10 Feb 2020 21:17:39 +0100 Subject: [PATCH 6/9] Fix bad conditional syntax that caused spurious application of USE_TRMM --- kernel/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index ad15b8f25..b3310e87e 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -121,8 +121,10 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) # Makefile.L3 set(USE_TRMM false) - - if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen" OR "${TARGET_CORE}" STREQUAL "SKYLAKEX" OR "${CORE}" STREQUAL "skylakex") + if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) ) + set(USE_TRMM true) + endif () + if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9)) set(USE_TRMM true) endif () From dff173e50e01d94e0741e4b4eaa1cf0aa01cf320 Mon Sep 17 00:00:00 2001 From: Susi Lehtola Date: Tue, 11 Feb 2020 14:46:30 +1300 Subject: [PATCH 7/9] Fix typo in dynamic_zarch.c --- driver/others/dynamic_zarch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/dynamic_zarch.c b/driver/others/dynamic_zarch.c index 1206bf870..896e65bb4 100644 --- a/driver/others/dynamic_zarch.c +++ b/driver/others/dynamic_zarch.c @@ -31,7 +31,7 @@ char* gotoblas_corename(void) { } // __builtin_cpu_is is not supported by zarch -static gotolabs_t* get_coretype(void) { +static gotoblas_t* get_coretype(void) { FILE* infile; char buffer[512], * p; From 5a6bba3061f19923eb9972378021e6498bf8e5ed Mon Sep 17 00:00:00 2001 From: Susi Lehtola Date: Tue, 11 Feb 2020 15:07:33 +1300 Subject: [PATCH 8/9] Patch out instances of Z15 in dynamic_zarch.c There does not appear to be a Z15 kernel yet, causing link errors from the code. This patch fixes the issue. --- driver/others/dynamic_zarch.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/driver/others/dynamic_zarch.c b/driver/others/dynamic_zarch.c index 1206bf870..c7b82e4df 100644 --- a/driver/others/dynamic_zarch.c +++ b/driver/others/dynamic_zarch.c @@ -3,12 +3,12 @@ extern gotoblas_t gotoblas_Z13; extern gotoblas_t gotoblas_Z14; -extern gotoblas_t gotoblas_Z15; +//extern gotoblas_t gotoblas_Z15; //#if (!defined C_GCC) || (GCC_VERSION >= 60000) //extern gotoblas_t gotoblas_Z14; //#endif -#define NUM_CORETYPES 5 +#define NUM_CORETYPES 4 extern void openblas_warning(int verbose, const char* msg); @@ -16,14 +16,14 @@ static char* corename[] = { "unknown", "Z13", "Z14", - "Z15", +// "Z15", "ZARCH_GENERIC", }; char* gotoblas_corename(void) { if (gotoblas == &gotoblas_Z13) return corename[1]; if (gotoblas == &gotoblas_Z14) return corename[2]; - if (gotoblas == &gotoblas_Z15) return corename[3]; +// if (gotoblas == &gotoblas_Z15) return corename[3]; //#if (!defined C_GCC) || (GCC_VERSION >= 60000) // if (gotoblas == &gotoblas_POWER9) return corename[3]; //#endif @@ -78,7 +78,7 @@ static gotoblas_t* force_coretype(char* coretype) { { case 1: return (&gotoblas_Z13); case 2: return (&gotoblas_Z14); - case 3: return (&gotoblas_Z15); +// case 3: return (&gotoblas_Z15); //#if (!defined C_GCC) || (GCC_VERSION >= 60000) // case 3: return (&gotoblas_POWER9); //#endif From 7ea5e07d1cb59834428d982818b7cf565dcda4df Mon Sep 17 00:00:00 2001 From: Bart Oldeman Date: Wed, 12 Feb 2020 14:11:44 +0000 Subject: [PATCH 9/9] Fix inline asm in dscal: mark x, x1 as clobbered. Fixes #2408 The leaq instructions in dscal_kernel_inc_8 modify x and x1 so they must be declared as input/output constraints, otherwise the compiler may assume the corresponding registers are not modified. --- kernel/x86_64/dscal.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/x86_64/dscal.c b/kernel/x86_64/dscal.c index d0d7801fd..e2436f789 100644 --- a/kernel/x86_64/dscal.c +++ b/kernel/x86_64/dscal.c @@ -136,10 +136,10 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_ "jnz 1b \n\t" : - "+r" (n) // 0 + "+r" (n), // 0 + "+r" (x), // 1 + "+r" (x1) // 2 : - "r" (x), // 1 - "r" (x1), // 2 "r" (alpha), // 3 "r" (inc_x), // 4 "r" (inc_x3) // 5