Merge pull request #27 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2020-02-12 19:16:14 +01:00 committed by GitHub
commit 616921fd91
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 63 additions and 12 deletions

View File

@ -97,6 +97,15 @@ VERSION = 0.3.9.dev
# they need to wait for the preceding API calls to finish or risk data corruption.
# NUM_PARALLEL = 2
# When multithreading, OpenBLAS needs to use a memory buffer for communicating
# and collating results for individual subranges of the original matrix. Since
# the original GotoBLAS of the early 2000s, the default size of this buffer has
# been set at a value of 32<<20 (which is 32MB) on x86_64, twice that on PPC.
# If you expect to handle large problem sizes (beyond about 30000x30000), uncomment
# the line below and adjust its value n, the shift count in (32<<n), if necessary.
# An insufficient value usually manifests itself as a crash in the relevant scal
# kernel (sscal_k, dscal_k, etc.).
# BUFFERSIZE = 25
# If you don't need to install the static library, please uncomment this line.
# NO_STATIC = 1

View File

@ -195,7 +195,7 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) {
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
$args = "$msa_flags -o $tmpf.o $tmpf";
my @cmd = ("$compiler_name $args");
my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null");
system(@cmd) == 0;
if ($? != 0) {
$have_msa = 0;

View File

@ -289,6 +289,10 @@ set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_PARALLEL_NUMBER=${NUM_PARALLEL}")
# Optional override: when BUFFERSIZE evaluates as true in if(), append it to
# the common C options as the preprocessor definition -DBUFFERSIZE=<value>.
# When it is unset (or a false value such as 0), CCOMMON_OPT is left untouched.
if (BUFFERSIZE)
set(CCOMMON_OPT "${CCOMMON_OPT} -DBUFFERSIZE=${BUFFERSIZE}")
endif ()
if (USE_SIMPLE_THREADED_LEVEL3)
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
endif ()

View File

@ -225,7 +225,11 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
#endif
#define HUGE_PAGESIZE ( 2 << 20)
/* BUFFER_SIZE selection: without a build-time BUFFERSIZE definition the
 * value is 32 << 20; otherwise BUFFERSIZE is used as the shift count,
 * giving 32 << BUFFERSIZE.
 * NOTE(review): 32 is a plain int literal, so a shift count of 26 or more
 * would not fit in a 32-bit int - confirm the intended range of BUFFERSIZE. */
#ifndef BUFFERSIZE
#define BUFFER_SIZE (32 << 20)
#else
#define BUFFER_SIZE (32 << BUFFERSIZE)
#endif
#define SEEK_ADDRESS

View File

@ -2006,6 +2006,38 @@ int get_coretype(void){
return CORE_NEHALEM;
}
break;
case 6:
if (model == 6)
#ifndef NO_AVX512
return CORE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
#endif
break;
case 7:
if (model == 10)
return CORE_NEHALEM;
if (model == 14)
#ifndef NO_AVX512
return CORE_SKYLAKEX;
#else
if(support_avx())
#ifndef NO_AVX2
return CORE_HASWELL;
#else
return CORE_SANDYBRIDGE;
#endif
else
return CORE_NEHALEM;
#endif
break;
case 9:
case 8:
if (model == 14) { // Kaby Lake

View File

@ -3,12 +3,12 @@
extern gotoblas_t gotoblas_Z13;
extern gotoblas_t gotoblas_Z14;
extern gotoblas_t gotoblas_Z15;
//extern gotoblas_t gotoblas_Z15;
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
//extern gotoblas_t gotoblas_Z14;
//#endif
#define NUM_CORETYPES 5
#define NUM_CORETYPES 4
extern void openblas_warning(int verbose, const char* msg);
@ -16,14 +16,14 @@ static char* corename[] = {
"unknown",
"Z13",
"Z14",
"Z15",
// "Z15",
"ZARCH_GENERIC",
};
char* gotoblas_corename(void) {
if (gotoblas == &gotoblas_Z13) return corename[1];
if (gotoblas == &gotoblas_Z14) return corename[2];
if (gotoblas == &gotoblas_Z15) return corename[3];
// if (gotoblas == &gotoblas_Z15) return corename[3];
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
// if (gotoblas == &gotoblas_POWER9) return corename[3];
//#endif
@ -31,7 +31,7 @@ char* gotoblas_corename(void) {
}
// __builtin_cpu_is is not supported by zarch
static gotolabs_t* get_coretype(void) {
static gotoblas_t* get_coretype(void) {
FILE* infile;
char buffer[512], * p;
@ -78,7 +78,7 @@ static gotoblas_t* force_coretype(char* coretype) {
{
case 1: return (&gotoblas_Z13);
case 2: return (&gotoblas_Z14);
case 3: return (&gotoblas_Z15);
// case 3: return (&gotoblas_Z15);
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
// case 3: return (&gotoblas_POWER9);
//#endif

View File

@ -121,8 +121,10 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
# Makefile.L3
set(USE_TRMM false)
if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen" OR "${TARGET_CORE}" STREQUAL "SKYLAKEX" OR "${CORE}" STREQUAL "skylakex")
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) )
set(USE_TRMM true)
endif ()
if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9))
set(USE_TRMM true)
endif ()

View File

@ -136,10 +136,10 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
"jnz 1b \n\t"
:
"+r" (n) // 0
"+r" (n), // 0
"+r" (x), // 1
"+r" (x1) // 2
:
"r" (x), // 1
"r" (x1), // 2
"r" (alpha), // 3
"r" (inc_x), // 4
"r" (inc_x3) // 5