commit
616921fd91
|
@ -97,6 +97,15 @@ VERSION = 0.3.9.dev
|
||||||
# they need to wait for the preceding API calls to finish or risk data corruption.
|
# they need to wait for the preceding API calls to finish or risk data corruption.
|
||||||
# NUM_PARALLEL = 2
|
# NUM_PARALLEL = 2
|
||||||
|
|
||||||
|
# When multithreading, OpenBLAS needs to use a memory buffer for communicating
|
||||||
|
# and collating results for individual subranges of the original matrix. Since
|
||||||
|
# the original GotoBLAS of the early 2000s, the default size of this buffer has
|
||||||
|
# been set at a value of 32<<20 (which is 32MB) on x86_64, twice that on PPC.
|
||||||
|
# If you expect to handle large problem sizes (beyond about 30000x30000), uncomment
|
||||||
|
# this line and adjust the (32<<n) factor if necessary. Usually an insufficient value
|
||||||
|
# manifests itself as a crash in the relevant scal kernel (sscal_k, dscal_k, etc.).
|
||||||
|
# BUFFERSIZE = 25
|
||||||
|
|
||||||
# If you don't need to install the static library, please uncomment this.
|
# If you don't need to install the static library, please uncomment this.
|
||||||
# NO_STATIC = 1
|
# NO_STATIC = 1
|
||||||
|
|
||||||
|
|
2
c_check
2
c_check
|
@ -195,7 +195,7 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) {
|
||||||
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
|
print $tmpf "void main(void){ __asm__ volatile($code); }\n";
|
||||||
|
|
||||||
$args = "$msa_flags -o $tmpf.o $tmpf";
|
$args = "$msa_flags -o $tmpf.o $tmpf";
|
||||||
my @cmd = ("$compiler_name $args");
|
my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null");
|
||||||
system(@cmd) == 0;
|
system(@cmd) == 0;
|
||||||
if ($? != 0) {
|
if ($? != 0) {
|
||||||
$have_msa = 0;
|
$have_msa = 0;
|
||||||
|
|
|
@ -289,6 +289,10 @@ set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}")
|
||||||
|
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_PARALLEL_NUMBER=${NUM_PARALLEL}")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_PARALLEL_NUMBER=${NUM_PARALLEL}")
|
||||||
|
|
||||||
|
if (BUFFERSIZE)
|
||||||
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DBUFFERSIZE=${BUFFERSIZE}")
|
||||||
|
endif ()
|
||||||
|
|
||||||
if (USE_SIMPLE_THREADED_LEVEL3)
|
if (USE_SIMPLE_THREADED_LEVEL3)
|
||||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
|
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3")
|
||||||
endif ()
|
endif ()
|
||||||
|
|
|
@ -225,7 +225,11 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
||||||
#endif
|
#endif
|
||||||
#define HUGE_PAGESIZE ( 2 << 20)
|
#define HUGE_PAGESIZE ( 2 << 20)
|
||||||
|
|
||||||
|
#ifndef BUFFERSIZE
|
||||||
#define BUFFER_SIZE (32 << 20)
|
#define BUFFER_SIZE (32 << 20)
|
||||||
|
#else
|
||||||
|
#define BUFFER_SIZE (32 << BUFFERSIZE)
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SEEK_ADDRESS
|
#define SEEK_ADDRESS
|
||||||
|
|
||||||
|
|
32
cpuid_x86.c
32
cpuid_x86.c
|
@ -2006,6 +2006,38 @@ int get_coretype(void){
|
||||||
return CORE_NEHALEM;
|
return CORE_NEHALEM;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 6:
|
||||||
|
if (model == 6)
|
||||||
|
#ifndef NO_AVX512
|
||||||
|
return CORE_SKYLAKEX;
|
||||||
|
#else
|
||||||
|
if(support_avx())
|
||||||
|
#ifndef NO_AVX2
|
||||||
|
return CORE_HASWELL;
|
||||||
|
#else
|
||||||
|
return CORE_SANDYBRIDGE;
|
||||||
|
#endif
|
||||||
|
else
|
||||||
|
return CORE_NEHALEM;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
case 7:
|
||||||
|
if (model == 10)
|
||||||
|
return CORE_NEHALEM;
|
||||||
|
if (model == 14)
|
||||||
|
#ifndef NO_AVX512
|
||||||
|
return CORE_SKYLAKEX;
|
||||||
|
#else
|
||||||
|
if(support_avx())
|
||||||
|
#ifndef NO_AVX2
|
||||||
|
return CORE_HASWELL;
|
||||||
|
#else
|
||||||
|
return CORE_SANDYBRIDGE;
|
||||||
|
#endif
|
||||||
|
else
|
||||||
|
return CORE_NEHALEM;
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
case 9:
|
case 9:
|
||||||
case 8:
|
case 8:
|
||||||
if (model == 14) { // Kaby Lake
|
if (model == 14) { // Kaby Lake
|
||||||
|
|
|
@ -3,12 +3,12 @@
|
||||||
|
|
||||||
extern gotoblas_t gotoblas_Z13;
|
extern gotoblas_t gotoblas_Z13;
|
||||||
extern gotoblas_t gotoblas_Z14;
|
extern gotoblas_t gotoblas_Z14;
|
||||||
extern gotoblas_t gotoblas_Z15;
|
//extern gotoblas_t gotoblas_Z15;
|
||||||
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
||||||
//extern gotoblas_t gotoblas_Z14;
|
//extern gotoblas_t gotoblas_Z14;
|
||||||
//#endif
|
//#endif
|
||||||
|
|
||||||
#define NUM_CORETYPES 5
|
#define NUM_CORETYPES 4
|
||||||
|
|
||||||
extern void openblas_warning(int verbose, const char* msg);
|
extern void openblas_warning(int verbose, const char* msg);
|
||||||
|
|
||||||
|
@ -16,14 +16,14 @@ static char* corename[] = {
|
||||||
"unknown",
|
"unknown",
|
||||||
"Z13",
|
"Z13",
|
||||||
"Z14",
|
"Z14",
|
||||||
"Z15",
|
// "Z15",
|
||||||
"ZARCH_GENERIC",
|
"ZARCH_GENERIC",
|
||||||
};
|
};
|
||||||
|
|
||||||
char* gotoblas_corename(void) {
|
char* gotoblas_corename(void) {
|
||||||
if (gotoblas == &gotoblas_Z13) return corename[1];
|
if (gotoblas == &gotoblas_Z13) return corename[1];
|
||||||
if (gotoblas == &gotoblas_Z14) return corename[2];
|
if (gotoblas == &gotoblas_Z14) return corename[2];
|
||||||
if (gotoblas == &gotoblas_Z15) return corename[3];
|
// if (gotoblas == &gotoblas_Z15) return corename[3];
|
||||||
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
||||||
// if (gotoblas == &gotoblas_POWER9) return corename[3];
|
// if (gotoblas == &gotoblas_POWER9) return corename[3];
|
||||||
//#endif
|
//#endif
|
||||||
|
@ -31,7 +31,7 @@ char* gotoblas_corename(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// __builtin_cpu_is is not supported by zarch
|
// __builtin_cpu_is is not supported by zarch
|
||||||
static gotolabs_t* get_coretype(void) {
|
static gotoblas_t* get_coretype(void) {
|
||||||
FILE* infile;
|
FILE* infile;
|
||||||
char buffer[512], * p;
|
char buffer[512], * p;
|
||||||
|
|
||||||
|
@ -78,7 +78,7 @@ static gotoblas_t* force_coretype(char* coretype) {
|
||||||
{
|
{
|
||||||
case 1: return (&gotoblas_Z13);
|
case 1: return (&gotoblas_Z13);
|
||||||
case 2: return (&gotoblas_Z14);
|
case 2: return (&gotoblas_Z14);
|
||||||
case 3: return (&gotoblas_Z15);
|
// case 3: return (&gotoblas_Z15);
|
||||||
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
||||||
// case 3: return (&gotoblas_POWER9);
|
// case 3: return (&gotoblas_POWER9);
|
||||||
//#endif
|
//#endif
|
||||||
|
|
|
@ -121,8 +121,10 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||||
|
|
||||||
# Makefile.L3
|
# Makefile.L3
|
||||||
set(USE_TRMM false)
|
set(USE_TRMM false)
|
||||||
|
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) )
|
||||||
if (ARM OR ARM64 OR "${TARGET_CORE}" STREQUAL "LONGSOON3B" OR "${TARGET_CORE}" STREQUAL "GENERIC" OR "${CORE}" STREQUAL "generic" OR "${TARGET_CORE}" STREQUAL "HASWELL" OR "${CORE}" STREQUAL "haswell" OR "${CORE}" STREQUAL "zen" OR "${TARGET_CORE}" STREQUAL "SKYLAKEX" OR "${CORE}" STREQUAL "skylakex")
|
set(USE_TRMM true)
|
||||||
|
endif ()
|
||||||
|
if (ZARCH OR (TARGET_CORE MATCHES POWER8) OR (TARGET_CORE MATCHES POWER9))
|
||||||
set(USE_TRMM true)
|
set(USE_TRMM true)
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
|
|
|
@ -136,10 +136,10 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_
|
||||||
"jnz 1b \n\t"
|
"jnz 1b \n\t"
|
||||||
|
|
||||||
:
|
:
|
||||||
"+r" (n) // 0
|
"+r" (n), // 0
|
||||||
|
"+r" (x), // 1
|
||||||
|
"+r" (x1) // 2
|
||||||
:
|
:
|
||||||
"r" (x), // 1
|
|
||||||
"r" (x1), // 2
|
|
||||||
"r" (alpha), // 3
|
"r" (alpha), // 3
|
||||||
"r" (inc_x), // 4
|
"r" (inc_x), // 4
|
||||||
"r" (inc_x3) // 5
|
"r" (inc_x3) // 5
|
||||||
|
|
Loading…
Reference in New Issue