Merge branch 'develop' into win_tidy
This commit is contained in:
@@ -113,6 +113,8 @@ extern unsigned int openblas_thread_timeout(void);
|
||||
/* We need this global for checking if initialization is finished. */
|
||||
int blas_server_avail __attribute__((aligned(ATTRIBUTE_SIZE))) = 0;
|
||||
|
||||
int blas_omp_threads_local = 1;
|
||||
|
||||
/* Local Variables */
|
||||
#if defined(USE_PTHREAD_LOCK)
|
||||
static pthread_mutex_t server_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
@@ -69,6 +69,7 @@
|
||||
|
||||
int blas_server_avail = 0;
|
||||
int blas_omp_number_max = 0;
|
||||
int blas_omp_threads_local = 1;
|
||||
|
||||
extern int openblas_omp_adaptive_env(void);
|
||||
|
||||
|
||||
@@ -65,6 +65,8 @@ static CRITICAL_SECTION queue_lock;
|
||||
/* We need this global for checking if initialization is finished. */
|
||||
int blas_server_avail = 0;
|
||||
|
||||
int blas_omp_threads_local = 1;
|
||||
|
||||
/* Local Variables */
|
||||
static BLASULONG server_lock = 0;
|
||||
|
||||
|
||||
@@ -275,6 +275,7 @@ extern gotoblas_t gotoblas_EXCAVATOR;
|
||||
#define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE
|
||||
#define gotoblas_COOPERLAKE gotoblas_SANDYBRIDGE
|
||||
#define gotoblas_ZEN gotoblas_SANDYBRIDGE
|
||||
#define gotoblas_SAPPHIRERAPIDS gotoblas_SANDYBRIDGE
|
||||
#else
|
||||
extern gotoblas_t gotoblas_HASWELL;
|
||||
extern gotoblas_t gotoblas_ZEN;
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* Copyright 2023 The OpenBLAS Project */
|
||||
/* Copyright 2023-2024 The OpenBLAS Project */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
@@ -122,10 +122,11 @@ extern gotoblas_t gotoblas_CORTEXA55;
|
||||
#endif
|
||||
#else
|
||||
extern gotoblas_t gotoblas_CORTEXA53;
|
||||
#define gotoblas_CORTEXA55 gotoblas_CORTEXA53
|
||||
extern gotoblas_t gotoblas_CORTEXA57;
|
||||
extern gotoblas_t gotoblas_CORTEXA72;
|
||||
extern gotoblas_t gotoblas_CORTEXA73;
|
||||
extern gotoblas_t gotoblas_FALKOR;
|
||||
#define gotoblas_CORTEXA72 gotoblas_CORTEXA57
|
||||
#define gotoblas_CORTEXA73 gotoblas_CORTEXA57
|
||||
#define gotoblas_FALKOR gotoblas_CORTEXA57
|
||||
extern gotoblas_t gotoblas_THUNDERX;
|
||||
extern gotoblas_t gotoblas_THUNDERX2T99;
|
||||
extern gotoblas_t gotoblas_TSV110;
|
||||
@@ -141,14 +142,14 @@ extern gotoblas_t gotoblas_ARMV8SVE;
|
||||
#define gotoblas_ARMV8SVE gotoblas_ARMV8
|
||||
#endif
|
||||
extern gotoblas_t gotoblas_THUNDERX3T110;
|
||||
extern gotoblas_t gotoblas_CORTEXA55;
|
||||
#endif
|
||||
#define gotoblas_NEOVERSEV2 gotoblas_NEOVERSEV1
|
||||
|
||||
extern void openblas_warning(int verbose, const char * msg);
|
||||
#define FALLBACK_VERBOSE 1
|
||||
#define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"
|
||||
|
||||
#define NUM_CORETYPES 16
|
||||
#define NUM_CORETYPES 17
|
||||
|
||||
/*
|
||||
* In case asm/hwcap.h is outdated on the build system, make sure
|
||||
@@ -178,6 +179,7 @@ static char *corename[] = {
|
||||
"emag8180",
|
||||
"neoversen1",
|
||||
"neoversev1",
|
||||
"neoversev2",
|
||||
"neoversen2",
|
||||
"thunderx3t110",
|
||||
"cortexa55",
|
||||
@@ -198,10 +200,11 @@ char *gotoblas_corename(void) {
|
||||
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
|
||||
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
|
||||
if (gotoblas == &gotoblas_NEOVERSEV1) return corename[11];
|
||||
if (gotoblas == &gotoblas_NEOVERSEN2) return corename[12];
|
||||
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[13];
|
||||
if (gotoblas == &gotoblas_CORTEXA55) return corename[14];
|
||||
if (gotoblas == &gotoblas_ARMV8SVE) return corename[15];
|
||||
if (gotoblas == &gotoblas_NEOVERSEV2) return corename[12];
|
||||
if (gotoblas == &gotoblas_NEOVERSEN2) return corename[13];
|
||||
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
|
||||
if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
|
||||
if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
|
||||
return corename[NUM_CORETYPES];
|
||||
}
|
||||
|
||||
@@ -233,10 +236,11 @@ static gotoblas_t *force_coretype(char *coretype) {
|
||||
case 9: return (&gotoblas_EMAG8180);
|
||||
case 10: return (&gotoblas_NEOVERSEN1);
|
||||
case 11: return (&gotoblas_NEOVERSEV1);
|
||||
case 12: return (&gotoblas_NEOVERSEN2);
|
||||
case 13: return (&gotoblas_THUNDERX3T110);
|
||||
case 14: return (&gotoblas_CORTEXA55);
|
||||
case 15: return (&gotoblas_ARMV8SVE);
|
||||
case 12: return (&gotoblas_NEOVERSEV2);
|
||||
case 13: return (&gotoblas_NEOVERSEN2);
|
||||
case 14: return (&gotoblas_THUNDERX3T110);
|
||||
case 15: return (&gotoblas_CORTEXA55);
|
||||
case 16: return (&gotoblas_ARMV8SVE);
|
||||
}
|
||||
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||
openblas_warning(1, message);
|
||||
@@ -247,6 +251,10 @@ static gotoblas_t *get_coretype(void) {
|
||||
int implementer, variant, part, arch, revision, midr_el1;
|
||||
char coremsg[128];
|
||||
|
||||
#if defined (OS_DARWIN)
|
||||
return &gotoblas_NEOVERSEN1;
|
||||
#endif
|
||||
|
||||
#if (!defined OS_LINUX && !defined OS_ANDROID)
|
||||
return NULL;
|
||||
#else
|
||||
@@ -308,6 +316,13 @@ static gotoblas_t *get_coretype(void) {
|
||||
return &gotoblas_NEOVERSEN1;
|
||||
}else
|
||||
return &gotoblas_NEOVERSEV1;
|
||||
case 0xd4f:
|
||||
if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
|
||||
openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
|
||||
return &gotoblas_NEOVERSEN1;
|
||||
} else {
|
||||
return &gotoblas_NEOVERSEV2;
|
||||
}
|
||||
#endif
|
||||
case 0xd05: // Cortex A55
|
||||
return &gotoblas_CORTEXA55;
|
||||
@@ -352,6 +367,9 @@ static gotoblas_t *get_coretype(void) {
|
||||
return &gotoblas_FALKOR;
|
||||
}
|
||||
break;
|
||||
case 0x61: // Apple
|
||||
return &gotoblas_NEOVERSEN1;
|
||||
break;
|
||||
default:
|
||||
snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part);
|
||||
openblas_warning(1, coremsg);
|
||||
|
||||
@@ -43,6 +43,13 @@ char *gotoblas_corename(void) {
|
||||
#define CPU_POWER9 9
|
||||
#define CPU_POWER10 10
|
||||
|
||||
#ifndef POWER_9
|
||||
#define POWER_9 0x20000 /* 9 class CPU */
|
||||
#endif
|
||||
#ifndef POWER_10
|
||||
#define POWER_10 0x40000 /* 10 class CPU */
|
||||
#endif
|
||||
|
||||
#ifdef _AIX
|
||||
#include <sys/systemcfg.h>
|
||||
|
||||
@@ -62,7 +69,7 @@ static int cpuid(void)
|
||||
else if (arch == POWER_9) return CPU_POWER9;
|
||||
#endif
|
||||
#ifdef POWER_10
|
||||
else if (arch == POWER_10) return CPU_POWER10;
|
||||
else if (arch >= POWER_10) return CPU_POWER10;
|
||||
#endif
|
||||
return CPU_UNKNOWN;
|
||||
}
|
||||
@@ -332,6 +339,9 @@ void gotoblas_dynamic_init(void) {
|
||||
if (gotoblas && gotoblas -> init) {
|
||||
strncpy(coren,gotoblas_corename(),20);
|
||||
sprintf(coremsg, "Core: %s\n",coren);
|
||||
if (getenv("GET_OPENBLAS_CORETYPE")) {
|
||||
fprintf(stderr, "%s", coremsg);
|
||||
}
|
||||
openblas_warning(2, coremsg);
|
||||
gotoblas -> init();
|
||||
} else {
|
||||
|
||||
@@ -3214,7 +3214,7 @@ void blas_shutdown(void){
|
||||
#endif
|
||||
memory[pos].lock = 0;
|
||||
}
|
||||
if (memory_overflowed)
|
||||
if (memory_overflowed) {
|
||||
for (pos = 0; pos < NEW_BUFFERS; pos ++){
|
||||
newmemory[pos].addr = (void *)0;
|
||||
newmemory[pos].used = 0;
|
||||
@@ -3222,6 +3222,10 @@ void blas_shutdown(void){
|
||||
newmemory[pos].pos = -1;
|
||||
#endif
|
||||
newmemory[pos].lock = 0;
|
||||
}
|
||||
free(newmemory);
|
||||
newmemory = NULL;
|
||||
memory_overflowed = 0;
|
||||
}
|
||||
|
||||
UNLOCK_COMMAND(&alloc_lock);
|
||||
|
||||
@@ -36,11 +36,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#ifdef SMP_SERVER
|
||||
|
||||
extern void openblas_set_num_threads(int num_threads) ;
|
||||
extern int openblas_get_num_threads(void) ;
|
||||
|
||||
void openblas_set_num_threads_(int* num_threads){
|
||||
openblas_set_num_threads(*num_threads);
|
||||
}
|
||||
|
||||
int openblas_set_num_threads_local(int num_threads){
|
||||
int ret = openblas_get_num_threads();
|
||||
openblas_set_num_threads(num_threads);
|
||||
blas_omp_threads_local=num_threads;
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
#else
|
||||
//Single thread
|
||||
|
||||
@@ -50,4 +59,8 @@ void openblas_set_num_threads(int num_threads) {
|
||||
void openblas_set_num_threads_(int* num_threads){
|
||||
|
||||
}
|
||||
|
||||
int openblas_set_num_threads_local(int num_threads){
|
||||
return 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user