Merge branch 'develop' into win_tidy

This commit is contained in:
Mark Seminatore
2024-02-12 10:23:17 -08:00
committed by GitHub
621 changed files with 96981 additions and 21930 deletions

View File

@@ -113,6 +113,8 @@ extern unsigned int openblas_thread_timeout(void);
/* We need this global for checking if initialization is finished. */
int blas_server_avail __attribute__((aligned(ATTRIBUTE_SIZE))) = 0;
int blas_omp_threads_local = 1;
/* Local Variables */
#if defined(USE_PTHREAD_LOCK)
static pthread_mutex_t server_lock = PTHREAD_MUTEX_INITIALIZER;

View File

@@ -69,6 +69,7 @@
int blas_server_avail = 0;
int blas_omp_number_max = 0;
int blas_omp_threads_local = 1;
extern int openblas_omp_adaptive_env(void);

View File

@@ -65,6 +65,8 @@ static CRITICAL_SECTION queue_lock;
/* We need this global for checking if initialization is finished. */
int blas_server_avail = 0;
int blas_omp_threads_local = 1;
/* Local Variables */
static BLASULONG server_lock = 0;

View File

@@ -275,6 +275,7 @@ extern gotoblas_t gotoblas_EXCAVATOR;
#define gotoblas_SKYLAKEX gotoblas_SANDYBRIDGE
#define gotoblas_COOPERLAKE gotoblas_SANDYBRIDGE
#define gotoblas_ZEN gotoblas_SANDYBRIDGE
#define gotoblas_SAPPHIRERAPIDS gotoblas_SANDYBRIDGE
#else
extern gotoblas_t gotoblas_HASWELL;
extern gotoblas_t gotoblas_ZEN;

View File

@@ -1,6 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project */
/* Copyright 2023-2024 The OpenBLAS Project */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@@ -122,10 +122,11 @@ extern gotoblas_t gotoblas_CORTEXA55;
#endif
#else
extern gotoblas_t gotoblas_CORTEXA53;
#define gotoblas_CORTEXA55 gotoblas_CORTEXA53
extern gotoblas_t gotoblas_CORTEXA57;
extern gotoblas_t gotoblas_CORTEXA72;
extern gotoblas_t gotoblas_CORTEXA73;
extern gotoblas_t gotoblas_FALKOR;
#define gotoblas_CORTEXA72 gotoblas_CORTEXA57
#define gotoblas_CORTEXA73 gotoblas_CORTEXA57
#define gotoblas_FALKOR gotoblas_CORTEXA57
extern gotoblas_t gotoblas_THUNDERX;
extern gotoblas_t gotoblas_THUNDERX2T99;
extern gotoblas_t gotoblas_TSV110;
@@ -141,14 +142,14 @@ extern gotoblas_t gotoblas_ARMV8SVE;
#define gotoblas_ARMV8SVE gotoblas_ARMV8
#endif
extern gotoblas_t gotoblas_THUNDERX3T110;
extern gotoblas_t gotoblas_CORTEXA55;
#endif
#define gotoblas_NEOVERSEV2 gotoblas_NEOVERSEV1
extern void openblas_warning(int verbose, const char * msg);
#define FALLBACK_VERBOSE 1
#define NEOVERSEN1_FALLBACK "OpenBLAS : Your OS does not support SVE instructions. OpenBLAS is using Neoverse N1 kernels as a fallback, which may give poorer performance.\n"
#define NUM_CORETYPES 16
#define NUM_CORETYPES 17
/*
* In case asm/hwcap.h is outdated on the build system, make sure
@@ -178,6 +179,7 @@ static char *corename[] = {
"emag8180",
"neoversen1",
"neoversev1",
"neoversev2",
"neoversen2",
"thunderx3t110",
"cortexa55",
@@ -198,10 +200,11 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
if (gotoblas == &gotoblas_NEOVERSEN1) return corename[10];
if (gotoblas == &gotoblas_NEOVERSEV1) return corename[11];
if (gotoblas == &gotoblas_NEOVERSEN2) return corename[12];
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[13];
if (gotoblas == &gotoblas_CORTEXA55) return corename[14];
if (gotoblas == &gotoblas_ARMV8SVE) return corename[15];
if (gotoblas == &gotoblas_NEOVERSEV2) return corename[12];
if (gotoblas == &gotoblas_NEOVERSEN2) return corename[13];
if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
if (gotoblas == &gotoblas_CORTEXA55) return corename[15];
if (gotoblas == &gotoblas_ARMV8SVE) return corename[16];
return corename[NUM_CORETYPES];
}
@@ -233,10 +236,11 @@ static gotoblas_t *force_coretype(char *coretype) {
case 9: return (&gotoblas_EMAG8180);
case 10: return (&gotoblas_NEOVERSEN1);
case 11: return (&gotoblas_NEOVERSEV1);
case 12: return (&gotoblas_NEOVERSEN2);
case 13: return (&gotoblas_THUNDERX3T110);
case 14: return (&gotoblas_CORTEXA55);
case 15: return (&gotoblas_ARMV8SVE);
case 12: return (&gotoblas_NEOVERSEV2);
case 13: return (&gotoblas_NEOVERSEN2);
case 14: return (&gotoblas_THUNDERX3T110);
case 15: return (&gotoblas_CORTEXA55);
case 16: return (&gotoblas_ARMV8SVE);
}
snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message);
@@ -247,6 +251,10 @@ static gotoblas_t *get_coretype(void) {
int implementer, variant, part, arch, revision, midr_el1;
char coremsg[128];
#if defined (OS_DARWIN)
return &gotoblas_NEOVERSEN1;
#endif
#if (!defined OS_LINUX && !defined OS_ANDROID)
return NULL;
#else
@@ -308,6 +316,13 @@ static gotoblas_t *get_coretype(void) {
return &gotoblas_NEOVERSEN1;
}else
return &gotoblas_NEOVERSEV1;
case 0xd4f:
if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
openblas_warning(FALLBACK_VERBOSE, NEOVERSEN1_FALLBACK);
return &gotoblas_NEOVERSEN1;
} else {
return &gotoblas_NEOVERSEV2;
}
#endif
case 0xd05: // Cortex A55
return &gotoblas_CORTEXA55;
@@ -352,6 +367,9 @@ static gotoblas_t *get_coretype(void) {
return &gotoblas_FALKOR;
}
break;
case 0x61: // Apple
return &gotoblas_NEOVERSEN1;
break;
default:
snprintf(coremsg, 128, "Unknown CPU model - implementer %x part %x\n",implementer,part);
openblas_warning(1, coremsg);

View File

@@ -43,6 +43,13 @@ char *gotoblas_corename(void) {
#define CPU_POWER9 9
#define CPU_POWER10 10
/* Fallback values for the architecture constants that cpuid() compares
 * `arch` against. Each one is defined only when the build environment has
 * not already provided it.
 * NOTE(review): values presumably mirror AIX <sys/systemcfg.h> - confirm. */
#ifndef POWER_9
#define POWER_9 0x20000 /* POWER9-class CPU */
#endif
#ifndef POWER_10
#define POWER_10 0x40000 /* POWER10-class CPU */
#endif
#ifdef _AIX
#include <sys/systemcfg.h>
@@ -62,7 +69,7 @@ static int cpuid(void)
else if (arch == POWER_9) return CPU_POWER9;
#endif
#ifdef POWER_10
else if (arch == POWER_10) return CPU_POWER10;
else if (arch >= POWER_10) return CPU_POWER10;
#endif
return CPU_UNKNOWN;
}
@@ -332,6 +339,9 @@ void gotoblas_dynamic_init(void) {
if (gotoblas && gotoblas -> init) {
strncpy(coren,gotoblas_corename(),20);
sprintf(coremsg, "Core: %s\n",coren);
if (getenv("GET_OPENBLAS_CORETYPE")) {
fprintf(stderr, "%s", coremsg);
}
openblas_warning(2, coremsg);
gotoblas -> init();
} else {

View File

@@ -3214,7 +3214,7 @@ void blas_shutdown(void){
#endif
memory[pos].lock = 0;
}
if (memory_overflowed)
if (memory_overflowed) {
for (pos = 0; pos < NEW_BUFFERS; pos ++){
newmemory[pos].addr = (void *)0;
newmemory[pos].used = 0;
@@ -3222,6 +3222,10 @@ void blas_shutdown(void){
newmemory[pos].pos = -1;
#endif
newmemory[pos].lock = 0;
}
free(newmemory);
newmemory = NULL;
memory_overflowed = 0;
}
UNLOCK_COMMAND(&alloc_lock);

View File

@@ -36,11 +36,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef SMP_SERVER
extern void openblas_set_num_threads(int num_threads) ;
extern int openblas_get_num_threads(void) ;
/* By-pointer variant of openblas_set_num_threads(): reads the requested
 * thread count through num_threads and forwards it unchanged.
 * NOTE(review): the trailing underscore suggests this is the Fortran
 * binding - confirm. num_threads must point to a valid int. */
void openblas_set_num_threads_(int *num_threads)
{
    const int requested = *num_threads;

    openblas_set_num_threads(requested);
}
/* Sets the thread count to num_threads and stores the same value in
 * blas_omp_threads_local.
 *
 * Returns the thread count reported by openblas_get_num_threads() before
 * the change was applied. */
int openblas_set_num_threads_local(int num_threads)
{
    const int previous = openblas_get_num_threads();

    /* Order matters: apply the new count first, then record it. */
    openblas_set_num_threads(num_threads);
    blas_omp_threads_local = num_threads;

    return previous;
}
#else
//Single thread
@@ -50,4 +59,8 @@ void openblas_set_num_threads(int num_threads) {
/* Single-thread build: the by-pointer thread-count setter does nothing.
 * The argument is neither read nor written. */
void openblas_set_num_threads_(int *num_threads)
{
    (void)num_threads; /* intentionally ignored */
}
/* Single-thread build: the request is ignored and 1 is always returned. */
int openblas_set_num_threads_local(int num_threads)
{
    (void)num_threads; /* no thread pool to resize */
    return 1;
}
#endif