Fixed #14 the SEGFAULT bug on 64 cores. On SMP server, the number of CPUs or cores should be less than or equal to 64.
This commit is contained in:
parent
552f31dbbd
commit
989c6f8b06
|
@ -5,6 +5,12 @@ Version 0.1 alpha2(in development)
|
|||
common:
|
||||
* Fixed blasint undefined bug in <cblas.h> file. Other software
|
||||
could include this header successfully(Refs issue #13 on github)
|
||||
* Fixed the SEGFAULT bug on 64 cores. On SMP server, the number
|
||||
of CPUs or cores should be less than or equal to 64.(Refs issue #14
|
||||
on github)
|
||||
* Support "void goto_set_num_threads(int num_threads)" and "void
|
||||
openblas_set_num_threads(int num_threads)" when USE_OPENMP=1
|
||||
|
||||
x86/x86_64:
|
||||
*
|
||||
MIPS64:
|
||||
|
|
12
README
12
README
|
@ -39,13 +39,17 @@ export GOTO_NUM_THREADS=4
|
|||
or
|
||||
export OMP_NUM_THREADS=4
|
||||
|
||||
The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS.
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should only set OMP_NUM_THREADS environment variable.
|
||||
|
||||
4.2 Set the number of threads by calling functions. For example,
|
||||
void goto_set_num_threads(int num_threads);
|
||||
or
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
|
||||
If you compile this lib with USE_OPENMP=1, you should use the above functions, too.
|
||||
|
||||
5.Report Bugs
|
||||
Please add an issue at https://github.com/xianyi/OpenBLAS/issues
|
||||
|
||||
|
@ -56,4 +60,8 @@ Optimization on ICT Loongson 3A CPU
|
|||
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas
|
||||
|
||||
8.ChangeLog
|
||||
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version.
|
||||
Please see Changelog.txt for the differences from the GotoBLAS2 1.13 BSD version.
|
||||
|
||||
9.Known Issues
|
||||
* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
||||
is 64. On 32 bits, it is 32.
|
||||
|
|
|
@ -49,6 +49,26 @@
|
|||
|
||||
int blas_server_avail = 0;
|
||||
|
||||
/*
 * Set the number of threads to use.
 *
 * num_threads: requested thread count. A value below 1 is replaced by the
 *              current blas_num_threads; a value above MAX_CPU_NUMBER is
 *              clamped to MAX_CPU_NUMBER.
 *
 * Raises blas_num_threads when the (adjusted) request exceeds it, stores the
 * result in blas_cpu_number, and passes that value to omp_set_num_threads().
 */
void goto_set_num_threads(int num_threads) {
|
||||
|
||||
if (num_threads < 1) num_threads = blas_num_threads;
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
|
||||
if (num_threads > blas_num_threads) {
|
||||
blas_num_threads = num_threads;
|
||||
}
|
||||
|
||||
blas_cpu_number = num_threads;
|
||||
|
||||
omp_set_num_threads(blas_cpu_number);
|
||||
|
||||
}
|
||||
/*
 * Alternative entry point with the same signature as goto_set_num_threads();
 * it forwards num_threads to that function unchanged.
 */
void openblas_set_num_threads(int num_threads) {
|
||||
|
||||
goto_set_num_threads(num_threads);
|
||||
}
|
||||
|
||||
int blas_thread_init(void){
|
||||
|
||||
blas_get_cpu_number();
|
||||
|
|
|
@ -172,13 +172,20 @@ static inline int rcount(unsigned long number) {
|
|||
return count;
|
||||
}
|
||||
|
||||
/***
|
||||
Known issue: The number of CPUs/cores should be less
|
||||
than or equal to 8*sizeof(unsigned long). On 64 bits, the limit
|
||||
is 64. On 32 bits, it is 32.
|
||||
***/
|
||||
static inline unsigned long get_cpumap(int node) {
|
||||
|
||||
int infile;
|
||||
unsigned long affinity;
|
||||
char name[160];
|
||||
char cpumap[160];
|
||||
char *p, *dummy;
|
||||
|
||||
int i=0;
|
||||
|
||||
sprintf(name, CPUMAP_NAME, node);
|
||||
|
||||
infile = open(name, O_RDONLY);
|
||||
|
@ -187,13 +194,19 @@ static inline unsigned long get_cpumap(int node) {
|
|||
|
||||
if (infile != -1) {
|
||||
|
||||
read(infile, name, sizeof(name));
|
||||
|
||||
read(infile, cpumap, sizeof(cpumap));
|
||||
p = cpumap;
|
||||
while (*p != '\n' && i<160){
|
||||
if(*p != ',') {
|
||||
name[i++]=*p;
|
||||
}
|
||||
p++;
|
||||
}
|
||||
p = name;
|
||||
|
||||
while ((*p == '0') || (*p == ',')) p++;
|
||||
// while ((*p == '0') || (*p == ',')) p++;
|
||||
|
||||
affinity = strtol(p, &dummy, 16);
|
||||
affinity = strtoul(p, &dummy, 16);
|
||||
|
||||
close(infile);
|
||||
}
|
||||
|
@ -347,7 +360,13 @@ static void disable_hyperthread(void) {
|
|||
unsigned long share;
|
||||
int cpu;
|
||||
|
||||
common -> avail = (1UL << common -> num_procs) - 1;
|
||||
if(common->num_procs > 64){
|
||||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs);
|
||||
exit(1);
|
||||
}else if(common->num_procs == 64){
|
||||
common -> avail = 0xFFFFFFFFFFFFFFFFUL;
|
||||
}else
|
||||
common -> avail = (1UL << common -> num_procs) - 1;
|
||||
|
||||
#ifdef DEBUG
|
||||
fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail);
|
||||
|
@ -376,7 +395,13 @@ static void disable_affinity(void) {
|
|||
fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]);
|
||||
#endif
|
||||
|
||||
lprocmask = (1UL << common -> final_num_procs) - 1;
|
||||
if(common->final_num_procs > 64){
|
||||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs);
|
||||
exit(1);
|
||||
}else if(common->final_num_procs == 64){
|
||||
lprocmask = 0xFFFFFFFFFFFFFFFFUL;
|
||||
}else
|
||||
lprocmask = (1UL << common -> final_num_procs) - 1;
|
||||
|
||||
#ifndef USE_OPENMP
|
||||
lprocmask &= *(unsigned long *)&cpu_orig_mask[0];
|
||||
|
|
Loading…
Reference in New Issue