diff --git a/Changelog.txt b/Changelog.txt index f4f9c6e35..7df8c0c19 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -5,6 +5,12 @@ Version 0.1 alpha2(in development) common: * Fixed blasint undefined bug in file. Other software could include this header successfully(Refs issue #13 on github) + * Fixed the SEGFAULT bug on 64 cores. On an SMP server, the number + of CPUs or cores should be less than or equal to 64.(Refs issue #14 + on github) + * Support "void goto_set_num_threads(int num_threads)" and "void + openblas_set_num_threads(int num_threads)" when USE_OPENMP=1 + x86/x86_64: * MIPS64: diff --git a/README b/README index 9b04f6f99..46792c091 100644 --- a/README +++ b/README @@ -39,13 +39,17 @@ export GOTO_NUM_THREADS=4 or export OMP_NUM_THREADS=4 -The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS. +The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS. + +If you compile this lib with USE_OPENMP=1, you should only set the OMP_NUM_THREADS environment variable. 4.2 Set the number of threads with calling functions. for example, void goto_set_num_threads(int num_threads); or void openblas_set_num_threads(int num_threads); +If you compile this lib with USE_OPENMP=1, you should use the above functions, too. + 5.Report Bugs Please add a issue in https://github.com/xianyi/OpenBLAS/issues @@ -56,4 +60,8 @@ Optimization on ICT Loongson 3A CPU OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas 8.ChangeLog -Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. \ No newline at end of file +Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. + +9.Known Issues +* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64-bit systems, the limit + is 64; on 32-bit systems, it is 32. 
diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 3e70d8549..17d886e52 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -49,6 +49,26 @@ int blas_server_avail = 0; +void goto_set_num_threads(int num_threads) { + + if (num_threads < 1) num_threads = blas_num_threads; + + if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; + + if (num_threads > blas_num_threads) { + blas_num_threads = num_threads; + } + + blas_cpu_number = num_threads; + + omp_set_num_threads(blas_cpu_number); + +} +void openblas_set_num_threads(int num_threads) { + + goto_set_num_threads(num_threads); +} + int blas_thread_init(void){ blas_get_cpu_number(); diff --git a/driver/others/init.c b/driver/others/init.c index 7ee7dc45d..4adba661f 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -172,13 +172,20 @@ static inline int rcount(unsigned long number) { return count; } +/*** + Known issue: The number of CPUs/cores should be less + than or equal to 8*sizeof(unsigned long). On 64 bits, the limit + is 64. On 32 bits, it is 32. 
+***/ static inline unsigned long get_cpumap(int node) { int infile; unsigned long affinity; char name[160]; + char cpumap[160]; char *p, *dummy; - + int i=0; + sprintf(name, CPUMAP_NAME, node); infile = open(name, O_RDONLY); @@ -187,13 +194,19 @@ static inline unsigned long get_cpumap(int node) { if (infile != -1) { - read(infile, name, sizeof(name)); - + read(infile, cpumap, sizeof(cpumap)); + p = cpumap; + while (*p != '\n' && i<160){ + if(*p != ',') { + name[i++]=*p; + } + p++; + } p = name; - while ((*p == '0') || (*p == ',')) p++; + // while ((*p == '0') || (*p == ',')) p++; - affinity = strtol(p, &dummy, 16); + affinity = strtoul(p, &dummy, 16); close(infile); } @@ -347,7 +360,13 @@ static void disable_hyperthread(void) { unsigned long share; int cpu; - common -> avail = (1UL << common -> num_procs) - 1; + if(common->num_procs > 64){ + fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs); + exit(1); + }else if(common->num_procs == 64){ + common -> avail = 0xFFFFFFFFFFFFFFFFUL; + }else + common -> avail = (1UL << common -> num_procs) - 1; #ifdef DEBUG fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail); @@ -376,7 +395,13 @@ static void disable_affinity(void) { fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]); #endif - lprocmask = (1UL << common -> final_num_procs) - 1; + if(common->final_num_procs > 64){ + fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs); + exit(1); + }else if(common->final_num_procs == 64){ + lprocmask = 0xFFFFFFFFFFFFFFFFUL; + }else + lprocmask = (1UL << common -> final_num_procs) - 1; #ifndef USE_OPENMP lprocmask &= *(unsigned long *)&cpu_orig_mask[0];