diff --git a/Makefile.rule b/Makefile.rule index 2fc82619b..997b52d98 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -70,7 +70,7 @@ VERSION = 0.1 # time out to improve performance. This number should be from 4 to 30 # which corresponds to (1 << n) cycles. For example, if you set to 26, # thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz -# system). Also you can control this mumber by GOTO_THREAD_TIMEOUT +# system). Also you can control this number by THREAD_TIMEOUT # CCOMMON_OPT += -DTHREAD_TIMEOUT=26 # Using special device driver for mapping physically contigous memory diff --git a/README b/README index 1a10cb198..d1846399c 100644 --- a/README +++ b/README @@ -32,11 +32,19 @@ MIPS64: 4.Usages Link with libopenblas.a or -lopenblas for shared library. -Set the number of threads. for example, +4.1 Set the number of threads with environment variables. For example, export OPENBLAS_NUM_THREADS=4 + or +export GOTO_NUM_THREADS=4 or export OMP_NUM_THREADS=4 -OPENBLAS_NUM_THREAD is prior to OMP_NUM_THREADS. + +The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS. + +4.2 Set the number of threads by calling functions. 
for example, +void goto_set_num_threads(int num_threads); +or +void openblas_set_num_threads(int num_threads); 5.Report Bugs Please add a issue in https://github.com/xianyi/OpenBLAS/issues diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index 11f058e96..c0f77c4c9 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -525,7 +525,16 @@ int blas_thread_init(void){ if (thread_timeout < 4) thread_timeout = 4; if (thread_timeout > 30) thread_timeout = 30; thread_timeout = (1 << thread_timeout); - } + }else{ + p = getenv("GOTO_THREAD_TIMEOUT"); + if (p) { + thread_timeout = atoi(p); + if (thread_timeout < 4) thread_timeout = 4; + if (thread_timeout > 30) thread_timeout = 30; + thread_timeout = (1 << thread_timeout); + } + } + for(i = 0; i < blas_num_threads - 1; i++){ @@ -790,6 +799,11 @@ void goto_set_num_threads(int num_threads) { } +void openblas_set_num_threads(int num_threads) { + goto_set_num_threads(num_threads); + +} + /* Compatible function with pthread_create / join */ int gotoblas_pthread(int numthreads, void *function, void *args, int stride) { diff --git a/driver/others/init.c b/driver/others/init.c index 94f883728..452656c55 100644 --- a/driver/others/init.c +++ b/driver/others/init.c @@ -581,6 +581,7 @@ void gotoblas_affinity_init(void) { numprocs = 0; #else numprocs = readenv("OPENBLAS_NUM_THREADS"); + if (numprocs == 0) numprocs = readenv("GOTO_NUM_THREADS"); #endif if (numprocs == 0) numprocs = readenv("OMP_NUM_THREADS"); @@ -666,7 +667,7 @@ void gotoblas_affinity_init(void) { setup_mempolicy(); - if (readenv("OPENBLAS_MAIN_FREE")) { + if (readenv("OPENBLAS_MAIN_FREE") || readenv("GOTOBLAS_MAIN_FREE")) { sched_setaffinity(0, sizeof(cpu_orig_mask), &cpu_orig_mask[0]); } diff --git a/driver/others/memory.c b/driver/others/memory.c index fc5265715..fa41465f6 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -231,6 +231,13 @@ int blas_get_cpu_number(void){ p = getenv("OPENBLAS_NUM_THREADS"); if 
(p) blas_goto_num = atoi(p); if (blas_goto_num < 0) blas_goto_num = 0; + + if (blas_goto_num == 0) { + p = getenv("GOTO_NUM_THREADS"); + if (p) blas_goto_num = atoi(p); + if (blas_goto_num < 0) blas_goto_num = 0; + } + #endif blas_omp_num = 0;