diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 21bc5f78e..c567ed688 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -49,8 +49,12 @@ int blas_server_avail = 0; +static void * blas_thread_buffer[MAX_CPU_NUMBER]; + void goto_set_num_threads(int num_threads) { + int i=0; + if (num_threads < 1) num_threads = blas_num_threads; if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; @@ -62,7 +66,19 @@ void goto_set_num_threads(int num_threads) { blas_cpu_number = num_threads; omp_set_num_threads(blas_cpu_number); - + + //adjust buffer for each thread + for(i=0; i sa; sb = queue -> sb; @@ -189,7 +222,14 @@ static void exec_threads(blas_queue_t *queue){ if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) { - buffer = blas_memory_alloc(2); + pos = omp_get_thread_num(); + buffer = blas_thread_buffer[pos]; + + //fallback + if(buffer==NULL) { + buffer = blas_memory_alloc(2); + release_flag=1; + } if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A); @@ -242,7 +282,7 @@ static void exec_threads(blas_queue_t *queue){ } - if (buffer != NULL) blas_memory_free(buffer); + if (release_flag) blas_memory_free(buffer); }