In OpenMP threading, preallocate the per-thread buffers instead of allocating a buffer on every call. This patch improves performance slightly.

This commit is contained in:
Zhang Xianyi 2013-03-01 14:36:47 +08:00
parent 3cc6ae793e
commit d744c9590a
1 changed files with 45 additions and 5 deletions

View File

@ -49,8 +49,12 @@
int blas_server_avail = 0;
static void * blas_thread_buffer[MAX_CPU_NUMBER];
void goto_set_num_threads(int num_threads) {
int i=0;
if (num_threads < 1) num_threads = blas_num_threads;
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
@ -63,6 +67,18 @@ void goto_set_num_threads(int num_threads) {
omp_set_num_threads(blas_cpu_number);
//adjust buffer for each thread
for(i=0; i<blas_cpu_number; i++){
if(blas_thread_buffer[i]==NULL){
blas_thread_buffer[i]=blas_memory_alloc(2);
}
}
for(; i<MAX_CPU_NUMBER; i++){
if(blas_thread_buffer[i]!=NULL){
blas_memory_free(blas_thread_buffer[i]);
blas_thread_buffer[i]=NULL;
}
}
#if defined(ARCH_MIPS64)
//set parameters for different number of threads.
blas_set_parameter();
@ -76,17 +92,33 @@ void openblas_set_num_threads(int num_threads) {
/*
 * Initialise the OpenMP BLAS server state.
 *
 * Calls blas_get_cpu_number(), marks the server as available and makes sure
 * each of the first blas_num_threads slots of blas_thread_buffer holds a
 * buffer obtained from blas_memory_alloc(2).
 *
 * Slots that already hold a buffer are reused rather than overwritten:
 * goto_set_num_threads() fills the same table, so allocating here
 * unconditionally would drop those pointers and leak the buffers.
 *
 * Slots past blas_num_threads are deliberately left untouched. The table has
 * static storage, so never-used slots are already NULL, and any buffer still
 * stored there remains owned by the table and is released by
 * blas_thread_shutdown(); overwriting it with NULL would leak it.
 *
 * Always returns 0.
 */
int blas_thread_init(void){
  int i;

  blas_get_cpu_number();

  blas_server_avail = 1;

  /* Bound by the table size too, in case blas_num_threads exceeds it. */
  for(i=0; i<blas_num_threads && i<MAX_CPU_NUMBER; i++){
    if(blas_thread_buffer[i]==NULL){
      blas_thread_buffer[i]=blas_memory_alloc(2);
    }
  }

  return 0;
}
/*
 * Shut the OpenMP BLAS server down.
 *
 * Clears blas_server_avail, then walks the whole blas_thread_buffer table,
 * handing every non-NULL entry to blas_memory_free() and resetting the slot
 * to NULL so the table can be refilled later.
 *
 * Always returns 0.
 */
int BLASFUNC(blas_thread_shutdown)(void){
  void **slot;
  void **const table_end = blas_thread_buffer + MAX_CPU_NUMBER;

  blas_server_avail = 0;

  for (slot = blas_thread_buffer; slot != table_end; ++slot) {
    if (*slot == NULL) {
      continue;               /* nothing was preallocated for this slot */
    }
    blas_memory_free(*slot);
    *slot = NULL;
  }

  return 0;
}
@ -177,6 +209,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
static void exec_threads(blas_queue_t *queue){
void *buffer, *sa, *sb;
int pos=0, release_flag=0;
buffer = NULL;
sa = queue -> sa;
@ -189,7 +222,14 @@ static void exec_threads(blas_queue_t *queue){
if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) {
pos = omp_get_thread_num();
buffer = blas_thread_buffer[pos];
//fallback
if(buffer==NULL) {
buffer = blas_memory_alloc(2);
release_flag=1;
}
if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
@ -242,7 +282,7 @@ static void exec_threads(blas_queue_t *queue){
}
if (buffer != NULL) blas_memory_free(buffer);
if (release_flag) blas_memory_free(buffer);
}