Merge pull request #4503 from shivammonaka/OpenMP-Locks
OpenMP locks instead of busy-waiting with NUM_PARALLEL
This commit is contained in:
commit
66bde6243e
|
@ -545,13 +545,31 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
|
||||||
*range_n, IFLOAT *sa, IFLOAT *sb,
|
*range_n, IFLOAT *sa, IFLOAT *sb,
|
||||||
BLASLONG nthreads_m, BLASLONG nthreads_n) {
|
BLASLONG nthreads_m, BLASLONG nthreads_n) {
|
||||||
|
|
||||||
#ifndef USE_OPENMP
|
#ifdef USE_OPENMP
|
||||||
#ifndef OS_WINDOWS
|
static omp_lock_t level3_lock, critical_section_lock;
|
||||||
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
|
static volatile BLASLONG init_lock = 0, omp_lock_initialized = 0,
|
||||||
|
parallel_section_left = MAX_PARALLEL_NUMBER;
|
||||||
|
|
||||||
|
// Lock initialization; Todo : Maybe this part can be moved to blas_init() in blas_server_omp.c
|
||||||
|
while(omp_lock_initialized == 0)
|
||||||
|
{
|
||||||
|
blas_lock(&init_lock);
|
||||||
|
{
|
||||||
|
if(omp_lock_initialized == 0)
|
||||||
|
{
|
||||||
|
omp_init_lock(&level3_lock);
|
||||||
|
omp_init_lock(&critical_section_lock);
|
||||||
|
omp_lock_initialized = 1;
|
||||||
|
WMB;
|
||||||
|
}
|
||||||
|
blas_unlock(&init_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif defined(OS_WINDOWS)
|
||||||
|
CRITICAL_SECTION level3_lock;
|
||||||
|
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
||||||
#else
|
#else
|
||||||
CRITICAL_SECTION level3_lock;
|
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||||
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
blas_arg_t newarg;
|
blas_arg_t newarg;
|
||||||
|
@ -599,12 +617,28 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef USE_OPENMP
|
#ifdef USE_OPENMP
|
||||||
#ifndef OS_WINDOWS
|
omp_set_lock(&level3_lock);
|
||||||
pthread_mutex_lock(&level3_lock);
|
omp_set_lock(&critical_section_lock);
|
||||||
|
|
||||||
|
parallel_section_left--;
|
||||||
|
|
||||||
|
/*
|
||||||
|
How OpenMP locks works with NUM_PARALLEL
|
||||||
|
1) parallel_section_left = Number of available concurrent executions of OpenBLAS - Number of currently executing OpenBLAS executions
|
||||||
|
2) level3_lock is acting like a master lock or barrier which stops OpenBLAS calls when all the parallel_section are currently busy executing other OpenBLAS calls
|
||||||
|
3) critical_section_lock is used for updating variables shared between threads executing OpenBLAS calls concurrently and for unlocking of master lock whenever required
|
||||||
|
4) Unlock master lock only when we have not already exhausted all the parallel_sections and allow another thread with a OpenBLAS call to enter
|
||||||
|
*/
|
||||||
|
if(parallel_section_left != 0)
|
||||||
|
omp_unset_lock(&level3_lock);
|
||||||
|
|
||||||
|
omp_unset_lock(&critical_section_lock);
|
||||||
|
|
||||||
|
#elif defined(OS_WINDOWS)
|
||||||
|
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
||||||
#else
|
#else
|
||||||
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
pthread_mutex_lock(&level3_lock);
|
||||||
#endif
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef USE_ALLOC_HEAP
|
#ifdef USE_ALLOC_HEAP
|
||||||
|
@ -732,12 +766,24 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
||||||
free(job);
|
free(job);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef USE_OPENMP
|
#ifdef USE_OPENMP
|
||||||
#ifndef OS_WINDOWS
|
omp_set_lock(&critical_section_lock);
|
||||||
pthread_mutex_unlock(&level3_lock);
|
parallel_section_left++;
|
||||||
#else
|
|
||||||
|
/*
|
||||||
|
Unlock master lock only when all the parallel_sections are already exhausted and one of the thread has completed its OpenBLAS call
|
||||||
|
otherwise just increment the parallel_section_left
|
||||||
|
The master lock is only locked when we have exhausted all the parallel_sections, So only unlock it then and otherwise just increment the count
|
||||||
|
*/
|
||||||
|
if(parallel_section_left == 1)
|
||||||
|
omp_unset_lock(&level3_lock);
|
||||||
|
|
||||||
|
omp_unset_lock(&critical_section_lock);
|
||||||
|
|
||||||
|
#elif defined(OS_WINDOWS)
|
||||||
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
|
||||||
#endif
|
#else
|
||||||
|
pthread_mutex_unlock(&level3_lock);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -407,7 +407,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
while(true) {
|
|
||||||
for(i=0; i < MAX_PARALLEL_NUMBER; i++) {
|
for(i=0; i < MAX_PARALLEL_NUMBER; i++) {
|
||||||
#ifdef HAVE_C11
|
#ifdef HAVE_C11
|
||||||
_Bool inuse = false;
|
_Bool inuse = false;
|
||||||
|
@ -420,9 +419,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(i != MAX_PARALLEL_NUMBER)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (openblas_omp_adaptive_env() != 0) {
|
if (openblas_omp_adaptive_env() != 0) {
|
||||||
#pragma omp parallel for num_threads(num) schedule(OMP_SCHED)
|
#pragma omp parallel for num_threads(num) schedule(OMP_SCHED)
|
||||||
|
|
Loading…
Reference in New Issue