Merge pull request #4503 from shivammonaka/OpenMP-Locks

OpenMP locks instead of busy-waiting with NUM_PARALLEL
This commit is contained in:
Martin Kroeker 2024-03-14 20:56:04 +01:00 committed by GitHub
commit 66bde6243e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 62 additions and 20 deletions

View File

@ -545,13 +545,31 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
*range_n, IFLOAT *sa, IFLOAT *sb,
BLASLONG nthreads_m, BLASLONG nthreads_n) {
#ifndef USE_OPENMP
#ifndef OS_WINDOWS
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
#ifdef USE_OPENMP
static omp_lock_t level3_lock, critical_section_lock;
static volatile BLASLONG init_lock = 0, omp_lock_initialized = 0,
parallel_section_left = MAX_PARALLEL_NUMBER;
// Lock initialization; Todo : Maybe this part can be moved to blas_init() in blas_server_omp.c
while(omp_lock_initialized == 0)
{
blas_lock(&init_lock);
{
if(omp_lock_initialized == 0)
{
omp_init_lock(&level3_lock);
omp_init_lock(&critical_section_lock);
omp_lock_initialized = 1;
WMB;
}
blas_unlock(&init_lock);
}
}
#elif defined(OS_WINDOWS)
CRITICAL_SECTION level3_lock;
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
CRITICAL_SECTION level3_lock;
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
#endif
blas_arg_t newarg;
@ -599,12 +617,28 @@ InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
#endif
#ifndef USE_OPENMP
#ifndef OS_WINDOWS
pthread_mutex_lock(&level3_lock);
#ifdef USE_OPENMP
omp_set_lock(&level3_lock);
omp_set_lock(&critical_section_lock);
parallel_section_left--;
/*
How OpenMP locks works with NUM_PARALLEL
1) parallel_section_left = Number of available concurrent executions of OpenBLAS - Number of currently executing OpenBLAS executions
2) level3_lock is acting like a master lock or barrier which stops OpenBLAS calls when all the parallel_section are currently busy executing other OpenBLAS calls
3) critical_section_lock is used for updating variables shared between threads executing OpenBLAS calls concurrently and for unlocking of master lock whenever required
4) Unlock master lock only when we have not already exhausted all the parallel_sections and allow another thread with a OpenBLAS call to enter
*/
if(parallel_section_left != 0)
omp_unset_lock(&level3_lock);
omp_unset_lock(&critical_section_lock);
#elif defined(OS_WINDOWS)
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
pthread_mutex_lock(&level3_lock);
#endif
#ifdef USE_ALLOC_HEAP
@ -732,12 +766,24 @@ EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
free(job);
#endif
#ifndef USE_OPENMP
#ifndef OS_WINDOWS
pthread_mutex_unlock(&level3_lock);
#else
#ifdef USE_OPENMP
omp_set_lock(&critical_section_lock);
parallel_section_left++;
/*
Unlock master lock only when all the parallel_sections are already exhausted and one of the thread has completed its OpenBLAS call
otherwise just increment the parallel_section_left
The master lock is only locked when we have exhausted all the parallel_sections, So only unlock it then and otherwise just increment the count
*/
if(parallel_section_left == 1)
omp_unset_lock(&level3_lock);
omp_unset_lock(&critical_section_lock);
#elif defined(OS_WINDOWS)
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
#endif
#else
pthread_mutex_unlock(&level3_lock);
#endif
return 0;

View File

@ -407,7 +407,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
}
#endif
while(true) {
for(i=0; i < MAX_PARALLEL_NUMBER; i++) {
#ifdef HAVE_C11
_Bool inuse = false;
@ -420,9 +419,6 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
break;
}
}
if(i != MAX_PARALLEL_NUMBER)
break;
}
if (openblas_omp_adaptive_env() != 0) {
#pragma omp parallel for num_threads(num) schedule(OMP_SCHED)