From d2cb610272137536416df2e44f1bc8175ddd4eaf Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 15 May 2019 23:18:43 +0200 Subject: [PATCH 1/5] Add option USE_LOCKING for single-threaded build with locking support for calling from concurrent threads --- Makefile.rule | 10 ++++++++-- Makefile.system | 12 ++++++++++++ common.h | 4 ++-- 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/Makefile.rule b/Makefile.rule index 17815096e..faf8c8013 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -56,7 +56,13 @@ VERSION = 0.3.7.dev # specify it. # For force setting for single threaded, specify USE_THREAD = 0 # For force setting for multi threaded, specify USE_THREAD = 1 -# USE_THREAD = 0 +USE_THREAD = 0 + +# If you want to build a single-threaded OpenBLAS, but expect to call this +# from several concurrent threads in some other program, comment this in for +# thread safety. (This is done automatically for USE_THREAD=1 , and should not +# be necessary when USE_OPENMP=1) +# USE_LOCKING = 1 # If you're going to use this library with OpenMP, please comment it in. # This flag is always set for POWER8. Don't set USE_OPENMP = 0 if you're targeting POWER8. @@ -220,7 +226,7 @@ NO_AFFINITY = 1 COMMON_PROF = -pg # Build Debug version -# DEBUG = 1 +DEBUG = 1 # Set maximum stack allocation. # The default value is 2048. 0 disable stack allocation a may reduce GER and GEMV diff --git a/Makefile.system b/Makefile.system index a95d6190f..29aef7e27 100644 --- a/Makefile.system +++ b/Makefile.system @@ -237,6 +237,10 @@ SMP = 1 endif endif +ifeq ($(SMP), 1) +USE_LOCKING = +endif + ifndef NEED_PIC NEED_PIC = 1 endif @@ -388,6 +392,12 @@ ifneq ($(MAX_STACK_ALLOC), 0) CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC) endif +ifdef USE_LOCKING +ifneq ($(USE_LOCKING), 0) +CCOMMON_OPT += -DUSE_LOCKING +endif +endif + # # Architecture dependent settings # @@ -744,6 +754,8 @@ CCOMMON_OPT += -DF_INTERFACE_GFORT FCOMMON_OPT += -Wall # make single-threaded LAPACK calls thread-safe #1847 FCOMMON_OPT += -frecursive +# work around ABI changes in gfortran 9 that break calls from C code +FCOMMON_OPT += -fno-optimize-sibling-calls #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc ifneq ($(NO_LAPACK), 1) EXTRALIB += -lgfortran diff --git a/common.h b/common.h index 0ac74bb20..a9fe8d911 100644 --- a/common.h +++ b/common.h @@ -131,7 +131,7 @@ extern "C" { #include #include #include -#ifdef SMP +#if defined(SMP) || defined(USE_LOCKING) #include #endif #endif @@ -200,7 +200,7 @@ extern "C" { #error "You can't specify both LOCK operation!" #endif -#ifdef SMP +#if defined(SMP) || defined(USE_LOCKING) #define USE_PTHREAD_LOCK #undef USE_PTHREAD_SPINLOCK #endif From 1e52572be38541cc11ac39cef6cded8a640bb65b Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 15 May 2019 23:19:30 +0200 Subject: [PATCH 2/5] Add option USE_LOCKING for single-threaded build with locking support --- cmake/system.cmake | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index d0f560872..adedd32cc 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -136,6 +136,10 @@ endif () if (USE_THREAD) message(STATUS "Multi-threading enabled with ${NUM_THREADS} threads.") +else() + if (${USE_LOCKING}) + set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_LOCKING") + endif () endif () include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") From 86dda5c2fa9e298deacdd17211e2c4e58f2688ea Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 15 May 2019 23:21:20 +0200 Subject: [PATCH 3/5] Add option USE_LOCKING for SMP-like locking in USE_THREAD=0 builds --- driver/others/memory.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 02352b3ae..adb1ec86c 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -2062,13 +2062,13 @@ static void *alloc_mmap(void *address){ } if (map_address != (void *)-1) { -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); #endif release_info[release_pos].address = map_address; release_info[release_pos].func = alloc_mmap_free; release_pos ++; -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #endif } @@ -2214,13 +2214,13 @@ static void *alloc_mmap(void *address){ #endif if (map_address != (void *)-1) { -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); #endif release_info[release_pos].address = map_address; release_info[release_pos].func = alloc_mmap_free; release_pos ++; -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #endif } @@ -2701,7 +2701,7 @@ void *blas_memory_alloc(int procpos){ position = 0; -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); #endif do { @@ -2718,7 +2718,7 @@ void *blas_memory_alloc(int procpos){ position ++; } while (position < NUM_BUFFERS); -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #endif goto error; @@ -2730,7 +2730,7 @@ void *blas_memory_alloc(int procpos){ #endif memory[position].used = 1; -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #else blas_unlock(&memory[position].lock); @@ -2779,11 +2779,11 @@ void *blas_memory_alloc(int procpos){ } while ((BLASLONG)map_address == -1); -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); #endif memory[position].addr = map_address; -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #endif @@ -2839,7 +2839,7 @@ void blas_memory_free(void *free_area){ #endif position = 0; -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) LOCK_COMMAND(&alloc_lock); #endif while ((position < NUM_BUFFERS) && (memory[position].addr != free_area)) @@ -2855,7 +2855,7 @@ void blas_memory_free(void *free_area){ WMB; memory[position].used = 0; -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #endif @@ -2872,7 +2872,7 @@ void blas_memory_free(void *free_area){ for (position = 0; position < NUM_BUFFERS; position++) printf("%4ld %p : %d\n", position, memory[position].addr, memory[position].used); #endif -#if defined(SMP) && !defined(USE_OPENMP) +#if (defined(SMP) || defined(USE_LOCKING)) && !defined(USE_OPENMP) UNLOCK_COMMAND(&alloc_lock); #endif return; @@ -2924,7 +2924,7 @@ void blas_shutdown(void){ #if defined(OS_LINUX) && !defined(NO_WARMUP) -#ifdef SMP +#if defined(SMP) || defined(USE_LOCKING) #if defined(USE_PTHREAD_LOCK) static pthread_mutex_t init_lock = PTHREAD_MUTEX_INITIALIZER; #elif defined(USE_PTHREAD_SPINLOCK) @@ -2949,7 +2949,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, if (hot_alloc != 2) { #endif -#ifdef SMP +#if defined(SMP) || defined(USE_LOCKING) LOCK_COMMAND(&init_lock); #endif @@ -2959,7 +2959,7 @@ static void _touch_memory(blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, size -= PAGESIZE; } -#ifdef SMP +#if defined(SMP) || defined(USE_LOCKING) UNLOCK_COMMAND(&init_lock); #endif From 5ecffc28f2c32a23222ab633c904c9886923ecf1 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 15 May 2019 23:36:17 +0200 Subject: [PATCH 4/5] Add option USE_LOCKING but keep default settings intact --- Makefile.rule | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.rule b/Makefile.rule index faf8c8013..255d1da46 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -56,7 +56,7 @@ VERSION = 0.3.7.dev # specify it. # For force setting for single threaded, specify USE_THREAD = 0 # For force setting for multi threaded, specify USE_THREAD = 1 -USE_THREAD = 0 +# USE_THREAD = 0 # If you want to build a single-threaded OpenBLAS, but expect to call this # from several concurrent threads in some other program, comment this in for @@ -226,7 +226,7 @@ NO_AFFINITY = 1 COMMON_PROF = -pg # Build Debug version -DEBUG = 1 +# DEBUG = 1 # Set maximum stack allocation. # The default value is 2048. 0 disable stack allocation a may reduce GER and GEMV From f66c11fc22fa01eb8e120d4274d262b3795e4281 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 15 May 2019 23:38:12 +0200 Subject: [PATCH 5/5] Remove unrelated change --- Makefile.system | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile.system b/Makefile.system index 29aef7e27..f574edf88 100644 --- a/Makefile.system +++ b/Makefile.system @@ -754,8 +754,6 @@ CCOMMON_OPT += -DF_INTERFACE_GFORT FCOMMON_OPT += -Wall # make single-threaded LAPACK calls thread-safe #1847 FCOMMON_OPT += -frecursive -# work around ABI changes in gfortran 9 that break calls from C code -FCOMMON_OPT += -fno-optimize-sibling-calls #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc ifneq ($(NO_LAPACK), 1) EXTRALIB += -lgfortran