diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index cc1015b7e..1735ee931 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -74,6 +74,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #endif +#ifndef likely +#ifdef __GNUC__ +#define likely(x) __builtin_expect(!!(x), 1) +#else +#define likely(x) (x) +#endif +#endif +#ifndef unlikely +#ifdef __GNUC__ +#define unlikely(x) __builtin_expect(!!(x), 0) +#else +#define unlikely(x) (x) +#endif +#endif + #ifdef SMP_SERVER #undef MONITOR @@ -83,8 +98,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ATTRIBUTE_SIZE 128 -extern void openblas_warning(int verbose, const char * msg); - /* This is a thread server model implementation. The threads are */ /* spawned at first access to blas library, and still remains until */ /* destruction routine is called. The number of threads are */ @@ -586,6 +599,10 @@ static BLASULONG exec_queue_lock = 0; int exec_blas_async(BLASLONG pos, blas_queue_t *queue){ +#ifdef SMP_SERVER + // Handle lazy re-init of the thread-pool after a POSIX fork + if (unlikely(blas_server_avail == 0)) blas_thread_init(); +#endif BLASLONG i = 0; blas_queue_t *current = queue; #if defined(OS_LINUX) && !defined(NO_AFFINITY) && !defined(PARAMTEST) @@ -710,7 +727,11 @@ int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){ /* Execute Threads */ int exec_blas(BLASLONG num, blas_queue_t *queue){ - int (*routine)(blas_arg_t *, void *, void *, double *, double *, BLASLONG); +#ifdef SMP_SERVER + // Handle lazy re-init of the thread-pool after a POSIX fork + if (unlikely(blas_server_avail == 0)) blas_thread_init(); +#endif + int (*routine)(blas_arg_t *, void *, void *, double *, double *, BLASLONG); #ifdef TIMING_DEBUG BLASULONG start, stop; @@ -923,17 +944,5 @@ int BLASFUNC(blas_thread_shutdown)(void){ return 0; } -/* -https://github.com/xianyi/OpenBLAS/issues/294 -Use pthread_atfork to close blas_thread_server before fork. -Then, re-init blas_thread_server after fork at child and parent. -*/ -void openblas_fork_handler() -{ - int err; - err = pthread_atfork (BLASFUNC(blas_thread_shutdown), blas_thread_init, blas_thread_init); - if(err != 0) - openblas_warning(0, "OpenBLAS cannot install fork handler. You may meet hang after fork.\n"); -} #endif diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index 090590e6a..0a484f3e4 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -315,9 +315,4 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){ return 0; } -void openblas_fork_handler() -{ - -} - #endif diff --git a/driver/others/blas_server_win32.c b/driver/others/blas_server_win32.c index 68630a679..100ca34f7 100644 --- a/driver/others/blas_server_win32.c +++ b/driver/others/blas_server_win32.c @@ -498,8 +498,3 @@ void openblas_set_num_threads(int num) { goto_set_num_threads(num); } - -void openblas_fork_handler() -{ - -} diff --git a/driver/others/memory.c b/driver/others/memory.c index 4faf82f29..2b87ebe9c 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -143,6 +143,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. gotoblas_t *gotoblas = NULL; #endif +extern void openblas_warning(int verbose, const char * msg); + #ifndef SMP #define blas_cpu_number 1 @@ -253,6 +255,23 @@ int goto_get_num_procs (void) { return blas_cpu_number; } +void openblas_fork_handler() +{ + // This handler shuts down the OpenBLAS-managed PTHREAD pool when OpenBLAS is + // built with "make USE_OPENMP=0". + // Hanging can still happen when OpenBLAS is built against the libgomp + // implementation of OpenMP. The problem is tracked at: + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035 + // In the mean time build with USE_OPENMP=0 or link against another + // implementation of OpenMP. +#ifndef OS_WINDOWS + int err; + err = pthread_atfork (BLASFUNC(blas_thread_shutdown), NULL, NULL); + if(err != 0) + openblas_warning(0, "OpenBLAS Warning ... cannot install fork handler. You may meet hang after fork.\n"); +#endif +} + int blas_get_cpu_number(void){ char *p; #if defined(OS_LINUX) || defined(OS_WINDOWS) || defined(OS_FREEBSD) || defined(OS_DARWIN) @@ -1268,6 +1287,9 @@ void CONSTRUCTOR gotoblas_init(void) { if (gotoblas_initialized) return; +#ifdef SMP + openblas_fork_handler(); +#endif #ifdef PROFILE moncontrol (0); @@ -1288,11 +1310,7 @@ void CONSTRUCTOR gotoblas_init(void) { #ifdef SMP if (blas_cpu_number == 0) blas_get_cpu_number(); #ifdef SMP_SERVER - if (blas_server_avail == 0) { - blas_thread_init(); - //deal with pthread and fork. - openblas_fork_handler(); - } + if (blas_server_avail == 0) blas_thread_init(); #endif #endif diff --git a/utest/Makefile b/utest/Makefile index 3d120f5b3..38ebb03df 100644 --- a/utest/Makefile +++ b/utest/Makefile @@ -11,7 +11,7 @@ CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a CFLAGS+=-I$(CUNIT_DIR)/include -OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o +OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o all : run_test diff --git a/utest/common_utest.h b/utest/common_utest.h index e57ae0556..f3841c58e 100644 --- a/utest/common_utest.h +++ b/utest/common_utest.h @@ -63,4 +63,6 @@ void test_dsdot_n_1(void); void test_samax(void); +void test_fork_safety(void); + #endif diff --git a/utest/main.c b/utest/main.c index ece94dd71..ca50e473e 100644 --- a/utest/main.c +++ b/utest/main.c @@ -60,6 +60,14 @@ CU_TestInfo test_level1[]={ {"Testing dsdot with n == 1",test_dsdot_n_1}, {"Testing samax", test_samax}, + +#if !defined(USE_OPENMP) && !defined(OS_WINDOWS) + // The GNU OpenMP implementation libgomp is not fork-safe (as of 4.8.2): + // http://gcc.gnu.org/bugzilla/show_bug.cgi?id=60035 + // Hence skip this test when OpenBLAS is built with OpenMP. + {"Testing fork safety", test_fork_safety}, +#endif + CU_TEST_INFO_NULL, }; diff --git a/utest/test_fork.c b/utest/test_fork.c new file mode 100644 index 000000000..1d8804ac6 --- /dev/null +++ b/utest/test_fork.c @@ -0,0 +1,123 @@ +/***************************************************************************** +Copyright (c) 2014, Lab of Parallel Software and Computational Science,ICSAS +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the ISCAS nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + +#ifndef OS_WINDOWS +#include "common_utest.h" +#include +#include + +void* xmalloc(size_t n) +{ + void* tmp; + tmp = malloc(n); + if (tmp == NULL) { + fprintf(stderr, "You are about to die\n"); + exit(1); + } else { + return tmp; + } +} + +void check_dgemm(double *a, double *b, double *result, double *expected, int n) +{ + int i; + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, + 1.0, a, n, b, n, 0.0, result, n); + for(i = 0; i < n * n; ++i) { + CU_ASSERT_DOUBLE_EQUAL(expected[i], result[i], CHECK_EPS); + } +} + +void test_fork_safety(void) +{ + int n = 1000; + int i; + + double *a, *b, *c, *d; + size_t n_bytes; + + pid_t fork_pid; + pid_t fork_pid_nested; + + n_bytes = sizeof(*a) * n * n; + + a = xmalloc(n_bytes); + b = xmalloc(n_bytes); + c = xmalloc(n_bytes); + d = xmalloc(n_bytes); + + // Put ones in a and b + for(i = 0; i < n * n; ++i) { + a[i] = 1; + b[i] = 1; + } + + // Compute a DGEMM product in the parent process prior to forking to + // ensure that the OpenBLAS thread pool is initialized. + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, n, n, n, + 1.0, a, n, b, n, 0.0, c, n); + + fork_pid = fork(); + if (fork_pid == -1) { + CU_FAIL("Failed to fork process."); + } else if (fork_pid == 0) { + // Compute a DGEMM product in the child process to check that the + // thread pool as been properly been reinitialized after the fork. + check_dgemm(a, b, d, c, n); + + // Nested fork to check that the pthread_atfork protection can work + // recursively + fork_pid_nested = fork(); + if (fork_pid_nested == -1) { + CU_FAIL("Failed to fork process."); + exit(1); + } else if (fork_pid_nested == 0) { + check_dgemm(a, b, d, c, n); + exit(0); + } else { + check_dgemm(a, b, d, c, n); + int child_status = 0; + pid_t wait_pid = wait(&child_status); + CU_ASSERT(wait_pid == fork_pid_nested); + CU_ASSERT(WEXITSTATUS (child_status) == 0); + exit(0); + } + } else { + check_dgemm(a, b, d, c, n); + // Wait for the child to finish and check the exit code. + int child_status = 0; + pid_t wait_pid = wait(&child_status); + CU_ASSERT(wait_pid == fork_pid); + CU_ASSERT(WEXITSTATUS (child_status) == 0); + } +} +#endif \ No newline at end of file