From dc8e4e1959855ca24af7e2d675f2be33087ff96c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 4 Oct 2020 22:59:24 +0200 Subject: [PATCH 1/2] Reduce the BLAS3 heap allocation threshold to 32 and mark it as configurable --- Makefile.rule | 17 ++++++++++++++++- common.h | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Makefile.rule b/Makefile.rule index 4d6f2d313..635e02c02 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -279,7 +279,22 @@ COMMON_PROF = -pg # If you want to enable the experimental BFLOAT16 support # BUILD_HALF = 1 -# + + +# Set the thread number threshold beyond which the job array for the threaded level3 BLAS +# will be allocated on the heap rather than the stack. (This array alone requires +# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu +# counts, but obviously it is not the only item that ends up on the stack. +# The default value of 32 ensures that the overall requirement is compatible +# with the default 1MB stacksize imposed by having the Java VM loaded without use +# of its -Xss parameter. +# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible +# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the java +# VM e.g. in Octave or with the java-based libhdfs in numpy or scipy code +# BLAS3_MEM_ALLOC_THRESHOLD = 160 + + + # the below is not yet configurable, use cmake if you need to build only select types BUILD_SINGLE = 1 BUILD_DOUBLE = 1 diff --git a/common.h b/common.h index ac12dd6d8..ab287262c 100644 --- a/common.h +++ b/common.h @@ -402,7 +402,7 @@ please https://github.com/xianyi/OpenBLAS/issues/246 #endif #ifndef BLAS3_MEM_ALLOC_THRESHOLD -#define BLAS3_MEM_ALLOC_THRESHOLD 160 +#define BLAS3_MEM_ALLOC_THRESHOLD 32 #endif #ifdef QUAD_PRECISION From a5feea6611f49e875de83282c061843e18050af6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 4 Oct 2020 23:01:06 +0200 Subject: [PATCH 2/2] make BLAS3_MEM_ALLOC_THRESHOLD configurable on non-Windows --- cmake/system.cmake | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cmake/system.cmake b/cmake/system.cmake index 8908a1890..0734065df 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -323,7 +323,13 @@ else () set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_STACK_ALLOC=2048") endif () endif () - +if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") +if (DEFINED BLAS3_MEM_ALLOC_THRESHOLD) +if (NOT ${BLAS3_MEM_ALLOC_THRESHOLD} EQUAL 32) +set(CCOMMON_OPT "${CCOMMON_OPT} -DBLAS3_MEM_ALLOC_THRESHOLD=${BLAS3_MEM_ALLOC_THRESHOLD}") +endif() +endif() +endif() if (DEFINED LIBNAMESUFFIX) set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}") else ()