From dc8e4e1959855ca24af7e2d675f2be33087ff96c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 4 Oct 2020 22:59:24 +0200 Subject: [PATCH] Reduce the BLAS3 heap allocation threshold to 32 and mark it as configurable --- Makefile.rule | 17 ++++++++++++++++- common.h | 2 +- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Makefile.rule b/Makefile.rule index 4d6f2d313..635e02c02 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -279,7 +279,22 @@ COMMON_PROF = -pg # If you want to enable the experimental BFLOAT16 support # BUILD_HALF = 1 -# + + +# Set the thread number threshold beyond which the job array for the threaded level3 BLAS +# will be allocated on the heap rather than the stack. (This array alone requires +# NUM_THREADS*NUM_THREADS*128 bytes of memory, so should not pose a problem at low CPU +# counts, but obviously it is not the only item that ends up on the stack.) +# The default value of 32 ensures that the overall requirement is compatible +# with the default 1MB stacksize imposed by having the Java VM loaded without use +# of its -Xss parameter. +# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible +# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the Java +# VM e.g. in Octave or with the Java-based libhdfs in numpy or scipy code. +# BLAS3_MEM_ALLOC_THRESHOLD = 160 + + + # the below is not yet configurable, use cmake if you need to build only select types BUILD_SINGLE = 1 BUILD_DOUBLE = 1 diff --git a/common.h b/common.h index ac12dd6d8..ab287262c 100644 --- a/common.h +++ b/common.h @@ -402,7 +402,7 @@ please https://github.com/xianyi/OpenBLAS/issues/246 #endif #ifndef BLAS3_MEM_ALLOC_THRESHOLD -#define BLAS3_MEM_ALLOC_THRESHOLD 160 +#define BLAS3_MEM_ALLOC_THRESHOLD 32 #endif #ifdef QUAD_PRECISION