Reduce the BLAS3 heap allocation threshold to 32 and mark it as configurable

This commit is contained in:
Martin Kroeker 2020-10-04 22:59:24 +02:00 committed by GitHub
parent cccd1438da
commit dc8e4e1959
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 2 deletions

View File

@ -279,7 +279,22 @@ COMMON_PROF = -pg
# If you want to enable the experimental BFLOAT16 support
# BUILD_HALF = 1
#
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
# will be allocated on the heap rather than the stack. (This array alone requires
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low CPU
# counts, but obviously it is not the only item that ends up on the stack.)
# The default value of 32 ensures that the overall requirement is compatible
# with the default 1MB stacksize imposed by having the Java VM loaded without use
# of its -Xss parameter.
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the Java
# VM, e.g. in Octave or with the Java-based libhdfs in numpy or scipy code.
# BLAS3_MEM_ALLOC_THRESHOLD = 160
# the below is not yet configurable, use cmake if you need to build only select types
BUILD_SINGLE = 1
BUILD_DOUBLE = 1

View File

@ -402,7 +402,7 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#endif
#ifndef BLAS3_MEM_ALLOC_THRESHOLD
#define BLAS3_MEM_ALLOC_THRESHOLD 160
#define BLAS3_MEM_ALLOC_THRESHOLD 32
#endif
#ifdef QUAD_PRECISION