From 32d2ca3035d76e18b2bc64c7bfbe3fad2dba234b Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Thu, 11 Jul 2013 03:20:02 +0800 Subject: [PATCH] Refs #214, #221, #246. Fixed the getrf overflow bug on Windows. I used a smaller threshold since the stack size is 1MB on Windows. --- common.h | 17 +++++++++++++++++ driver/level3/level3_gemm3m_thread.c | 2 +- driver/level3/level3_syrk_threaded.c | 2 +- driver/level3/level3_thread.c | 2 +- lapack/getrf/getrf_parallel.c | 2 +- lapack/potrf/potrf_parallel.c | 2 +- 6 files changed, 22 insertions(+), 5 deletions(-) diff --git a/common.h b/common.h index d46a5230a..fa4c1d745 100644 --- a/common.h +++ b/common.h @@ -314,6 +314,23 @@ typedef int blasint; #define YIELDING sched_yield() #endif +/*** +Thresholds on MAX_CPU_NUMBER above which job_t is allocated on the heap instead of the stack. +Please see https://github.com/xianyi/OpenBLAS/issues/246 +***/ +#if defined(OS_WINDOWS) +#define GETRF_MEM_ALLOC_THRESHOLD 32 +#define BLAS3_MEM_ALLOC_THRESHOLD 32 +#endif + +#ifndef GETRF_MEM_ALLOC_THRESHOLD +#define GETRF_MEM_ALLOC_THRESHOLD 80 +#endif + +#ifndef BLAS3_MEM_ALLOC_THRESHOLD +#define BLAS3_MEM_ALLOC_THRESHOLD 160 +#endif + #ifdef QUAD_PRECISION #include "common_quad.h" #endif diff --git a/driver/level3/level3_gemm3m_thread.c b/driver/level3/level3_gemm3m_thread.c index 036eebb04..bcb0f9dd9 100644 --- a/driver/level3/level3_gemm3m_thread.c +++ b/driver/level3/level3_gemm3m_thread.c @@ -50,7 +50,7 @@ //The array of job_t may overflow the stack. //Instead, use malloc to alloc job_t. -#if MAX_CPU_NUMBER > 210 +#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD #define USE_ALLOC_HEAP #endif diff --git a/driver/level3/level3_syrk_threaded.c b/driver/level3/level3_syrk_threaded.c index 989d156e4..4a3f7a89f 100644 --- a/driver/level3/level3_syrk_threaded.c +++ b/driver/level3/level3_syrk_threaded.c @@ -50,7 +50,7 @@ //The array of job_t may overflow the stack. //Instead, use malloc to alloc job_t. 
-#if MAX_CPU_NUMBER > 210 +#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD #define USE_ALLOC_HEAP #endif diff --git a/driver/level3/level3_thread.c b/driver/level3/level3_thread.c index 364b72e2d..3242790fa 100644 --- a/driver/level3/level3_thread.c +++ b/driver/level3/level3_thread.c @@ -50,7 +50,7 @@ //The array of job_t may overflow the stack. //Instead, use malloc to alloc job_t. -#if MAX_CPU_NUMBER > 210 +#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD #define USE_ALLOC_HEAP #endif diff --git a/lapack/getrf/getrf_parallel.c b/lapack/getrf/getrf_parallel.c index e9ce038b5..21ea9d5f5 100644 --- a/lapack/getrf/getrf_parallel.c +++ b/lapack/getrf/getrf_parallel.c @@ -45,7 +45,7 @@ double sqrt(double); //In this case, the recursive getrf_parallel may overflow the stack. //Instead, use malloc to alloc job_t. -#if MAX_CPU_NUMBER > 90 +#if MAX_CPU_NUMBER > GETRF_MEM_ALLOC_THRESHOLD #define USE_ALLOC_HEAP #endif diff --git a/lapack/potrf/potrf_parallel.c b/lapack/potrf/potrf_parallel.c index 4156dc04c..eec9b6e05 100644 --- a/lapack/potrf/potrf_parallel.c +++ b/lapack/potrf/potrf_parallel.c @@ -43,7 +43,7 @@ //The array of job_t may overflow the stack. //Instead, use malloc to alloc job_t. -#if MAX_CPU_NUMBER > 210 +#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD #define USE_ALLOC_HEAP #endif