diff --git a/Makefile.system b/Makefile.system index ec6339d62..6f3c0bc2b 100644 --- a/Makefile.system +++ b/Makefile.system @@ -305,6 +305,10 @@ ifdef SANITY_CHECK CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU) endif +ifdef MAX_STACK_ALLOC +CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC) +endif + # # Architecture dependent settings # diff --git a/interface/gemv.c b/interface/gemv.c index 2dd82dce5..155305be8 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -208,7 +208,18 @@ void CNAME(enum CBLAS_ORDER order, if (incx < 0) x -= (lenx - 1) * incx; if (incy < 0) y -= (leny - 1) * incy; +#ifdef MAX_STACK_ALLOC + int stack_alloc_size = m + n; + if(stack_alloc_size < 128) + //dgemv_n.S requires a 128-byte buffer + stack_alloc_size = 128; + if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT)) + stack_alloc_size = 0; + FLOAT stack_buffer[stack_alloc_size ? stack_alloc_size : 1]; /* a VLA length of 0 is undefined behaviour; the 1-element array is unused on the heap path */ + buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1); +#else buffer = (FLOAT *)blas_memory_alloc(1); +#endif #ifdef SMP @@ -237,7 +248,10 @@ void CNAME(enum CBLAS_ORDER order, } #endif - blas_memory_free(buffer); +#ifdef MAX_STACK_ALLOC + if(!stack_alloc_size) +#endif + blas_memory_free(buffer); FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); diff --git a/interface/ger.c b/interface/ger.c index 9857d2423..cac357786 100644 --- a/interface/ger.c +++ b/interface/ger.c @@ -171,7 +171,15 @@ void CNAME(enum CBLAS_ORDER order, if (incy < 0) y -= (n - 1) * incy; if (incx < 0) x -= (m - 1) * incx; +#ifdef MAX_STACK_ALLOC + int stack_alloc_size = m; + if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT)) + stack_alloc_size = 0; + FLOAT stack_buffer[stack_alloc_size ? stack_alloc_size : 1]; /* a VLA length of 0 is undefined behaviour; the 1-element array is unused on the heap path */ + buffer = stack_alloc_size ? 
stack_buffer : (FLOAT *)blas_memory_alloc(1); +#else buffer = (FLOAT *)blas_memory_alloc(1); +#endif #ifdef SMPTEST nthreads = num_cpu_avail(2); @@ -190,7 +198,10 @@ void CNAME(enum CBLAS_ORDER order, } #endif - blas_memory_free(buffer); +#ifdef MAX_STACK_ALLOC + if(!stack_alloc_size) +#endif + blas_memory_free(buffer); FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n); diff --git a/kernel/x86_64/sgemv_t_4.c b/kernel/x86_64/sgemv_t_4.c index cd13bb67d..61eb1ed84 100644 --- a/kernel/x86_64/sgemv_t_4.c +++ b/kernel/x86_64/sgemv_t_4.c @@ -302,7 +302,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO if ( n < 1 ) return(0); xbuffer = buffer; - ytemp = buffer + NBMAX; + ytemp = buffer + (m < NBMAX ? m : NBMAX); n0 = n / NBMAX; n1 = (n % NBMAX) >> 2 ;