diff --git a/common.h b/common.h
index 30351a8ec..52f2402bb 100644
--- a/common.h
+++ b/common.h
@@ -727,6 +727,7 @@ typedef struct {
 #endif
 
 #ifndef ASSEMBLER
+#include "common_stackalloc.h"
 #if 0
 #include "symcopy.h"
 #endif
diff --git a/common_stackalloc.h b/common_stackalloc.h
new file mode 100644
index 000000000..1d6cf57e5
--- /dev/null
+++ b/common_stackalloc.h
@@ -0,0 +1,97 @@
+/*******************************************************************************
+Copyright (c) 2016, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+#ifndef COMMON_STACKALLOC_H
+#define COMMON_STACKALLOC_H
+
+#define STACK_ALLOC_PROTECT
+#ifdef STACK_ALLOC_PROTECT
+// Try to detect stack smashing.
+// The canary is a plain int whose value fits in 32 bits, so the comparison
+// in STACK_ALLOC_PROTECT_CHECK is exact on every target; a 64-bit literal
+// would be truncated on store wherever BLASLONG is narrower than 64 bits.
+#include <assert.h>
+#define STACK_ALLOC_PROTECT_SET volatile int stack_check = 0x7fc01234;
+#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234);
+#else
+#define STACK_ALLOC_PROTECT_SET
+#define STACK_ALLOC_PROTECT_CHECK
+#endif
+
+#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
+
+/*
+ * Allocate a buffer on the stack if the size is smaller than MAX_STACK_ALLOC.
+ * Stack allocation is much faster than blas_memory_alloc or malloc, particularly
+ * when OpenBLAS is used from a multi-threaded application.
+ * SIZE must be carefully chosen to be:
+ * - as small as possible to maximize the number of stack allocations
+ * - large enough to support all architectures and kernels
+ * Choosing a too small SIZE will lead to stack smashing.
+ *
+ * SIZE is a number of TYPE elements. It is range-checked before being
+ * narrowed to int, so a request too large for an int falls back to
+ * blas_memory_alloc() instead of being truncated to a small stack buffer.
+ * SIZE is evaluated more than once: pass an expression without side effects.
+ * The macro declares stack_alloc_size, stack_buffer (and stack_check when
+ * STACK_ALLOC_PROTECT is defined) in the calling scope; STACK_FREE must be
+ * used in that same scope.
+ */
+#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
+  /* make it volatile because some function (ex: dgemv_n.S) */ \
+  /* do not restore all register */ \
+  volatile int stack_alloc_size = \
+    ((SIZE) > 0 && \
+     (unsigned long long)(SIZE) <= MAX_STACK_ALLOC / sizeof(TYPE)) \
+    ? (int)(SIZE) : 0; \
+  STACK_ALLOC_PROTECT_SET \
+  /* ISO C requires a variable length array to have at least one element */ \
+  TYPE stack_buffer[stack_alloc_size ? stack_alloc_size : 1]; \
+  BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
+#else
+  //Original OpenBLAS/GotoBLAS codes.
+  #define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
+#endif
+
+
+#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
+/*
+ * Release a buffer obtained with STACK_ALLOC in the same scope: run the
+ * canary check, then call blas_memory_free() only when the buffer did not
+ * come from the stack. The do/while wrapper makes it a single statement.
+ */
+#define STACK_FREE(BUFFER) \
+  do { \
+    STACK_ALLOC_PROTECT_CHECK \
+    if(!stack_alloc_size) \
+      blas_memory_free(BUFFER); \
+  } while (0)
+#else
+#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
+#endif
+
+#endif /* COMMON_STACKALLOC_H */
diff --git a/interface/gemv.c b/interface/gemv.c
index 206390ed9..8e0bdcca3 100644
--- a/interface/gemv.c
+++ b/interface/gemv.c
@@ -37,7 +37,6 @@
 /*********************************************************************/
 
 #include <stdio.h>
-#include <assert.h>
 #include "common.h"
 #include "l1param.h"
 #ifdef FUNCTION_PROFILE
@@ -80,6 +79,7 @@ void NAME(char *TRANS, blasint *M, blasint *N,
   FLOAT alpha = *ALPHA;
   FLOAT beta = *BETA;
   FLOAT *buffer;
+  blasint buffer_size; /* blasint: m + n must reach STACK_ALLOC un-narrowed */
 #ifdef SMP
   int nthreads;
   int nthreads_max;
@@ -135,6 +135,7 @@ void CNAME(enum CBLAS_ORDER order,
 
   FLOAT *buffer;
   blasint lenx, leny;
+  blasint buffer_size; /* blasint: m + n must reach STACK_ALLOC un-narrowed */
   int trans;
   blasint info, t;
 #ifdef SMP
@@ -216,33 +217,13 @@ void CNAME(enum CBLAS_ORDER order,
   if (incx < 0) x -= (lenx - 1) * incx;
   if (incy < 0) y -= (leny - 1) * incy;
 
-#ifdef MAX_STACK_ALLOC
-  // make it volatile because some gemv implementation (ex: dgemv_n.S)
-  // do not restore all register
-  volatile int stack_alloc_size = 0;
-  //for gemv_n and gemv_t, try to allocate on stack
-  stack_alloc_size = m + n;
-#ifdef ALIGNED_ACCESS
-  stack_alloc_size += 3;
-#endif
-//  if(stack_alloc_size < 128)
-    //dgemv_n.S require a 128 bytes buffer
-// increasing instead of capping 128
-// ABI STACK for windows 288 bytes
-    stack_alloc_size += 288 / sizeof(FLOAT) ;
-
-  if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
-    stack_alloc_size = 0;
-
-// stack overflow check
-  volatile double stack_check = 3.14159265358979323846;
-  FLOAT stack_buffer[stack_alloc_size];
-  buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
-  // printf("stack_alloc_size=%d\n", stack_alloc_size);
-#else
-  //Original OpenBLAS/GotoBLAS codes.
-  buffer = (FLOAT *)blas_memory_alloc(1);
+  buffer_size = m + n + 128 / sizeof(FLOAT);
+#ifdef WINDOWS_ABI
+  buffer_size += 160 / sizeof(FLOAT) ;
 #endif
+  // for alignment
+  buffer_size = (buffer_size + 3) & ~3;
+  STACK_ALLOC(buffer_size, FLOAT, buffer);
 
 #ifdef SMP
 
@@ -271,17 +252,7 @@ void CNAME(enum CBLAS_ORDER order,
   }
 #endif
 
-#ifdef MAX_STACK_ALLOC
-  // stack overflow check
-  assert(stack_check==3.14159265358979323846);
-
-  if(!stack_alloc_size){
-    blas_memory_free(buffer);
-  }
-#else
-  blas_memory_free(buffer);
-#endif
-
+  STACK_FREE(buffer);
   FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
 
   IDEBUG_END;
diff --git a/interface/ger.c b/interface/ger.c
index 9dd2dc58b..2d19e3bc0 100644
--- a/interface/ger.c
+++ b/interface/ger.c
@@ -171,15 +171,7 @@ void CNAME(enum CBLAS_ORDER order,
   if (incy < 0) y -= (n - 1) * incy;
   if (incx < 0) x -= (m - 1) * incx;
 
-#ifdef MAX_STACK_ALLOC
-  volatile int stack_alloc_size = m;
-  if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
-    stack_alloc_size = 0;
-  FLOAT stack_buffer[stack_alloc_size];
-  buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
-#else
-  buffer = (FLOAT *)blas_memory_alloc(1);
-#endif
+  STACK_ALLOC(m, FLOAT, buffer);
 
 #ifdef SMPTEST
   nthreads = num_cpu_avail(2);
@@ -198,11 +190,7 @@ void CNAME(enum CBLAS_ORDER order,
   }
 #endif
 
-#ifdef MAX_STACK_ALLOC
-  if(!stack_alloc_size)
-#endif
-  blas_memory_free(buffer);
-
+  STACK_FREE(buffer);
   FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
 
   IDEBUG_END;