Factor out the MAX_STACK_ALLOC code into common_stackalloc.h

Ref #727
This commit is contained in:
Jerome Robert 2016-01-03 13:59:37 +01:00
parent e3e20e2242
commit 87a2ccc37c
4 changed files with 85 additions and 53 deletions

View File

@ -727,6 +727,7 @@ typedef struct {
#endif
#ifndef ASSEMBLER
#include "common_stackalloc.h"
#if 0
#include "symcopy.h"
#endif

73
common_stackalloc.h Normal file
View File

@ -0,0 +1,73 @@
/*******************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
/*
 * Optional canary used by STACK_ALLOC / STACK_FREE to detect stack smashing.
 *
 * STACK_ALLOC_PROTECT_SET   declares a local canary named `stack_check`.
 * STACK_ALLOC_PROTECT_CHECK asserts that the canary still holds its value.
 *
 * Both macros carry their own trailing semicolon because they are pasted
 * inside other macros without one. When STACK_ALLOC_PROTECT is not defined
 * they expand to nothing.
 */
#define STACK_ALLOC_PROTECT
#ifdef STACK_ALLOC_PROTECT
// Try to detect stack smashing
#include <assert.h>
/*
 * The canary is a plain int and its value fits in 32 bits, so the stored
 * value and the constant it is compared against are identical on every
 * target. A wider constant stored in a narrower integer type would be
 * truncated on initialisation and the check would then always fail.
 */
#define STACK_ALLOC_PROTECT_SET volatile int stack_check = 0x7fc01234;
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234);
#else
#define STACK_ALLOC_PROTECT_SET
#define STACK_ALLOC_PROTECT_CHECK
#endif
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
/*
 * STACK_ALLOC(SIZE, TYPE, BUFFER)
 *
 * Allocate a buffer on the stack if the size is smaller than MAX_STACK_ALLOC.
 * Stack allocation is much faster than blas_memory_alloc or malloc, particularly
 * when OpenBLAS is used from a multi-threaded application.
 *
 *   SIZE   - requested number of TYPE elements
 *   TYPE   - element type of the buffer
 *   BUFFER - lvalue of type TYPE* that receives the buffer address
 *
 * SIZE must be carefully chosen to be:
 * - as small as possible to maximize the number of stack allocation
 * - large enough to support all architectures and kernel
 * Choosing a too small SIZE will lead to a stack smashing.
 *
 * When SIZE is zero or exceeds MAX_STACK_ALLOC / sizeof(TYPE),
 * stack_alloc_size is set to 0 and BUFFER comes from blas_memory_alloc(1)
 * instead. The comparison is made against an unsigned value, so a negative
 * SIZE also takes that path.
 *
 * The macro expands to several declarations and statements. It introduces
 * the locals stack_alloc_size and stack_buffer (plus the canary, if enabled)
 * into the enclosing scope, which the matching STACK_FREE relies on. It must
 * therefore be used at most once per scope and never as the body of an
 * unbraced if/else.
 */
#define STACK_ALLOC(SIZE, TYPE, BUFFER)                                        \
  /* make it volatile because some function (ex: dgemv_n.S) */                 \
  /* do not restore all register */                                            \
  volatile int stack_alloc_size = (SIZE);                                      \
  if (stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE))                       \
    stack_alloc_size = 0;                                                      \
  STACK_ALLOC_PROTECT_SET                                                      \
  /* A variable length array must have a size greater than zero, */            \
  /* so declare one element when the heap path is taken.         */            \
  TYPE stack_buffer[stack_alloc_size ? stack_alloc_size : 1];                  \
  BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
#else
//Original OpenBLAS/GotoBLAS codes.
#define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
#endif
/*
 * STACK_FREE(BUFFER)
 *
 * Release a buffer obtained with STACK_ALLOC in the same scope.
 *
 * With stack allocation enabled, the canary is checked first (if
 * STACK_ALLOC_PROTECT is defined). BUFFER is then handed to blas_memory_free
 * only when stack_alloc_size is 0, i.e. when STACK_ALLOC did not use the
 * stack buffer. Otherwise BUFFER is always passed to blas_memory_free.
 *
 * The macro expands to a single statement and must be followed by a
 * semicolon, in both configurations.
 */
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
#define STACK_FREE(BUFFER)                                                     \
  do {                                                                         \
    STACK_ALLOC_PROTECT_CHECK                                                  \
    if (!stack_alloc_size)                                                     \
      blas_memory_free(BUFFER);                                                \
  } while (0)
#else
#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
#endif

View File

@ -37,7 +37,6 @@
/*********************************************************************/
#include <stdio.h>
#include <assert.h>
#include "common.h"
#include "l1param.h"
#ifdef FUNCTION_PROFILE
@ -80,6 +79,7 @@ void NAME(char *TRANS, blasint *M, blasint *N,
FLOAT alpha = *ALPHA;
FLOAT beta = *BETA;
FLOAT *buffer;
int buffer_size;
#ifdef SMP
int nthreads;
int nthreads_max;
@ -135,7 +135,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
blasint lenx, leny;
int trans;
int trans, buffer_size;
blasint info, t;
#ifdef SMP
int nthreads;
@ -216,33 +216,13 @@ void CNAME(enum CBLAS_ORDER order,
if (incx < 0) x -= (lenx - 1) * incx;
if (incy < 0) y -= (leny - 1) * incy;
#ifdef MAX_STACK_ALLOC
// make it volatile because some gemv implementation (ex: dgemv_n.S)
// do not restore all register
volatile int stack_alloc_size = 0;
//for gemv_n and gemv_t, try to allocate on stack
stack_alloc_size = m + n;
#ifdef ALIGNED_ACCESS
stack_alloc_size += 3;
#endif
// if(stack_alloc_size < 128)
//dgemv_n.S require a 128 bytes buffer
// increasing instead of capping 128
// ABI STACK for windows 288 bytes
stack_alloc_size += 288 / sizeof(FLOAT) ;
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
stack_alloc_size = 0;
// stack overflow check
volatile double stack_check = 3.14159265358979323846;
FLOAT stack_buffer[stack_alloc_size];
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
// printf("stack_alloc_size=%d\n", stack_alloc_size);
#else
//Original OpenBLAS/GotoBLAS codes.
buffer = (FLOAT *)blas_memory_alloc(1);
buffer_size = m + n + 128 / sizeof(FLOAT);
#ifdef WINDOWS_ABI
buffer_size += 160 / sizeof(FLOAT) ;
#endif
// for alignment
buffer_size = (buffer_size + 3) & ~3;
STACK_ALLOC(buffer_size, FLOAT, buffer);
#ifdef SMP
@ -271,17 +251,7 @@ void CNAME(enum CBLAS_ORDER order,
}
#endif
#ifdef MAX_STACK_ALLOC
// stack overflow check
assert(stack_check==3.14159265358979323846);
if(!stack_alloc_size){
blas_memory_free(buffer);
}
#else
blas_memory_free(buffer);
#endif
STACK_FREE(buffer);
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
IDEBUG_END;

View File

@ -171,15 +171,7 @@ void CNAME(enum CBLAS_ORDER order,
if (incy < 0) y -= (n - 1) * incy;
if (incx < 0) x -= (m - 1) * incx;
#ifdef MAX_STACK_ALLOC
volatile int stack_alloc_size = m;
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
stack_alloc_size = 0;
FLOAT stack_buffer[stack_alloc_size];
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
#else
buffer = (FLOAT *)blas_memory_alloc(1);
#endif
STACK_ALLOC(m, FLOAT, buffer);
#ifdef SMPTEST
nthreads = num_cpu_avail(2);
@ -198,11 +190,7 @@ void CNAME(enum CBLAS_ORDER order,
}
#endif
#ifdef MAX_STACK_ALLOC
if(!stack_alloc_size)
#endif
blas_memory_free(buffer);
STACK_FREE(buffer);
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
IDEBUG_END;