parent
e3e20e2242
commit
87a2ccc37c
1
common.h
1
common.h
|
@ -727,6 +727,7 @@ typedef struct {
|
|||
#endif
|
||||
|
||||
#ifndef ASSEMBLER
|
||||
#include "common_stackalloc.h"
|
||||
#if 0
|
||||
#include "symcopy.h"
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
/*******************************************************************************
|
||||
Copyright (c) 2016, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*******************************************************************************/
|
||||
|
||||
#define STACK_ALLOC_PROTECT
|
||||
#ifdef STACK_ALLOC_PROTECT
|
||||
// Try to detect stack smashing
|
||||
#include <assert.h>
|
||||
/* Declares the stack canary that STACK_ALLOC_PROTECT_CHECK later verifies.
 * The canary value needs 64 bits, so it is stored in a long long rather than
 * in BLASLONG: if BLASLONG is only 32 bits wide on the target, the stored
 * value would be truncated and the equality check against the full 64-bit
 * constant could never succeed, making the assert fire on every call.
 * It stays volatile so the compiler keeps it in memory on the stack. */
#define STACK_ALLOC_PROTECT_SET volatile long long stack_check = 0x7ff8010203040506LL;
|
||||
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7ff8010203040506);
|
||||
#else
|
||||
#define STACK_ALLOC_PROTECT_SET
|
||||
#define STACK_ALLOC_PROTECT_CHECK
|
||||
#endif
|
||||
|
||||
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
|
||||
|
||||
/*
|
||||
* Allocate a buffer on the stack if the size is smaller than MAX_STACK_ALLOC.
|
||||
* Stack allocation is much faster than blas_memory_alloc or malloc, particularly
|
||||
* when OpenBLAS is used from a multi-threaded application.
|
||||
* SIZE must be carefully chosen to be:
|
||||
* - as small as possible to maximize the number of stack allocations
|
||||
* - large enough to support all architectures and kernels
|
||||
* Choosing too small a SIZE will lead to stack smashing.
|
||||
*/
|
||||
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
|
||||
/* make it volatile because some functions (e.g. dgemv_n.S) */ \
|
||||
/* do not restore all registers */ \
|
||||
volatile int stack_alloc_size = SIZE; \
|
||||
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) \
|
||||
stack_alloc_size = 0; \
|
||||
STACK_ALLOC_PROTECT_SET \
|
||||
TYPE stack_buffer[stack_alloc_size]; \
|
||||
BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
|
||||
#else
|
||||
//Original OpenBLAS/GotoBLAS codes.
|
||||
#define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
|
||||
#define STACK_FREE(BUFFER) \
|
||||
STACK_ALLOC_PROTECT_CHECK \
|
||||
if(!stack_alloc_size) \
|
||||
blas_memory_free(BUFFER);
|
||||
#else
|
||||
#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
|
||||
#endif
|
||||
|
|
@ -37,7 +37,6 @@
|
|||
/*********************************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include "common.h"
|
||||
#include "l1param.h"
|
||||
#ifdef FUNCTION_PROFILE
|
||||
|
@ -80,6 +79,7 @@ void NAME(char *TRANS, blasint *M, blasint *N,
|
|||
FLOAT alpha = *ALPHA;
|
||||
FLOAT beta = *BETA;
|
||||
FLOAT *buffer;
|
||||
int buffer_size;
|
||||
#ifdef SMP
|
||||
int nthreads;
|
||||
int nthreads_max;
|
||||
|
@ -135,7 +135,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
FLOAT *buffer;
|
||||
blasint lenx, leny;
|
||||
int trans;
|
||||
int trans, buffer_size;
|
||||
blasint info, t;
|
||||
#ifdef SMP
|
||||
int nthreads;
|
||||
|
@ -216,33 +216,13 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
if (incx < 0) x -= (lenx - 1) * incx;
|
||||
if (incy < 0) y -= (leny - 1) * incy;
|
||||
|
||||
#ifdef MAX_STACK_ALLOC
|
||||
// make it volatile because some gemv implementation (ex: dgemv_n.S)
|
||||
// do not restore all register
|
||||
volatile int stack_alloc_size = 0;
|
||||
//for gemv_n and gemv_t, try to allocate on stack
|
||||
stack_alloc_size = m + n;
|
||||
#ifdef ALIGNED_ACCESS
|
||||
stack_alloc_size += 3;
|
||||
#endif
|
||||
// if(stack_alloc_size < 128)
|
||||
//dgemv_n.S require a 128 bytes buffer
|
||||
// increasing instead of capping 128
|
||||
// ABI STACK for windows 288 bytes
|
||||
stack_alloc_size += 288 / sizeof(FLOAT) ;
|
||||
|
||||
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
|
||||
stack_alloc_size = 0;
|
||||
|
||||
// stack overflow check
|
||||
volatile double stack_check = 3.14159265358979323846;
|
||||
FLOAT stack_buffer[stack_alloc_size];
|
||||
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
|
||||
// printf("stack_alloc_size=%d\n", stack_alloc_size);
|
||||
#else
|
||||
//Original OpenBLAS/GotoBLAS codes.
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
buffer_size = m + n + 128 / sizeof(FLOAT);
|
||||
#ifdef WINDOWS_ABI
|
||||
buffer_size += 160 / sizeof(FLOAT) ;
|
||||
#endif
|
||||
// for alignment
|
||||
buffer_size = (buffer_size + 3) & ~3;
|
||||
STACK_ALLOC(buffer_size, FLOAT, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
|
||||
|
@ -271,17 +251,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef MAX_STACK_ALLOC
|
||||
// stack overflow check
|
||||
assert(stack_check==3.14159265358979323846);
|
||||
|
||||
if(!stack_alloc_size){
|
||||
blas_memory_free(buffer);
|
||||
}
|
||||
#else
|
||||
blas_memory_free(buffer);
|
||||
#endif
|
||||
|
||||
STACK_FREE(buffer);
|
||||
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
||||
|
||||
IDEBUG_END;
|
||||
|
|
|
@ -171,15 +171,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
if (incy < 0) y -= (n - 1) * incy;
|
||||
if (incx < 0) x -= (m - 1) * incx;
|
||||
|
||||
#ifdef MAX_STACK_ALLOC
|
||||
volatile int stack_alloc_size = m;
|
||||
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
|
||||
stack_alloc_size = 0;
|
||||
FLOAT stack_buffer[stack_alloc_size];
|
||||
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
|
||||
#else
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
#endif
|
||||
STACK_ALLOC(m, FLOAT, buffer);
|
||||
|
||||
#ifdef SMPTEST
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
@ -198,11 +190,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef MAX_STACK_ALLOC
|
||||
if(!stack_alloc_size)
|
||||
#endif
|
||||
blas_memory_free(buffer);
|
||||
|
||||
STACK_FREE(buffer);
|
||||
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
||||
|
||||
IDEBUG_END;
|
||||
|
|
Loading…
Reference in New Issue