parent
e3e20e2242
commit
87a2ccc37c
1
common.h
1
common.h
|
@ -727,6 +727,7 @@ typedef struct {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
#include "common_stackalloc.h"
|
||||||
#if 0
|
#if 0
|
||||||
#include "symcopy.h"
|
#include "symcopy.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
Copyright (c) 2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#define STACK_ALLOC_PROTECT
|
||||||
|
#ifdef STACK_ALLOC_PROTECT
|
||||||
|
// Try to detect stack smashing
|
||||||
|
#include <assert.h>
|
||||||
|
/* Declares the canary that STACK_ALLOC_PROTECT_CHECK later compares against the same pattern. */
/* The type is an explicit 64-bit integer: the pattern needs 63 bits, and a narrower */
/* type would truncate it so that the check could never succeed. */
#define STACK_ALLOC_PROTECT_SET volatile long long stack_check = 0x7ff8010203040506LL;
|
||||||
|
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7ff8010203040506);
|
||||||
|
#else
|
||||||
|
#define STACK_ALLOC_PROTECT_SET
|
||||||
|
#define STACK_ALLOC_PROTECT_CHECK
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate a buffer on the stack if the size is smaller than MAX_STACK_ALLOC.
|
||||||
|
* Stack allocation is much faster than blas_memory_alloc or malloc, particularly
|
||||||
|
* when OpenBLAS is used from a multi-threaded application.
|
||||||
|
* SIZE must be carefully chosen to be:
|
||||||
|
* - as small as possible to maximize the number of stack allocations
|
||||||
|
* - large enough to support all architectures and kernels
|
||||||
|
* Choosing a SIZE that is too small will lead to stack smashing.
|
||||||
|
*/
|
||||||
|
#define STACK_ALLOC(SIZE, TYPE, BUFFER) \
|
||||||
|
/* make it volatile because some function (ex: dgemv_n.S) */ \
|
||||||
|
/* do not restore all register */ \
|
||||||
|
volatile int stack_alloc_size = SIZE; \
|
||||||
|
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE)) \
|
||||||
|
stack_alloc_size = 0; \
|
||||||
|
STACK_ALLOC_PROTECT_SET \
|
||||||
|
TYPE stack_buffer[stack_alloc_size]; \
|
||||||
|
BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
|
||||||
|
#else
|
||||||
|
//Original OpenBLAS/GotoBLAS codes.
|
||||||
|
#define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
|
||||||
|
#define STACK_FREE(BUFFER) \
|
||||||
|
STACK_ALLOC_PROTECT_CHECK \
|
||||||
|
if(!stack_alloc_size) \
|
||||||
|
blas_memory_free(BUFFER);
|
||||||
|
#else
|
||||||
|
#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
|
||||||
|
#endif
|
||||||
|
|
|
@ -37,7 +37,6 @@
|
||||||
/*********************************************************************/
|
/*********************************************************************/
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <assert.h>
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include "l1param.h"
|
#include "l1param.h"
|
||||||
#ifdef FUNCTION_PROFILE
|
#ifdef FUNCTION_PROFILE
|
||||||
|
@ -80,6 +79,7 @@ void NAME(char *TRANS, blasint *M, blasint *N,
|
||||||
FLOAT alpha = *ALPHA;
|
FLOAT alpha = *ALPHA;
|
||||||
FLOAT beta = *BETA;
|
FLOAT beta = *BETA;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
|
int buffer_size;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
int nthreads_max;
|
int nthreads_max;
|
||||||
|
@ -135,7 +135,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
blasint lenx, leny;
|
blasint lenx, leny;
|
||||||
int trans;
|
int trans, buffer_size;
|
||||||
blasint info, t;
|
blasint info, t;
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
int nthreads;
|
int nthreads;
|
||||||
|
@ -216,33 +216,13 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
if (incx < 0) x -= (lenx - 1) * incx;
|
if (incx < 0) x -= (lenx - 1) * incx;
|
||||||
if (incy < 0) y -= (leny - 1) * incy;
|
if (incy < 0) y -= (leny - 1) * incy;
|
||||||
|
|
||||||
#ifdef MAX_STACK_ALLOC
|
buffer_size = m + n + 128 / sizeof(FLOAT);
|
||||||
// make it volatile because some gemv implementation (ex: dgemv_n.S)
|
#ifdef WINDOWS_ABI
|
||||||
// do not restore all register
|
buffer_size += 160 / sizeof(FLOAT) ;
|
||||||
volatile int stack_alloc_size = 0;
|
|
||||||
//for gemv_n and gemv_t, try to allocate on stack
|
|
||||||
stack_alloc_size = m + n;
|
|
||||||
#ifdef ALIGNED_ACCESS
|
|
||||||
stack_alloc_size += 3;
|
|
||||||
#endif
|
|
||||||
// if(stack_alloc_size < 128)
|
|
||||||
//dgemv_n.S require a 128 bytes buffer
|
|
||||||
// increasing instead of capping 128
|
|
||||||
// ABI STACK for windows 288 bytes
|
|
||||||
stack_alloc_size += 288 / sizeof(FLOAT) ;
|
|
||||||
|
|
||||||
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
|
|
||||||
stack_alloc_size = 0;
|
|
||||||
|
|
||||||
// stack overflow check
|
|
||||||
volatile double stack_check = 3.14159265358979323846;
|
|
||||||
FLOAT stack_buffer[stack_alloc_size];
|
|
||||||
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
|
|
||||||
// printf("stack_alloc_size=%d\n", stack_alloc_size);
|
|
||||||
#else
|
|
||||||
//Original OpenBLAS/GotoBLAS codes.
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
|
||||||
#endif
|
#endif
|
||||||
|
// for alignment
|
||||||
|
buffer_size = (buffer_size + 3) & ~3;
|
||||||
|
STACK_ALLOC(buffer_size, FLOAT, buffer);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
|
|
||||||
|
@ -271,17 +251,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef MAX_STACK_ALLOC
|
STACK_FREE(buffer);
|
||||||
// stack overflow check
|
|
||||||
assert(stack_check==3.14159265358979323846);
|
|
||||||
|
|
||||||
if(!stack_alloc_size){
|
|
||||||
blas_memory_free(buffer);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
blas_memory_free(buffer);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
||||||
|
|
||||||
IDEBUG_END;
|
IDEBUG_END;
|
||||||
|
|
|
@ -171,15 +171,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
if (incy < 0) y -= (n - 1) * incy;
|
if (incy < 0) y -= (n - 1) * incy;
|
||||||
if (incx < 0) x -= (m - 1) * incx;
|
if (incx < 0) x -= (m - 1) * incx;
|
||||||
|
|
||||||
#ifdef MAX_STACK_ALLOC
|
STACK_ALLOC(m, FLOAT, buffer);
|
||||||
volatile int stack_alloc_size = m;
|
|
||||||
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
|
|
||||||
stack_alloc_size = 0;
|
|
||||||
FLOAT stack_buffer[stack_alloc_size];
|
|
||||||
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
|
|
||||||
#else
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef SMPTEST
|
#ifdef SMPTEST
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
@ -198,11 +190,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef MAX_STACK_ALLOC
|
STACK_FREE(buffer);
|
||||||
if(!stack_alloc_size)
|
|
||||||
#endif
|
|
||||||
blas_memory_free(buffer);
|
|
||||||
|
|
||||||
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
||||||
|
|
||||||
IDEBUG_END;
|
IDEBUG_END;
|
||||||
|
|
Loading…
Reference in New Issue