Allow gemv and ger buffers to be allocated on the stack
ger and gemv call blas_memory_alloc/free, which in turn call blas_lock. blas_lock creates thread contention when the matrices are small and the number of threads is high enough. We avoid calling blas_memory_alloc by replacing it with a stack allocation. This can be enabled with `make -DMAX_STACK_ALLOC=2048`. The given size (in bytes) must be high enough to avoid thread contention and small enough to avoid stack overflow. Fix #478
This commit is contained in:
@@ -171,7 +171,15 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
if (incy < 0) y -= (n - 1) * incy;
|
||||
if (incx < 0) x -= (m - 1) * incx;
|
||||
|
||||
#ifdef MAX_STACK_ALLOC
|
||||
int stack_alloc_size = m;
|
||||
if(stack_alloc_size > MAX_STACK_ALLOC / sizeof(FLOAT))
|
||||
stack_alloc_size = 0;
|
||||
FLOAT stack_buffer[stack_alloc_size];
|
||||
buffer = stack_alloc_size ? stack_buffer : (FLOAT *)blas_memory_alloc(1);
|
||||
#else
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
#endif
|
||||
|
||||
#ifdef SMPTEST
|
||||
nthreads = num_cpu_avail(2);
|
||||
@@ -190,7 +198,10 @@ void CNAME(enum CBLAS_ORDER order,
|
||||
}
|
||||
#endif
|
||||
|
||||
blas_memory_free(buffer);
|
||||
#ifdef MAX_STACK_ALLOC
|
||||
if(!stack_alloc_size)
|
||||
#endif
|
||||
blas_memory_free(buffer);
|
||||
|
||||
FUNCTION_PROFILE_END(1, m * n + m + n, 2 * m * n);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user