Use stack allocation in zgemv and zger

Replace blas_memory_alloc/blas_memory_free with STACK_ALLOC/STACK_FREE for the work buffer, for better performance with small matrices.
Ref #727
This commit is contained in:
Jerome Robert 2016-01-03 14:01:12 +01:00
parent 0e68beb89f
commit 32f793195f
2 changed files with 12 additions and 5 deletions

View File

@@ -77,6 +77,7 @@ void NAME(char *TRANS, blasint *M, blasint *N,
 blasint incy = *INCY;
 FLOAT *buffer;
+int buffer_size;
 #ifdef SMP
 int nthreads;
 #endif
@@ -141,7 +142,7 @@ void CNAME(enum CBLAS_ORDER order,
 FLOAT *buffer;
 blasint lenx, leny;
-int trans;
+int trans, buffer_size;
 blasint info, t;
 #ifdef SMP
 int nthreads;
@@ -230,7 +231,13 @@ void CNAME(enum CBLAS_ORDER order,
 if (incx < 0) x -= (lenx - 1) * incx * 2;
 if (incy < 0) y -= (leny - 1) * incy * 2;
-buffer = (FLOAT *)blas_memory_alloc(1);
+buffer_size = 2 * (m + n) + 128 / sizeof(FLOAT);
+#ifdef WINDOWS_ABI
+buffer_size += 160 / sizeof(FLOAT) ;
+#endif
+// for alignment
+buffer_size = (buffer_size + 3) & ~3;
+STACK_ALLOC(buffer_size, FLOAT, buffer);
 #ifdef SMP
@@ -253,7 +260,7 @@ void CNAME(enum CBLAS_ORDER order,
 }
 #endif
-blas_memory_free(buffer);
+STACK_FREE(buffer);
 FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n);

View File

@@ -210,7 +210,7 @@ void CNAME(enum CBLAS_ORDER order,
 if (incy < 0) y -= (n - 1) * incy * 2;
 if (incx < 0) x -= (m - 1) * incx * 2;
-buffer = (FLOAT *)blas_memory_alloc(1);
+STACK_ALLOC(2 * m, FLOAT, buffer);
 #ifdef SMPTEST
 // Threshold chosen so that speed-up is > 1 on a Xeon E5-2630
@@ -249,7 +249,7 @@ void CNAME(enum CBLAS_ORDER order,
 }
 #endif
-blas_memory_free(buffer);
+STACK_FREE(buffer);
 FUNCTION_PROFILE_END(4, m * n + m + n, 2 * m * n);