commit
6caa40302e
|
@ -121,6 +121,11 @@ In chronological order:
|
||||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
|
||||||
ARMv8 support.
|
ARMv8 support.
|
||||||
|
|
||||||
|
* Jerome Robert <jeromerobert@gmx.com>
|
||||||
|
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
|
||||||
|
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
|
||||||
|
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
|
||||||
|
|
||||||
* Dan Kortschak
|
* Dan Kortschak
|
||||||
* [2015-01-07] Added test for drotmg bug #484.
|
* [2015-01-07] Added test for drotmg bug #484.
|
||||||
|
|
||||||
|
|
|
@ -174,8 +174,11 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
STACK_ALLOC(m, FLOAT, buffer);
|
STACK_ALLOC(m, FLOAT, buffer);
|
||||||
|
|
||||||
#ifdef SMPTEST
|
#ifdef SMPTEST
|
||||||
|
// Threshold chosen so that speed-up is > 1 on a Xeon E5-2630
|
||||||
|
if(1L * m * n > 24L * GEMM_MULTITHREAD_THRESHOLD)
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
else
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -77,12 +77,13 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){
|
||||||
if (incy < 0) y -= (n - 1) * incy;
|
if (incy < 0) y -= (n - 1) * incy;
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
nthreads = num_cpu_avail(1);
|
|
||||||
|
|
||||||
//disable multi-thread when incx==0 or incy==0
|
//Disable multi-threading when incx==0 or incy==0, or when n is below the size threshold.
|
||||||
//In that case, the threads would be dependent.
|
//With a zero increment, the threads would be dependent.
|
||||||
if (incx == 0 || incy == 0)
|
if (incx == 0 || incy == 0 || n < 2097152 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT))
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
else
|
||||||
|
nthreads = num_cpu_avail(1);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -213,7 +213,11 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMPTEST
|
#ifdef SMPTEST
|
||||||
|
// Threshold chosen so that speed-up is > 1 on a Xeon E5-2630
|
||||||
|
if(1L * m * n > 3L * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD)
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
else
|
||||||
|
nthreads = 1;
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue