diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 88e461dc4..b1c8dd140 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -121,6 +121,11 @@ In chronological order: * [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). ARMv8 support. +* Jerome Robert + * [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478) + * [2015-12-23] `stack_check` in `gemv.c` (bug #722) + * [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731) + * Dan Kortschak * [2015-01-07] Added test for drotmg bug #484. diff --git a/interface/ger.c b/interface/ger.c index 2d19e3bc0..b2f803dec 100644 --- a/interface/ger.c +++ b/interface/ger.c @@ -174,8 +174,11 @@ void CNAME(enum CBLAS_ORDER order, STACK_ALLOC(m, FLOAT, buffer); #ifdef SMPTEST - nthreads = num_cpu_avail(2); - + // Threshold chosen so that speed-up is > 1 on a Xeon E5-2630 + if(1L * m * n > 24L * GEMM_MULTITHREAD_THRESHOLD) + nthreads = num_cpu_avail(2); + else + nthreads = 1; if (nthreads == 1) { #endif diff --git a/interface/swap.c b/interface/swap.c index 3baeb27c4..23b2e4ec8 100644 --- a/interface/swap.c +++ b/interface/swap.c @@ -77,12 +77,13 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ if (incy < 0) y -= (n - 1) * incy; #ifdef SMP - nthreads = num_cpu_avail(1); //disable multi-thread when incx==0 or incy==0 //In that case, the threads would be dependent. - if (incx == 0 || incy == 0) - nthreads = 1; + if (incx == 0 || incy == 0 || n < 2097152 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT)) + nthreads = 1; + else + nthreads = num_cpu_avail(1); if (nthreads == 1) { #endif diff --git a/interface/zger.c b/interface/zger.c index f46a462e2..5bcbfc09b 100644 --- a/interface/zger.c +++ b/interface/zger.c @@ -213,7 +213,11 @@ void CNAME(enum CBLAS_ORDER order, buffer = (FLOAT *)blas_memory_alloc(1); #ifdef SMPTEST - nthreads = num_cpu_avail(2); + // Threshold chosen so that speed-up is > 1 on a Xeon E5-2630 + if(1L * m * n > 3L * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD) + nthreads = num_cpu_avail(2); + else + nthreads = 1; if (nthreads == 1) { #endif