From 3ae30cd6b9407cea58aa9d32c3f8e8916153cf8e Mon Sep 17 00:00:00 2001 From: Jerome Robert Date: Fri, 15 Jan 2016 17:12:04 +0100 Subject: [PATCH 1/3] Disable multi-threading for small matrices in [z]ger Ref #731 --- interface/ger.c | 7 +++++-- interface/zger.c | 6 +++++- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/interface/ger.c b/interface/ger.c index 2d19e3bc0..b2f803dec 100644 --- a/interface/ger.c +++ b/interface/ger.c @@ -174,8 +174,11 @@ void CNAME(enum CBLAS_ORDER order, STACK_ALLOC(m, FLOAT, buffer); #ifdef SMPTEST - nthreads = num_cpu_avail(2); - + // Threshold chosen so that speed-up is > 1 on a Xeon E5-2630 + if(1L * m * n > 24L * GEMM_MULTITHREAD_THRESHOLD) + nthreads = num_cpu_avail(2); + else + nthreads = 1; if (nthreads == 1) { #endif diff --git a/interface/zger.c b/interface/zger.c index f46a462e2..5bcbfc09b 100644 --- a/interface/zger.c +++ b/interface/zger.c @@ -213,7 +213,11 @@ void CNAME(enum CBLAS_ORDER order, buffer = (FLOAT *)blas_memory_alloc(1); #ifdef SMPTEST - nthreads = num_cpu_avail(2); + // Threshold chosen so that speed-up is > 1 on a Xeon E5-2630 + if(1L * m * n > 3L * sizeof(FLOAT) * GEMM_MULTITHREAD_THRESHOLD) + nthreads = num_cpu_avail(2); + else + nthreads = 1; if (nthreads == 1) { #endif From 66eafb16cfaf7e119afff96ff3e44663134141fa Mon Sep 17 00:00:00 2001 From: Jerome Robert Date: Mon, 18 Jan 2016 09:12:37 +0100 Subject: [PATCH 2/3] swap: disable multi-threading for small matrices Close #731 --- interface/swap.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/interface/swap.c b/interface/swap.c index 3baeb27c4..23b2e4ec8 100644 --- a/interface/swap.c +++ b/interface/swap.c @@ -77,12 +77,13 @@ void CNAME(blasint n, FLOAT *x, blasint incx, FLOAT *y, blasint incy){ if (incy < 0) y -= (n - 1) * incy; #ifdef SMP - nthreads = num_cpu_avail(1); //disable multi-thread when incx==0 or incy==0 //In that case, the threads would be dependent. - if (incx == 0 || incy == 0) - nthreads = 1; + if (incx == 0 || incy == 0 || n < 2097152 * GEMM_MULTITHREAD_THRESHOLD / sizeof(FLOAT)) + nthreads = 1; + else + nthreads = num_cpu_avail(1); if (nthreads == 1) { #endif From 14db1ca508575835bf4dffb8003df1b88adf4f32 Mon Sep 17 00:00:00 2001 From: Jerome Robert Date: Tue, 19 Jan 2016 17:15:31 +0100 Subject: [PATCH 3/3] update CONTRIBUTORS.md --- CONTRIBUTORS.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 88e461dc4..b1c8dd140 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -121,6 +121,11 @@ In chronological order: * [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). ARMv8 support. +* Jerome Robert + * [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478) + * [2015-12-23] `stack_check` in `gemv.c` (bug #722) + * [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731) + * Dan Kortschak * [2015-01-07] Added test for drotmg bug #484.