From 4b0de7690d5e3c42f0ca3ec5a3e4031eb50bb187 Mon Sep 17 00:00:00 2001
From: Craig Donner
Date: Thu, 7 Jun 2018 14:54:42 +0100
Subject: [PATCH] Improve performance of GEMM for small matrices when SMP is defined.

Always checking num_cpu_avail() regardless of whether threading will actually
be used adds noticeable overhead for small matrices. Most other uses of
num_cpu_avail() do so only if threading will be used, so do the same here.
---
 interface/gemm.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/interface/gemm.c b/interface/gemm.c
index 8baf3fbec..fa1980115 100644
--- a/interface/gemm.c
+++ b/interface/gemm.c
@@ -411,20 +411,22 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
   mode |= (transa << BLAS_TRANSA_SHIFT);
   mode |= (transb << BLAS_TRANSB_SHIFT);
 
-  nthreads_max = num_cpu_avail(3);
-  nthreads_avail = nthreads_max;
-
 #ifndef COMPLEX
   MNK = (double) args.m * (double) args.n * (double) args.k;
   if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
     nthreads_max = 1;
+  else
+    nthreads_max = num_cpu_avail(3);
 #else
   MNK = (double) args.m * (double) args.n * (double) args.k;
   if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) )
     nthreads_max = 1;
+  else
+    nthreads_max = num_cpu_avail(3);
 #endif
   args.common = NULL;
 
+  nthreads_avail = nthreads_max;
   if ( nthreads_max > nthreads_avail )
     args.nthreads = nthreads_avail;
   else