Shift transition to multithreading towards larger matrix sizes

See #1886 and JuliaRobotics issue 500. trsm benchmarks on Haswell and Zen showed that with these values performance is roughly doubled for matrix sizes between 8x8 and 14x14, and still 10 to 20 percent better near the new cutoff at 32x32.
This commit is contained in:
Martin Kroeker 2019-01-19 00:10:01 +01:00 committed by GitHub
parent 256eb588bb
commit cda81cfae0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 8 additions and 2 deletions

View File

@ -81,6 +81,12 @@
#endif #endif
#endif #endif
/*
 * SMP_FACTOR is the multiplier applied to GEMM_MULTITHREAD_THRESHOLD when
 * deciding whether to request more than one thread: if args.m or args.n is
 * below SMP_FACTOR * GEMM_MULTITHREAD_THRESHOLD, args.nthreads is set to 1.
 * The multiplier is 4 when COMPLEX is defined and 8 otherwise.
 */
#ifdef COMPLEX
#define SMP_FACTOR 4
#else
#define SMP_FACTOR 8
#endif
static int (*trsm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { static int (*trsm[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
#ifndef TRMM #ifndef TRMM
TRSM_LNUU, TRSM_LNUN, TRSM_LNLU, TRSM_LNLN, TRSM_LNUU, TRSM_LNUN, TRSM_LNLU, TRSM_LNLN,
@ -366,10 +372,10 @@ void CNAME(enum CBLAS_ORDER order,
mode |= (trans << BLAS_TRANSA_SHIFT); mode |= (trans << BLAS_TRANSA_SHIFT);
mode |= (side << BLAS_RSIDE_SHIFT); mode |= (side << BLAS_RSIDE_SHIFT);
if ( args.m < 2*GEMM_MULTITHREAD_THRESHOLD ) if ( args.m < SMP_FACTOR * GEMM_MULTITHREAD_THRESHOLD )
args.nthreads = 1; args.nthreads = 1;
else else
if ( args.n < 2*GEMM_MULTITHREAD_THRESHOLD ) if ( args.n < SMP_FACTOR * GEMM_MULTITHREAD_THRESHOLD )
args.nthreads = 1; args.nthreads = 1;
else else
args.nthreads = num_cpu_avail(3); args.nthreads = num_cpu_avail(3);