diff --git a/interface/symm.c b/interface/symm.c index 0e29a5f48..3e65e69b1 100644 --- a/interface/symm.c +++ b/interface/symm.c @@ -44,6 +44,7 @@ #endif #ifndef COMPLEX +#define SMP_THRESHOLD_MIN 65536. #ifdef XDOUBLE #define ERROR_NAME "QSYMM " #elif defined(DOUBLE) @@ -52,6 +53,7 @@ #define ERROR_NAME "SSYMM " #endif #else +#define SMP_THRESHOLD_MIN 8192. #ifndef GEMM3M #ifndef HEMM #ifdef XDOUBLE @@ -91,6 +93,10 @@ #endif #endif +#ifndef GEMM_MULTITHREAD_THRESHOLD +#define GEMM_MULTITHREAD_THRESHOLD 4 +#endif + #ifdef SMP #ifndef COMPLEX @@ -159,7 +165,9 @@ void NAME(char *SIDE, char *UPLO, #if defined(SMP) && !defined(NO_AFFINITY) int nodes; #endif - +#if defined(SMP) + double MN; /* work estimate 2*m*m*n for the threading cutoff; double because the product exceeds INT_MAX for large sizes */ +#endif blasint info; int side; int uplo; @@ -255,6 +263,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, #if defined(SMP) && !defined(NO_AFFINITY) int nodes; #endif +#if defined(SMP) + double MN; /* work estimate 2*m*m*n for the threading cutoff; double because the product exceeds INT_MAX for large sizes */ +#endif PRINT_DEBUG_CNAME; @@ -375,15 +386,18 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, #ifdef SMP args.common = NULL; - args.nthreads = num_cpu_avail(3); - + MN = 2.* (double) args.m * (double)args.m * (double) args.n; + if (MN <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) { + args.nthreads = 1; + } else { + args.nthreads = num_cpu_avail(3); + } if (args.nthreads == 1) { #endif (symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0); #ifdef SMP - } else { #ifndef NO_AFFINITY diff --git a/interface/syrk.c b/interface/syrk.c index edb113d6c..3b056aec8 100644 --- a/interface/syrk.c +++ b/interface/syrk.c @@ -44,6 +44,7 @@ #endif #ifndef COMPLEX +#define SMP_THRESHOLD_MIN 109944. #ifdef XDOUBLE #define ERROR_NAME "QSYRK " #elif defined(DOUBLE) @@ -52,6 +53,7 @@ #define ERROR_NAME "SSYRK " #endif #else +#define SMP_THRESHOLD_MIN 14824.
#ifndef HEMM #ifdef XDOUBLE #define ERROR_NAME "XSYRK " @@ -71,6 +73,10 @@ #endif #endif +#ifndef GEMM_MULTITHREAD_THRESHOLD +#define GEMM_MULTITHREAD_THRESHOLD 4 +#endif + static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { #ifndef HEMM SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC, @@ -101,6 +107,7 @@ void NAME(char *UPLO, char *TRANS, FLOAT *sa, *sb; #ifdef SMP + double NNK; /* work estimate (n+1)*n*k for the threading cutoff; double because the product exceeds INT_MAX for large sizes */ #ifdef USE_SIMPLE_THREADED_LEVEL3 #ifndef COMPLEX #ifdef XDOUBLE @@ -225,6 +232,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr FLOAT *sa, *sb; #ifdef SMP + double NNK; /* work estimate (n+1)*n*k for the threading cutoff; double because the product exceeds INT_MAX for large sizes */ + #ifdef USE_SIMPLE_THREADED_LEVEL3 #ifndef COMPLEX #ifdef XDOUBLE @@ -354,18 +363,13 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr #endif args.common = NULL; -#ifndef COMPLEX -#ifdef DOUBLE - if (args.n < 100) -#else - if (args.n < 200) -#endif -#else - if (args.n < 65) -#endif + + NNK = (double)(args.n+1)*(double)args.n*(double)args.k; + if (NNK <= (SMP_THRESHOLD_MIN * GEMM_MULTITHREAD_THRESHOLD)) { args.nthreads = 1; - else + } else { args.nthreads = num_cpu_avail(3); + } if (args.nthreads == 1) { #endif @@ -373,7 +377,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr (syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0); #ifdef SMP - } else { #ifndef USE_SIMPLE_THREADED_LEVEL3