Merge pull request #3960 from martin-frbg/symmsyrk_sp

Add multithreading threshold for SYMM and rework the one for SYRK
This commit is contained in:
Martin Kroeker 2023-03-26 17:02:39 +02:00 committed by GitHub
commit 2d39e715e2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 32 additions and 15 deletions

View File

@ -44,6 +44,7 @@
#endif #endif
#ifndef COMPLEX #ifndef COMPLEX
/* Base work threshold for the non-COMPLEX SYMM build. The SMP dispatch code
   multiplies it by GEMM_MULTITHREAD_THRESHOLD and stays single-threaded when
   the 2*m*m*n work estimate does not exceed that product. */
#define SMP_THRESHOLD_MIN 65536.0
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "QSYMM " #define ERROR_NAME "QSYMM "
#elif defined(DOUBLE) #elif defined(DOUBLE)
@ -52,6 +53,7 @@
#define ERROR_NAME "SSYMM " #define ERROR_NAME "SSYMM "
#endif #endif
#else #else
/* Base work threshold for the COMPLEX SYMM build. The SMP dispatch code
   multiplies it by GEMM_MULTITHREAD_THRESHOLD and stays single-threaded when
   the 2*m*m*n work estimate does not exceed that product. */
#define SMP_THRESHOLD_MIN 8192.0
#ifndef GEMM3M #ifndef GEMM3M
#ifndef HEMM #ifndef HEMM
#ifdef XDOUBLE #ifdef XDOUBLE
@ -91,6 +93,10 @@
#endif #endif
#endif #endif
/* Fallback scale factor for the threading threshold, used only when the
   build has not already defined GEMM_MULTITHREAD_THRESHOLD. */
#if !defined(GEMM_MULTITHREAD_THRESHOLD)
#define GEMM_MULTITHREAD_THRESHOLD 4
#endif
#ifdef SMP #ifdef SMP
#ifndef COMPLEX #ifndef COMPLEX
@ -159,7 +165,9 @@ void NAME(char *SIDE, char *UPLO,
#if defined(SMP) && !defined(NO_AFFINITY) #if defined(SMP) && !defined(NO_AFFINITY)
int nodes; int nodes;
#endif #endif
#if defined(SMP)
/* Work estimate 2*m*m*n used for the multithreading threshold check.
   Held as double: the product exceeds INT_MAX once m and n reach roughly
   1024, and converting an out-of-range double to int is undefined behavior
   (in practice it can turn negative and force large problems onto a single
   thread). */
double MN;
#endif
blasint info; blasint info;
int side; int side;
int uplo; int uplo;
@ -255,6 +263,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
#if defined(SMP) && !defined(NO_AFFINITY) #if defined(SMP) && !defined(NO_AFFINITY)
int nodes; int nodes;
#endif #endif
#if defined(SMP)
/* Work estimate 2*m*m*n used for the multithreading threshold check.
   Held as double: the product exceeds INT_MAX once m and n reach roughly
   1024, and converting an out-of-range double to int is undefined behavior
   (in practice it can turn negative and force large problems onto a single
   thread). */
double MN;
#endif
PRINT_DEBUG_CNAME; PRINT_DEBUG_CNAME;
@ -375,15 +386,18 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo,
#ifdef SMP #ifdef SMP
args.common = NULL; args.common = NULL;
args.nthreads = num_cpu_avail(3); MN = 2.* (double) args.m * (double)args.m * (double) args.n;
if (MN <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) {
args.nthreads = 1;
} else {
args.nthreads = num_cpu_avail(3);
}
if (args.nthreads == 1) { if (args.nthreads == 1) {
#endif #endif
(symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0); (symm[(side << 1) | uplo ])(&args, NULL, NULL, sa, sb, 0);
#ifdef SMP #ifdef SMP
} else { } else {
#ifndef NO_AFFINITY #ifndef NO_AFFINITY

View File

@ -44,6 +44,7 @@
#endif #endif
#ifndef COMPLEX #ifndef COMPLEX
/* Base work threshold for the non-COMPLEX SYRK build. The SMP dispatch code
   multiplies it by GEMM_MULTITHREAD_THRESHOLD and stays single-threaded when
   the (n+1)*n*k work estimate does not exceed that product. */
#define SMP_THRESHOLD_MIN 109944.0
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "QSYRK " #define ERROR_NAME "QSYRK "
#elif defined(DOUBLE) #elif defined(DOUBLE)
@ -52,6 +53,7 @@
#define ERROR_NAME "SSYRK " #define ERROR_NAME "SSYRK "
#endif #endif
#else #else
/* Base work threshold for the COMPLEX SYRK build. The SMP dispatch code
   multiplies it by GEMM_MULTITHREAD_THRESHOLD and stays single-threaded when
   the (n+1)*n*k work estimate does not exceed that product. */
#define SMP_THRESHOLD_MIN 14824.0
#ifndef HEMM #ifndef HEMM
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "XSYRK " #define ERROR_NAME "XSYRK "
@ -71,6 +73,10 @@
#endif #endif
#endif #endif
/* Fallback scale factor for the threading threshold, used only when the
   build has not already defined GEMM_MULTITHREAD_THRESHOLD. */
#if !defined(GEMM_MULTITHREAD_THRESHOLD)
#define GEMM_MULTITHREAD_THRESHOLD 4
#endif
static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = { static int (*syrk[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT *, FLOAT *, BLASLONG) = {
#ifndef HEMM #ifndef HEMM
SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC, SYRK_UN, SYRK_UC, SYRK_LN, SYRK_LC,
@ -101,6 +107,7 @@ void NAME(char *UPLO, char *TRANS,
FLOAT *sa, *sb; FLOAT *sa, *sb;
#ifdef SMP #ifdef SMP
/* Work estimate (n+1)*n*k used for the multithreading threshold check.
   Held as double: the product exceeds INT_MAX for moderately large n and k,
   and converting an out-of-range double to int is undefined behavior (in
   practice it can turn negative and force large problems onto a single
   thread). */
double NNK;
#ifdef USE_SIMPLE_THREADED_LEVEL3 #ifdef USE_SIMPLE_THREADED_LEVEL3
#ifndef COMPLEX #ifndef COMPLEX
#ifdef XDOUBLE #ifdef XDOUBLE
@ -225,6 +232,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
FLOAT *sa, *sb; FLOAT *sa, *sb;
#ifdef SMP #ifdef SMP
/* Work estimate (n+1)*n*k used for the multithreading threshold check.
   Held as double: the product exceeds INT_MAX for moderately large n and k,
   and converting an out-of-range double to int is undefined behavior (in
   practice it can turn negative and force large problems onto a single
   thread). */
double NNK;
#ifdef USE_SIMPLE_THREADED_LEVEL3 #ifdef USE_SIMPLE_THREADED_LEVEL3
#ifndef COMPLEX #ifndef COMPLEX
#ifdef XDOUBLE #ifdef XDOUBLE
@ -354,18 +363,13 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
#endif #endif
args.common = NULL; args.common = NULL;
#ifndef COMPLEX
#ifdef DOUBLE NNK = (double)(args.n+1)*(double)args.n*(double)args.k;
if (args.n < 100) if (NNK <= (SMP_THRESHOLD_MIN * GEMM_MULTITHREAD_THRESHOLD)) {
#else
if (args.n < 200)
#endif
#else
if (args.n < 65)
#endif
args.nthreads = 1; args.nthreads = 1;
else } else {
args.nthreads = num_cpu_avail(3); args.nthreads = num_cpu_avail(3);
}
if (args.nthreads == 1) { if (args.nthreads == 1) {
#endif #endif
@ -373,7 +377,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
(syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0); (syrk[(uplo << 1) | trans ])(&args, NULL, NULL, sa, sb, 0);
#ifdef SMP #ifdef SMP
} else { } else {
#ifndef USE_SIMPLE_THREADED_LEVEL3 #ifndef USE_SIMPLE_THREADED_LEVEL3