Use a single thread for small input size

copies daxpy improvement from #27, see #1560
This commit is contained in:
Martin Kroeker 2018-06-07 10:26:55 +02:00 committed by GitHub
parent 0218b884c1
commit e8880c1699
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 11 additions and 2 deletions

View File

@ -41,7 +41,11 @@
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif
#if defined(Z13)
#define MULTI_THREAD_MINIMAL 200000
#else
#define MULTI_THREAD_MINIMAL 10000
#endif
#ifndef CBLAS
void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
@ -69,7 +73,7 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
#endif
#ifndef CBLAS
PRINT_DEBUG_CNAME;
PRINT_DEBUG_NAME;
#else
PRINT_DEBUG_CNAME;
#endif
@ -93,6 +97,11 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
if (incx == 0 || incy == 0)
nthreads = 1;
//Work around the low performance issue with small imput size &
//multithreads.
if (n <= MULTI_THREAD_MINIMAL) {
nthreads = 1;
}
if (nthreads == 1) {
#endif