Use a single thread for small input size
copies daxpy improvement from #27, see #1560
This commit is contained in:
parent
0218b884c1
commit
e8880c1699
|
@ -41,7 +41,11 @@
|
||||||
#ifdef FUNCTION_PROFILE
|
#ifdef FUNCTION_PROFILE
|
||||||
#include "functable.h"
|
#include "functable.h"
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(Z13)
|
||||||
|
#define MULTI_THREAD_MINIMAL 200000
|
||||||
|
#else
|
||||||
|
#define MULTI_THREAD_MINIMAL 10000
|
||||||
|
#endif
|
||||||
#ifndef CBLAS
|
#ifndef CBLAS
|
||||||
|
|
||||||
void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
|
void NAME(blasint *N, FLOAT *ALPHA, FLOAT *x, blasint *INCX, FLOAT *y, blasint *INCY){
|
||||||
|
@ -69,7 +73,7 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef CBLAS
|
#ifndef CBLAS
|
||||||
PRINT_DEBUG_CNAME;
|
PRINT_DEBUG_NAME;
|
||||||
#else
|
#else
|
||||||
PRINT_DEBUG_CNAME;
|
PRINT_DEBUG_CNAME;
|
||||||
#endif
|
#endif
|
||||||
|
@ -93,6 +97,11 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
|
||||||
if (incx == 0 || incy == 0)
|
if (incx == 0 || incy == 0)
|
||||||
nthreads = 1;
|
nthreads = 1;
|
||||||
|
|
||||||
|
//Work around the low performance issue with small input size &
|
||||||
|
//multithreads.
|
||||||
|
if (n <= MULTI_THREAD_MINIMAL) {
|
||||||
|
nthreads = 1;
|
||||||
|
}
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue