Fixed a few more unnecessary calls to num_cpu_avail.

I don't have as many benchmarks for these as for gemm, but it should still
make a difference for small matrices.
This commit is contained in:
Craig Donner 2018-06-11 10:13:09 +01:00
parent 3313e4b946
commit c2545b0fd6
18 changed files with 59 additions and 92 deletions

View File

@ -83,17 +83,15 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc
if (incy < 0) y -= (n - 1) * incy;
#ifdef SMP
nthreads = num_cpu_avail(1);
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0)
nthreads = 1;
//
//Temporarily work around the low performance issue with small input size &
//multithreads.
if (n <= MULTI_THREAD_MINIMAL)
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (nthreads == 1) {
#endif

View File

@ -76,10 +76,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){
#ifdef SMP
nthreads = num_cpu_avail(1);
if (n <= 1048576 )
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (nthreads == 1) {
#endif

View File

@ -90,18 +90,16 @@ void CNAME(blasint n, FLOAT *ALPHA, FLOAT *x, blasint incx, FLOAT *y, blasint in
if (incy < 0) y -= (n - 1) * incy * 2;
#ifdef SMP
nthreads = num_cpu_avail(1);
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0)
nthreads = 1;
//Work around the low performance issue with small input size &
//
//Temporarily work around the low performance issue with small input size &
//multithreads.
if (n <= MULTI_THREAD_MINIMAL) {
if (incx == 0 || incy == 0 || n <= MULTI_THREAD_MINIMAL)
nthreads = 1;
}
else
nthreads = num_cpu_avail(1);
if (nthreads == 1) {
#endif

View File

@ -90,10 +90,10 @@ void CNAME(blasint n, FLOAT alpha_r, void *vx, blasint incx){
FUNCTION_PROFILE_START();
#ifdef SMP
nthreads = num_cpu_avail(1);
if ( n <= 1048576 )
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (nthreads == 1) {
#endif

View File

@ -79,12 +79,12 @@ FLOAT *y = (FLOAT*)vy;
if (incy < 0) y -= (n - 1) * incy * 2;
#ifdef SMP
nthreads = num_cpu_avail(1);
//disable multi-thread when incx==0 or incy==0
//In that case, the threads would be dependent.
if (incx == 0 || incy == 0)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (nthreads == 1) {
#endif

View File

@ -233,14 +233,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
FLOAT asum = 0.0;
#if defined(SMP)
if (inc_x == 0 || n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (inc_x == 0)
nthreads = 1;
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
asum = casum_compute(n, x, inc_x);
} else {

View File

@ -183,14 +183,11 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
if (n <= 0) return 0;
#if defined(SMP)
if (inc_x == 0 || n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (inc_x == 0)
nthreads = 1;
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
do_copy(n, x, inc_x, y, inc_y);
} else {

View File

@ -228,14 +228,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
FLOAT asum = 0.0;
#if defined(SMP)
if (inc_x == 0 || n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (inc_x == 0)
nthreads = 1;
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
asum = dasum_compute(n, x, inc_x);
} else {

View File

@ -384,14 +384,11 @@ RETURN_TYPE CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y
RETURN_TYPE dot = 0.0;
#if defined(SMP)
if (inc_x == 0 || inc_y == 0 || n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (inc_x == 0 || inc_y == 0)
nthreads = 1;
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
dot = dot_compute(n, x, inc_x, y, inc_y);
} else {

View File

@ -328,10 +328,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
if (n <= 0 || inc_x <= 0) return 0.0;
#if defined(SMP)
nthreads = num_cpu_avail(1);
if (n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (nthreads == 1) {
nrm2_compute(n, x, inc_x, &ssq, &scale);

View File

@ -235,10 +235,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
if (n <= 0 || inc_x <= 0) return 0.0;
#if defined(SMP)
nthreads = num_cpu_avail(1);
if (n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (nthreads == 1) {
nrm2 = nrm2_compute(n, x, inc_x);

View File

@ -321,14 +321,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG max_index = 0;
#if defined(SMP)
if (inc_x == 0 || n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (inc_x == 0)
nthreads = 1;
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
max_index = iamax_compute(n, x, inc_x);
} else {

View File

@ -330,14 +330,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
BLASLONG max_index = 0;
#if defined(SMP)
if (inc_x == 0 || n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (inc_x == 0)
nthreads = 1;
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
max_index = izamax_compute(n, x, inc_x);
} else {

View File

@ -230,14 +230,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
FLOAT asum = 0.0;
#if defined(SMP)
if (inc_x == 0 || n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (inc_x == 0)
nthreads = 1;
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
asum = sasum_compute(n, x, inc_x);
} else {

View File

@ -318,10 +318,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
if (n <= 0 || inc_x <= 0) return 0.0;
#if defined(SMP)
nthreads = num_cpu_avail(1);
if (n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (nthreads == 1) {
nrm2_double = nrm2_compute(n, x, inc_x);

View File

@ -230,14 +230,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
FLOAT asum = 0.0;
#if defined(SMP)
if (inc_x == 0 || n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (inc_x == 0)
nthreads = 1;
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
asum = zasum_compute(n, x, inc_x);
} else {

View File

@ -317,14 +317,11 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
CIMAG(zdot) = 0.0;
#if defined(SMP)
if (inc_x == 0 || inc_y == 0 || n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (inc_x == 0 || inc_y == 0)
nthreads = 1;
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
zdot_compute(n, x, inc_x, y, inc_y, &zdot);
} else {

View File

@ -169,14 +169,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
FLOAT dot = 0.0;
#if defined(SMP)
if (inc_x == 0 || inc_y == 0 || n <= 10000)
nthreads = 1;
else
nthreads = num_cpu_avail(1);
if (inc_x == 0 || inc_y == 0)
nthreads = 1;
if (n <= 10000)
nthreads = 1;
if (nthreads == 1) {
dot = dot_compute(n, x, inc_x, y, inc_y);
} else {