Add multithreading support to the x86_64 zdot kernel (#2222)
* Add multithreading support copied from the ThunderX2T99 kernel. For #2221
This commit is contained in:
parent
b48c025974
commit
9ef96b32a6
|
@ -86,18 +86,26 @@ static void zdot_kernel_8(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
|
||||||
|
#if defined(SMP)
|
||||||
|
/* Prototype of the level-1 threading helper that CNAME calls on its
 * multithreaded path. The last two parameters are the per-thread callback
 * and the number of threads to use. */
extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n,
|
||||||
|
BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb,
|
||||||
|
void *c, BLASLONG ldc, int (*function)(), int nthreads);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static void zdot_compute (BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y,OPENBLAS_COMPLEX_FLOAT *result)
|
||||||
{
|
{
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
BLASLONG ix,iy;
|
BLASLONG ix,iy;
|
||||||
FLOAT dot[4] = { 0.0, 0.0, 0.0 , 0.0 } ;
|
FLOAT dot[4] = { 0.0, 0.0, 0.0 , 0.0 } ;
|
||||||
|
|
||||||
if ( n <= 0 )
|
if ( n <= 0 )
|
||||||
{
|
{
|
||||||
// CREAL(result) = 0.0 ;
|
OPENBLAS_COMPLEX_FLOAT res=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0,0.0);
|
||||||
// CIMAG(result) = 0.0 ;
|
*result=res;
|
||||||
OPENBLAS_COMPLEX_FLOAT result=OPENBLAS_MAKE_COMPLEX_FLOAT(0.0,0.0);
|
return;
|
||||||
return(result);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -150,18 +158,68 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(CONJ)
|
#if !defined(CONJ)
|
||||||
OPENBLAS_COMPLEX_FLOAT result=OPENBLAS_MAKE_COMPLEX_FLOAT(dot[0]-dot[1],dot[2]+dot[3]);
|
OPENBLAS_COMPLEX_FLOAT res=OPENBLAS_MAKE_COMPLEX_FLOAT(dot[0]-dot[1],dot[2]+dot[3]);
|
||||||
// CREAL(result) = dot[0] - dot[1];
|
|
||||||
// CIMAG(result) = dot[2] + dot[3];
|
|
||||||
#else
|
#else
|
||||||
OPENBLAS_COMPLEX_FLOAT result=OPENBLAS_MAKE_COMPLEX_FLOAT(dot[0]+dot[1],dot[2]-dot[3]);
|
OPENBLAS_COMPLEX_FLOAT res=OPENBLAS_MAKE_COMPLEX_FLOAT(dot[0]+dot[1],dot[2]-dot[3]);
|
||||||
// CREAL(result) = dot[0] + dot[1];
|
|
||||||
// CIMAG(result) = dot[2] - dot[3];
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
*result=res;
|
||||||
return(result);
|
return;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(SMP)
|
||||||
|
/*
 * Callback that CNAME hands to blas_level1_thread_with_return_value.
 * Forwards n, x, inc_x, y and inc_y to zdot_compute, which stores its
 * result through `result`. The dummy0..dummy3 parameters are not used here.
 * Always returns 0.
 */
static int zdot_thread_function(BLASLONG n, BLASLONG dummy0,
|
||||||
|
BLASLONG dummy1, FLOAT dummy2, FLOAT *x, BLASLONG inc_x, FLOAT *y,
|
||||||
|
BLASLONG inc_y, FLOAT *result, BLASLONG dummy3)
|
||||||
|
{
|
||||||
|
/* zdot_compute expects an OPENBLAS_COMPLEX_FLOAT *; the FLOAT * is passed through void *. */
zdot_compute(n, x, inc_x, y, inc_y, (void *)result);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * Public entry point: returns the complex value produced by zdot_compute
 * for (n, x, inc_x, y, inc_y).
 *
 * Without SMP the work is done by a single zdot_compute call. With SMP the
 * call is either made directly (one thread) or dispatched through
 * blas_level1_thread_with_return_value, after which the per-thread results
 * are added together into the return value.
 */
OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||||
|
{
|
||||||
|
#if defined(SMP)
|
||||||
|
int nthreads;
|
||||||
|
/* Never assigned in this function; only its address is passed to the threading helper. */
FLOAT dummy_alpha;
|
||||||
|
#endif
|
||||||
|
OPENBLAS_COMPLEX_FLOAT zdot;
|
||||||
|
/* Start the accumulator at 0 + 0i. */
CREAL(zdot) = 0.0;
|
||||||
|
CIMAG(zdot) = 0.0;
|
||||||
|
|
||||||
|
#if defined(SMP)
|
||||||
|
/* Stay single-threaded when either increment is zero or n is at most 10000. */
if (inc_x == 0 || inc_y == 0 || n <= 10000)
|
||||||
|
nthreads = 1;
|
||||||
|
else
|
||||||
|
/* Otherwise take the thread count from num_cpu_avail(1). */
nthreads = num_cpu_avail(1);
|
||||||
|
|
||||||
|
if (nthreads == 1) {
|
||||||
|
zdot_compute(n, x, inc_x, y, inc_y, &zdot);
|
||||||
|
} else {
|
||||||
|
int mode, i;
|
||||||
|
/* Scratch for per-thread results: MAX_CPU_NUMBER slots of sizeof(double) * 2 bytes each. */
char result[MAX_CPU_NUMBER * sizeof(double) * 2];
|
||||||
|
OPENBLAS_COMPLEX_FLOAT *ptr;
|
||||||
|
|
||||||
|
/* Select the precision flag to match the build (DOUBLE defined or not). */
#if !defined(DOUBLE)
|
||||||
|
mode = BLAS_SINGLE | BLAS_COMPLEX;
|
||||||
|
#else
|
||||||
|
mode = BLAS_DOUBLE | BLAS_COMPLEX;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Dispatch with zdot_thread_function as the callback and `result` as the output buffer. */
blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha,
|
||||||
|
x, inc_x, y, inc_y, result, 0,
|
||||||
|
( void *)zdot_thread_function, nthreads);
|
||||||
|
|
||||||
|
ptr = (OPENBLAS_COMPLEX_FLOAT *)result;
|
||||||
|
/* Add the real and imaginary parts of the first nthreads slots into zdot. */
for (i = 0; i < nthreads; i++) {
|
||||||
|
CREAL(zdot) = CREAL(zdot) + CREAL(*ptr);
|
||||||
|
CIMAG(zdot) = CIMAG(zdot) + CIMAG(*ptr);
|
||||||
|
/* Slots are stepped by sizeof(double) * 2 bytes even when DOUBLE is not defined;
 * presumably this matches how the threading helper lays out results. TODO confirm. */
ptr = (void *)(((char *)ptr) + sizeof(double) * 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
/* Build without SMP: compute directly in the calling thread. */
zdot_compute(n, x, inc_x, y, inc_y, &zdot);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return zdot;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue