diff --git a/kernel/x86_64/dasum.c b/kernel/x86_64/dasum.c index 8a40ea4b9..ddec21383 100644 --- a/kernel/x86_64/dasum.c +++ b/kernel/x86_64/dasum.c @@ -58,21 +58,19 @@ static FLOAT dasum_kernel(BLASLONG n, FLOAT *x1) } #endif - -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +static FLOAT asum_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x) { - BLASLONG i=0; + BLASLONG i = 0; FLOAT sumf = 0.0; + + if (n <= 0 || inc_x <= 0) return (sumf); - if (n <= 0 || inc_x <= 0) return(sumf); - - if ( inc_x == 1 ) { + if (inc_x == 1) { sumf = dasum_kernel(n, x); - } + } else { n *= inc_x; - - while(i < n) { + while (i < n) { sumf += ABS_K(x[i]); i += inc_x; } @@ -80,3 +78,53 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) return(sumf); } +#if defined(SMP) +static int asum_thread_function(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy2, FLOAT *x, BLASLONG inc_x, FLOAT *dummy3, BLASLONG dummy4, FLOAT *result, BLASLONG dummy5) +{ + *(FLOAT *)result = asum_compute(n, x, inc_x); + return 0; +} + +extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int (*function)(), int nthreads); +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ +#if defined(SMP) + int nthreads; + FLOAT dummy_alpha; + FLOAT * dummy_b; +#endif + FLOAT sumf = 0.0; + +#if defined(SMP) + int num_cpu = num_cpu_avail(1); + if (n <= 100000 || inc_x <= 0) + nthreads = 1; + else + nthreads = num_cpu < n/100000 ? num_cpu : n/100000; + + if (nthreads == 1) { + sumf = asum_compute(n, x, inc_x); + } else { + int mode, i; + char result[MAX_CPU_NUMBER * sizeof(double) *2]; + FLOAT *ptr; +#if !defined(DOUBLE) + mode = BLAS_SINGLE | BLAS_REAL; +#else + mode = BLAS_DOUBLE | BLAS_REAL; +#endif + blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, dummy_b, 0, result, 0, (void *)asum_thread_function, nthreads); + ptr = (FLOAT *)result; + for (i = 0; i < nthreads; i++) { + sumf += (*ptr); + ptr = (FLOAT *)(((char *)ptr) + sizeof(double) *2); + } + } +#else + sumf = asum_compute(n, x, inc_x); +#endif + return(sumf); +} + diff --git a/kernel/x86_64/sasum.c b/kernel/x86_64/sasum.c index 36ec4a737..d0cea9bee 100644 --- a/kernel/x86_64/sasum.c +++ b/kernel/x86_64/sasum.c @@ -67,24 +67,71 @@ static FLOAT sasum_kernel(BLASLONG n, FLOAT *x1) #endif -FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +static FLOAT asum_compute(BLASLONG n, FLOAT * x, BLASLONG inc_x) { - BLASLONG i=0; + BLASLONG i = 0; FLOAT sumf = 0.0; + + if (n <= 0 || inc_x <= 0) return (sumf); - if (n <= 0 || inc_x <= 0) return(sumf); - - if ( inc_x == 1 ) { + if (inc_x == 1) { sumf = sasum_kernel(n, x); } else { - n *= inc_x; while(i < n) { sumf += ABS_K(x[i]); i += inc_x; } - } + return (sumf); +} + +#if defined(SMP) +static int asum_thread_function(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy2, FLOAT *x, BLASLONG inc_x, FLOAT *dummy3, BLASLONG dummy4, FLOAT *result, BLASLONG dummy5) +{ + *(FLOAT *)result = asum_compute(n, x, inc_x); + return 0; +} + +extern int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void * alpha, void *a, BLASLONG lda, void *b, BLASLONG ldb, void *c, BLASLONG ldc, int(*function)(), int nthreads); +#endif + +FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) +{ +#if defined(SMP) + int nthreads; + FLOAT dummy_alpha; +#endif + FLOAT sumf = 0.0; + +#if defined(SMP) + int num_cpu = num_cpu_avail(1); + if (n <= 100000 || inc_x <= 0) + nthreads = 1; + else + nthreads = num_cpu < n/100000 ? num_cpu : n/100000; + if (nthreads == 1) { + sumf = asum_compute(n, x, inc_x); + } + else { + int mode, i; + char result[MAX_CPU_NUMBER * sizeof(double) *2]; + FLOAT * ptr; +#if !defined(DOUBLE) + mode = BLAS_SINGLE | BLAS_REAL; +#else + mode = BLAS_DOUBLE | BLAS_REAL; +#endif + blas_level1_thread_with_return_value(mode, n, 0, 0, &dummy_alpha, x, inc_x, NULL, 0, result, 0, (void *)asum_thread_function, nthreads); + ptr = (FLOAT *)result; + for (i = 0; i < nthreads; i++) { + sumf += (*ptr); + ptr = (FLOAT *)(((char *)ptr) + sizeof(double) * 2); + } + } +#else + sumf = asum_compute(n, x, inc_x); +#endif return(sumf); }