Merge pull request #3292 from martin-frbg/syrk_limit

Add lower limit for multithreading in xSYRK
This commit is contained in:
Martin Kroeker 2021-07-07 20:46:28 +02:00 committed by GitHub
commit 4ed99c2ce3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 24 additions and 5 deletions

View File

@@ -60,13 +60,16 @@ int main(int argc, char *argv[]){
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
blasint m, i, j; blasint m, i, j, l;
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;
int loops = 1;
double time1; if ((p = getenv("OPENBLAS_LOOPS"))) loops=*p;
double time1,timeg;
argc--;argv++; argc--;argv++;
@@ -95,9 +98,12 @@ int main(int argc, char *argv[]){
for(m = from; m <= to; m += step) for(m = from; m <= to; m += step)
{ {
timeg = 0.;
fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for(l = 0; l < loops; l++) {
for(j = 0; j < m; j++){ for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){ for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
@@ -111,8 +117,10 @@ int main(int argc, char *argv[]){
end(); end();
time1 = getsec(); timeg += getsec();
} //loops
time1 = timeg / (double)loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);

View File

@@ -354,6 +354,17 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
#endif #endif
args.common = NULL; args.common = NULL;
#ifndef COMPLEX
#ifdef DOUBLE
if (args.n < 100)
#else
if (args.n < 200)
#endif
#else
if (args.n < 65)
#endif
args.nthreads = 1;
else
args.nthreads = num_cpu_avail(3); args.nthreads = num_cpu_avail(3);
if (args.nthreads == 1) { if (args.nthreads == 1) {