Merge pull request #3288 from martin-frbg/getrf-2

Add lower threshold for multithreading in ?GETRF
This commit is contained in:
Martin Kroeker 2021-07-07 20:45:57 +02:00 committed by GitHub
commit f20c4edc33
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 49 additions and 17 deletions

View File

@ -72,13 +72,17 @@ int main(int argc, char *argv[]){
FLOAT *a,*work;
FLOAT wkopt[4];
blasint *ipiv;
blasint m, i, j, info,lwork;
blasint m, i, j, l, info,lwork;
int from = 1;
int to = 200;
int step = 1;
int loops = 1;
double time1;
double time1,timeg;
char *p;
char btest = 'I';
argc--;argv++;
@ -86,6 +90,9 @@ int main(int argc, char *argv[]){
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=*p;
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
@ -124,32 +131,41 @@ int main(int argc, char *argv[]){
fprintf(stderr, " SIZE FLops Time Lwork\n");
for(m = from; m <= to; m += step){
timeg = 0.;
fprintf(stderr, " %6d : ", (int)m);
GETRF (&m, &m, a, &m, ipiv, &info);
for (l = 0; l < loops; l++) {
if (btest == 'F') begin();
GETRF (&m, &m, a, &m, ipiv, &info);
if (btest == 'F') {
end();
timeg += getsec();
}
if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1);
}
begin();
if (btest == 'I') begin();
lwork = -1;
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
lwork = (blasint)wkopt[0];
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
end();
if (btest == 'I') end();
if (info) {
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
exit(1);
}
time1 = getsec();
if (btest == 'I')
timeg += getsec();
} // loops
time1 = timeg/(double)loops;
fprintf(stderr,
" %10.2f MFlops : %10.2f Sec : %d\n",
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork);

View File

@ -72,17 +72,21 @@ int main(int argc, char *argv[]){
FLOAT *a, *b;
blasint *ipiv;
blasint m, i, j, info;
blasint m, i, j, l, info;
blasint unit = 1;
int from = 1;
int to = 200;
int step = 1;
int loops = 1;
FLOAT maxerr;
double time1, time2;
double time1, time2, timeg1,timeg2;
char *p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=*p;
argc--;argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
@ -110,9 +114,9 @@ int main(int argc, char *argv[]){
fprintf(stderr, " SIZE Residual Decompose Solve Total\n");
for(m = from; m <= to; m += step){
timeg1 = timeg2 = 0.;
fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++) {
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
@ -138,7 +142,7 @@ int main(int argc, char *argv[]){
exit(1);
}
time1 = getsec();
timeg1 += getsec();
begin();
@ -151,8 +155,10 @@ int main(int argc, char *argv[]){
exit(1);
}
time2 = getsec();
timeg2 += getsec();
} //loops
time1=timeg1/(double)loops;
time2=timeg2/(double)loops;
maxerr = 0.;
for(i = 0; i < m; i++){

View File

@ -95,7 +95,14 @@ int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint
#ifdef SMP
args.common = NULL;
args.nthreads = num_cpu_avail(4);
#ifndef DOUBLE
if (args.m*args.n < 40000)
#else
if (args.m*args.n < 10000)
#endif
args.nthreads=1;
else
args.nthreads = num_cpu_avail(4);
if (args.nthreads == 1) {
#endif

View File

@ -95,7 +95,10 @@ int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint
#ifdef SMP
args.common = NULL;
args.nthreads = num_cpu_avail(4);
if (args.m*args.n <10000)
args.nthreads = 1;
else
args.nthreads = num_cpu_avail(4);
if (args.nthreads == 1) {
#endif