Merge pull request #3288 from martin-frbg/getrf-2
Add lower threshold for multithreading in ?GETRF
This commit is contained in:
commit
f20c4edc33
|
@ -72,13 +72,17 @@ int main(int argc, char *argv[]){
|
||||||
FLOAT *a,*work;
|
FLOAT *a,*work;
|
||||||
FLOAT wkopt[4];
|
FLOAT wkopt[4];
|
||||||
blasint *ipiv;
|
blasint *ipiv;
|
||||||
blasint m, i, j, info,lwork;
|
blasint m, i, j, l, info,lwork;
|
||||||
|
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
int loops = 1;
|
||||||
|
|
||||||
double time1;
|
double time1,timeg;
|
||||||
|
|
||||||
|
char *p;
|
||||||
|
char btest = 'I';
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
|
||||||
|
@ -86,6 +90,9 @@ int main(int argc, char *argv[]){
|
||||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
|
||||||
|
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops=*p;
|
||||||
|
|
||||||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
|
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
|
||||||
|
|
||||||
|
@ -124,32 +131,41 @@ int main(int argc, char *argv[]){
|
||||||
fprintf(stderr, " SIZE FLops Time Lwork\n");
|
fprintf(stderr, " SIZE FLops Time Lwork\n");
|
||||||
|
|
||||||
for(m = from; m <= to; m += step){
|
for(m = from; m <= to; m += step){
|
||||||
|
timeg = 0.;
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
GETRF (&m, &m, a, &m, ipiv, &info);
|
for (l = 0; l < loops; l++) {
|
||||||
|
|
||||||
|
if (btest == 'F') begin();
|
||||||
|
GETRF (&m, &m, a, &m, ipiv, &info);
|
||||||
|
if (btest == 'F') {
|
||||||
|
end();
|
||||||
|
timeg += getsec();
|
||||||
|
}
|
||||||
if (info) {
|
if (info) {
|
||||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
begin();
|
if (btest == 'I') begin();
|
||||||
|
|
||||||
lwork = -1;
|
lwork = -1;
|
||||||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
||||||
|
|
||||||
lwork = (blasint)wkopt[0];
|
lwork = (blasint)wkopt[0];
|
||||||
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
|
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
|
||||||
end();
|
if (btest == 'I') end();
|
||||||
|
|
||||||
if (info) {
|
if (info) {
|
||||||
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
|
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = getsec();
|
if (btest == 'I')
|
||||||
|
timeg += getsec();
|
||||||
|
|
||||||
|
} // loops
|
||||||
|
time1 = timeg/(double)loops;
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||||
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork);
|
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork);
|
||||||
|
|
|
@ -72,17 +72,21 @@ int main(int argc, char *argv[]){
|
||||||
FLOAT *a, *b;
|
FLOAT *a, *b;
|
||||||
blasint *ipiv;
|
blasint *ipiv;
|
||||||
|
|
||||||
blasint m, i, j, info;
|
blasint m, i, j, l, info;
|
||||||
blasint unit = 1;
|
blasint unit = 1;
|
||||||
|
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
int loops = 1;
|
||||||
|
|
||||||
FLOAT maxerr;
|
FLOAT maxerr;
|
||||||
|
|
||||||
double time1, time2;
|
double time1, time2, timeg1,timeg2;
|
||||||
|
|
||||||
|
char *p;
|
||||||
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops=*p;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
|
||||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||||
|
@ -110,9 +114,9 @@ int main(int argc, char *argv[]){
|
||||||
fprintf(stderr, " SIZE Residual Decompose Solve Total\n");
|
fprintf(stderr, " SIZE Residual Decompose Solve Total\n");
|
||||||
|
|
||||||
for(m = from; m <= to; m += step){
|
for(m = from; m <= to; m += step){
|
||||||
|
timeg1 = timeg2 = 0.;
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
for (l = 0; l < loops; l++) {
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
@ -138,7 +142,7 @@ int main(int argc, char *argv[]){
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time1 = getsec();
|
timeg1 += getsec();
|
||||||
|
|
||||||
begin();
|
begin();
|
||||||
|
|
||||||
|
@ -151,8 +155,10 @@ int main(int argc, char *argv[]){
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
time2 = getsec();
|
timeg2 += getsec();
|
||||||
|
} //loops
|
||||||
|
time1=timeg1/(double)loops;
|
||||||
|
time2=timeg2/(double)loops;
|
||||||
maxerr = 0.;
|
maxerr = 0.;
|
||||||
|
|
||||||
for(i = 0; i < m; i++){
|
for(i = 0; i < m; i++){
|
||||||
|
|
|
@ -95,7 +95,14 @@ int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
args.common = NULL;
|
args.common = NULL;
|
||||||
args.nthreads = num_cpu_avail(4);
|
#ifndef DOUBLE
|
||||||
|
if (args.m*args.n < 40000)
|
||||||
|
#else
|
||||||
|
if (args.m*args.n < 10000)
|
||||||
|
#endif
|
||||||
|
args.nthreads=1;
|
||||||
|
else
|
||||||
|
args.nthreads = num_cpu_avail(4);
|
||||||
|
|
||||||
if (args.nthreads == 1) {
|
if (args.nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -95,7 +95,10 @@ int NAME(blasint *M, blasint *N, FLOAT *a, blasint *ldA, blasint *ipiv, blasint
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
args.common = NULL;
|
args.common = NULL;
|
||||||
args.nthreads = num_cpu_avail(4);
|
if (args.m*args.n <10000)
|
||||||
|
args.nthreads = 1;
|
||||||
|
else
|
||||||
|
args.nthreads = num_cpu_avail(4);
|
||||||
|
|
||||||
if (args.nthreads == 1) {
|
if (args.nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue