Merge pull request #3292 from martin-frbg/syrk_limit
Add lower limit for multithreading in xSYRK
This commit is contained in:
commit
4ed99c2ce3
|
@ -60,13 +60,16 @@ int main(int argc, char *argv[]){
|
||||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||||
|
|
||||||
blasint m, i, j;
|
blasint m, i, j, l;
|
||||||
|
|
||||||
int from = 1;
|
int from = 1;
|
||||||
int to = 200;
|
int to = 200;
|
||||||
int step = 1;
|
int step = 1;
|
||||||
|
int loops = 1;
|
||||||
|
|
||||||
double time1;
|
if ((p = getenv("OPENBLAS_LOOPS"))) loops=*p;
|
||||||
|
|
||||||
|
double time1,timeg;
|
||||||
|
|
||||||
argc--;argv++;
|
argc--;argv++;
|
||||||
|
|
||||||
|
@ -95,9 +98,12 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
for(m = from; m <= to; m += step)
|
for(m = from; m <= to; m += step)
|
||||||
{
|
{
|
||||||
|
timeg = 0.;
|
||||||
|
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
|
for(l = 0; l < loops; l++) {
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
@ -111,8 +117,10 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
end();
|
end();
|
||||||
|
|
||||||
time1 = getsec();
|
timeg += getsec();
|
||||||
|
|
||||||
|
} //loops
|
||||||
|
time1 = timeg / (double)loops;
|
||||||
fprintf(stderr,
|
fprintf(stderr,
|
||||||
" %10.2f MFlops\n",
|
" %10.2f MFlops\n",
|
||||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||||
|
|
|
@ -354,6 +354,17 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Tr
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
args.common = NULL;
|
args.common = NULL;
|
||||||
|
#ifndef COMPLEX
|
||||||
|
#ifdef DOUBLE
|
||||||
|
if (args.n < 100)
|
||||||
|
#else
|
||||||
|
if (args.n < 200)
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
if (args.n < 65)
|
||||||
|
#endif
|
||||||
|
args.nthreads = 1;
|
||||||
|
else
|
||||||
args.nthreads = num_cpu_avail(3);
|
args.nthreads = num_cpu_avail(3);
|
||||||
|
|
||||||
if (args.nthreads == 1) {
|
if (args.nthreads == 1) {
|
||||||
|
|
Loading…
Reference in New Issue