Merge pull request #564 from wernsaar/develop
Use only 1 thread in trsm if m or n < 2*GEMM_MULTITHREAD_THRESHOLD
This commit is contained in:
commit
e52d36450a
|
@ -130,11 +130,21 @@ int main(int argc, char *argv[]){
|
||||||
char trans='N';
|
char trans='N';
|
||||||
char diag ='U';
|
char diag ='U';
|
||||||
|
|
||||||
|
|
||||||
|
int l;
|
||||||
|
int loops = 1;
|
||||||
|
double timeg;
|
||||||
|
|
||||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
|
||||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
|
||||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
|
||||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p;
|
||||||
|
|
||||||
|
p = getenv("OPENBLAS_LOOPS");
|
||||||
|
if ( p != NULL )
|
||||||
|
loops = atoi(p);
|
||||||
|
|
||||||
|
|
||||||
blasint m, i, j;
|
blasint m, i, j;
|
||||||
|
|
||||||
int from = 1;
|
int from = 1;
|
||||||
|
@ -150,7 +160,7 @@ int main(int argc, char *argv[]){
|
||||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||||
|
|
||||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag);
|
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c Loops = %d\n", from, to, step,side,uplo,trans,diag,loops);
|
||||||
|
|
||||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||||
|
@ -171,8 +181,14 @@ int main(int argc, char *argv[]){
|
||||||
for(m = from; m <= to; m += step)
|
for(m = from; m <= to; m += step)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
timeg=0.0;
|
||||||
|
|
||||||
fprintf(stderr, " %6d : ", (int)m);
|
fprintf(stderr, " %6d : ", (int)m);
|
||||||
|
|
||||||
|
for (l=0; l<loops; l++)
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
for(j = 0; j < m; j++){
|
for(j = 0; j < m; j++){
|
||||||
for(i = 0; i < m * COMPSIZE; i++){
|
for(i = 0; i < m * COMPSIZE; i++){
|
||||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||||
|
@ -188,11 +204,12 @@ int main(int argc, char *argv[]){
|
||||||
|
|
||||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||||
|
|
||||||
gettimeofday( &start, (struct timezone *)0);
|
timeg += time1;
|
||||||
|
}
|
||||||
|
|
||||||
fprintf(stderr,
|
time1 = timeg/loops;
|
||||||
" %10.2f MFlops\n",
|
|
||||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -362,6 +362,12 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
mode |= (side << BLAS_RSIDE_SHIFT);
|
mode |= (side << BLAS_RSIDE_SHIFT);
|
||||||
|
|
||||||
args.nthreads = num_cpu_avail(3);
|
args.nthreads = num_cpu_avail(3);
|
||||||
|
if ( args.m < 2*GEMM_MULTITHREAD_THRESHOLD )
|
||||||
|
args.nthreads = 1;
|
||||||
|
else
|
||||||
|
if ( args.n < 2*GEMM_MULTITHREAD_THRESHOLD )
|
||||||
|
args.nthreads = 1;
|
||||||
|
|
||||||
|
|
||||||
if (args.nthreads == 1) {
|
if (args.nthreads == 1) {
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Reference in New Issue