diff --git a/driver/level3/trmm_R.c b/driver/level3/trmm_R.c index bdd9370cd..0882aa496 100644 --- a/driver/level3/trmm_R.c +++ b/driver/level3/trmm_R.c @@ -70,6 +70,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG ls, is, js; BLASLONG min_l, min_i, min_j; BLASLONG jjs, min_jj; +#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA))) + BLASLONG start_ls; +#endif m = args -> m; n = args -> n; @@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } #else - BLASLONG start_ls; - for(js = n; js > 0; js -= GEMM_R){ min_j = js; if (min_j > GEMM_R) min_j = GEMM_R; diff --git a/driver/level3/trsm_L.c b/driver/level3/trsm_L.c index 78da0eb6c..d8130ee7e 100644 --- a/driver/level3/trsm_L.c +++ b/driver/level3/trsm_L.c @@ -76,6 +76,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG ls, is, js; BLASLONG min_l, min_i, min_j; BLASLONG jjs, min_jj; +#if !((!defined(UPPER) && !defined(TRANSA)) || (defined(UPPER) && defined(TRANSA))) + BLASLONG start_is; +#endif m = args -> m; n = args -> n; @@ -178,8 +181,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } } #else - BLASLONG start_is; - for(ls = m; ls > 0; ls -= GEMM_Q){ min_l = ls; if (min_l > GEMM_Q) min_l = GEMM_Q; diff --git a/driver/level3/trsm_R.c b/driver/level3/trsm_R.c index 169441d1e..f6a57f93f 100644 --- a/driver/level3/trsm_R.c +++ b/driver/level3/trsm_R.c @@ -75,6 +75,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO BLASLONG ls, is, js; BLASLONG min_l, min_i, min_j; BLASLONG jjs, min_jj; +#if !((defined(UPPER) && !defined(TRANSA)) || (!defined(UPPER) && defined(TRANSA))) + BLASLONG start_ls; +#endif m = args -> m; n = args -> n; @@ -226,8 +229,6 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO } #else - BLASLONG start_ls; - for(js = n; js > 0; js -= GEMM_R){ min_j = js; if (min_j > GEMM_R) min_j = GEMM_R; diff --git a/interface/gemm.c b/interface/gemm.c index a5a2b4724..7253b0500 100644 --- a/interface/gemm.c +++ b/interface/gemm.c @@ -121,6 +121,9 @@ void NAME(char *TRANSA, char *TRANSB, FLOAT *sa, *sb; #ifdef SMP + int nthreads_max; + int nthreads_avail; + double MNK; #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; @@ -237,6 +240,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS XFLOAT *sa, *sb; #ifdef SMP + int nthreads_max; + int nthreads_avail; + double MNK; #ifndef COMPLEX #ifdef XDOUBLE int mode = BLAS_XDOUBLE | BLAS_REAL; @@ -400,15 +406,15 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS mode |= (transa << BLAS_TRANSA_SHIFT); mode |= (transb << BLAS_TRANSB_SHIFT); - int nthreads_max = num_cpu_avail(3); - int nthreads_avail = nthreads_max; + nthreads_max = num_cpu_avail(3); + nthreads_avail = nthreads_max; #ifndef COMPLEX - double MNK = (double) args.m * (double) args.n * (double) args.k; + MNK = (double) args.m * (double) args.n * (double) args.k; if ( MNK <= (65536.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) nthreads_max = 1; #else - double MNK = (double) args.m * (double) args.n * (double) args.k; + MNK = (double) args.m * (double) args.n * (double) args.k; if ( MNK <= (8192.0 * (double) GEMM_MULTITHREAD_THRESHOLD) ) nthreads_max = 1; #endif diff --git a/interface/gemv.c b/interface/gemv.c index 2dd82dce5..638329a2c 100644 --- a/interface/gemv.c +++ b/interface/gemv.c @@ -80,6 +80,9 @@ void NAME(char *TRANS, blasint *M, blasint *N, FLOAT *buffer; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; + double MNK; #endif int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { @@ -134,6 +137,9 @@ void CNAME(enum CBLAS_ORDER order, blasint info, t; #ifdef SMP int nthreads; + int nthreads_max; + int nthreads_avail; + double MNK; #endif int (*gemv[])(BLASLONG, BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT * , BLASLONG, FLOAT *, BLASLONG, FLOAT *) = { @@ -212,10 +218,10 @@ void CNAME(enum CBLAS_ORDER order, #ifdef SMP - int nthreads_max = num_cpu_avail(2); - int nthreads_avail = nthreads_max; + nthreads_max = num_cpu_avail(2); + nthreads_avail = nthreads_max; - double MNK = (double) m * (double) n; + MNK = (double) m * (double) n; if ( MNK <= (24.0 * 24.0 * (double) (GEMM_MULTITHREAD_THRESHOLD*GEMM_MULTITHREAD_THRESHOLD) ) ) nthreads_max = 1; diff --git a/lapack/getrf/getrf_parallel.c b/lapack/getrf/getrf_parallel.c index b4f33583f..8fdf76987 100644 --- a/lapack/getrf/getrf_parallel.c +++ b/lapack/getrf/getrf_parallel.c @@ -373,7 +373,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, BLASLONG num_cpu; +#ifdef _MSC_VER + BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE]; +#else volatile BLASLONG flag[MAX_CPU_NUMBER * CACHE_LINE_SIZE] __attribute__((aligned(128))); +#endif #ifndef COMPLEX #ifdef XDOUBLE