Merge pull request #2026 from martin-frbg/trmv_threads

Correct range limiting in trmv_thread and re-enable TRMV multithreading
This commit is contained in:
Martin Kroeker 2019-03-04 15:08:31 +01:00 committed by GitHub
commit 10d841d8b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 3 additions and 9 deletions

View File

@ -346,7 +346,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
@ -386,7 +386,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
if (range_n[num_cpu] > m * num_cpu) range_n[num_cpu] = m * num_cpu;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;

View File

@ -218,11 +218,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
/* nthreads = num_cpu_avail(2);
nthreads = num_cpu_avail(2);
FIXME trmv_thread was found to be broken, see issue 1332 */
nthreads = 1;
if (nthreads == 1) {
#endif

View File

@ -239,9 +239,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
} else
nthreads = 1;
/* FIXME TRMV multithreading appears to be broken, see issue 1332*/
nthreads = 1;
if(nthreads > 1) {
buffer_size = n > 16 ? 0 : n * 4 + 40;
}