Make sure that the range_n of the last thread never exceeds the actual data size when splitting the workload

This commit is contained in:
Martin Kroeker
2017-08-02 00:37:58 +02:00
committed by GitHub
parent 1e9247c276
commit c4e5ba1bfe
7 changed files with 19 additions and 3 deletions

View File

@@ -346,7 +346,8 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;
queue[num_cpu].args = &args;
@@ -385,6 +386,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel;