Merge pull request #1254 from martin-frbg/xbmv_range

Fix calculated range limit exceeding actual data size for last thread
This commit is contained in:
Martin Kroeker 2017-07-31 17:46:40 +02:00 committed by GitHub
commit d537e0de8c
3 changed files with 6 additions and 0 deletions

View File

@@ -233,6 +233,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
#else #else
range_m[num_cpu] = num_cpu * ((n + 15) & ~15); range_m[num_cpu] = num_cpu * ((n + 15) & ~15);
#endif #endif
if (range_m[num_cpu] > n) range_m[num_cpu] = n;
queue[num_cpu].mode = mode; queue[num_cpu].mode = mode;
queue[num_cpu].routine = gbmv_kernel; queue[num_cpu].routine = gbmv_kernel;

View File

@@ -246,6 +246,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode; queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel; queue[num_cpu].routine = sbmv_kernel;
@@ -285,6 +286,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
range_m[num_cpu + 1] = range_m[num_cpu] + width; range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode; queue[num_cpu].mode = mode;
queue[num_cpu].routine = sbmv_kernel; queue[num_cpu].routine = sbmv_kernel;

View File

@@ -288,6 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode; queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel; queue[num_cpu].routine = trmv_kernel;
@@ -327,6 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
range_m[num_cpu + 1] = range_m[num_cpu] + width; range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode; queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel; queue[num_cpu].routine = trmv_kernel;
@@ -356,6 +358,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
range_m[num_cpu + 1] = range_m[num_cpu] + width; range_m[num_cpu + 1] = range_m[num_cpu] + width;
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
queue[num_cpu].mode = mode; queue[num_cpu].mode = mode;
queue[num_cpu].routine = trmv_kernel; queue[num_cpu].routine = trmv_kernel;