Make sure that the range_n (or range_m) offset of the last thread never exceeds the actual data size when splitting the workload
This commit is contained in:
parent
1e9247c276
commit
c4e5ba1bfe
|
@@ -230,8 +230,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT
|
|||
|
||||
#ifndef TRANSA
|
||||
range_m[num_cpu] = num_cpu * ((m + 15) & ~15);
|
||||
if (range_m[num_cpu] > m) range_m[num_cpu] = m;
|
||||
#else
|
||||
range_m[num_cpu] = num_cpu * ((n + 15) & ~15);
|
||||
if (range_m[num_cpu] > n) range_m[num_cpu] = n;
|
||||
#endif
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
|
|
|
@@ -246,6 +246,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
|||
|
||||
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
|
||||
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = sbmv_kernel;
|
||||
|
@@ -285,6 +286,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
|||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = sbmv_kernel;
|
||||
|
@@ -316,6 +318,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x
|
|||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
|
||||
range_n[num_cpu] = num_cpu * ((n + 15) & ~15);
|
||||
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = sbmv_kernel;
|
||||
|
|
|
@@ -246,6 +246,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
|
|||
|
||||
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = spmv_kernel;
|
||||
|
@@ -285,6 +286,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y,
|
|||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = spmv_kernel;
|
||||
|
|
|
@@ -177,7 +177,8 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
|
|||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
|
||||
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
|
||||
|
||||
queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode;
|
||||
queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel;
|
||||
queue[MAX_CPU_NUMBER - num_cpu - 1].args = &args;
|
||||
|
@@ -225,6 +226,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i
|
|||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = symv_kernel;
|
||||
|
|
|
@@ -288,6 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
|
|||
|
||||
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
|
||||
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = trmv_kernel;
|
||||
|
@@ -327,6 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
|
|||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = trmv_kernel;
|
||||
|
@@ -356,6 +358,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc
|
|||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > n) range_n[num_cpu] = n;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = trmv_kernel;
|
||||
|
|
|
@@ -307,7 +307,8 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
|
|||
|
||||
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
|
||||
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = tpmv_kernel;
|
||||
queue[num_cpu].args = &args;
|
||||
|
@@ -346,6 +347,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr
|
|||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = tpmv_kernel;
|
||||
|
|
|
@@ -346,7 +346,8 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
|
|||
|
||||
range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
|
||||
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = trmv_kernel;
|
||||
queue[num_cpu].args = &args;
|
||||
|
@@ -385,6 +386,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu
|
|||
|
||||
range_m[num_cpu + 1] = range_m[num_cpu] + width;
|
||||
range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16);
|
||||
if (range_n[num_cpu] > m) range_n[num_cpu] = m;
|
||||
|
||||
queue[num_cpu].mode = mode;
|
||||
queue[num_cpu].routine = trmv_kernel;
|
||||
|
|
Loading…
Reference in New Issue