From 9332042d5f6a630d00c868781a0eb3e660517bd7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 28 Jul 2017 00:13:24 +0200 Subject: [PATCH 1/3] Fix range exceeding actual data size in quick_divide --- driver/level2/gbmv_thread.c | 1 + 1 file changed, 1 insertion(+) diff --git a/driver/level2/gbmv_thread.c b/driver/level2/gbmv_thread.c index e86b565f8..6073a4856 100644 --- a/driver/level2/gbmv_thread.c +++ b/driver/level2/gbmv_thread.c @@ -233,6 +233,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT #else range_m[num_cpu] = num_cpu * ((n + 15) & ~15); #endif + if (range_m[num_cpu] > n) range_m[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = gbmv_kernel; From 857f61bc5dea502d07946a8637e70944b277ee2c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 28 Jul 2017 00:21:53 +0200 Subject: [PATCH 2/3] Fix range limit exceeding data size in last step --- driver/level2/sbmv_thread.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/driver/level2/sbmv_thread.c b/driver/level2/sbmv_thread.c index 5718c0ec9..68ee93ee1 100644 --- a/driver/level2/sbmv_thread.c +++ b/driver/level2/sbmv_thread.c @@ -246,6 +246,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; @@ -285,6 +286,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; From 585c0010a5de7b42ab32ddb8230b4bc20eeedd43 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 28 Jul 2017 00:27:02 +0200 Subject: [PATCH 3/3] Fix range limit exceeding actual data size in last step --- driver/level2/tbmv_thread.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/driver/level2/tbmv_thread.c b/driver/level2/tbmv_thread.c index 226a922e9..aaf4958e2 100644 --- a/driver/level2/tbmv_thread.c +++ b/driver/level2/tbmv_thread.c @@ -288,6 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; @@ -327,6 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; @@ -356,6 +358,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel;