From a8cc6b365daa0699a31c4f8db35cc21ff56a0da5 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 20 Feb 2018 20:31:15 +0100 Subject: [PATCH 1/3] Minor fixes --- kernel/generic/ztrmm_ltcopy_8.c | 36 ++++++++---------------- kernel/x86_64/dtrmm_kernel_4x8_haswell.c | 20 ++++++------- lapack/trtri/trtri_L_parallel.c | 6 ++-- lapack/trtri/trtri_U_parallel.c | 6 ++-- 4 files changed, 28 insertions(+), 40 deletions(-) diff --git a/kernel/generic/ztrmm_ltcopy_8.c b/kernel/generic/ztrmm_ltcopy_8.c index 0af2420c3..c71ba3b04 100644 --- a/kernel/generic/ztrmm_ltcopy_8.c +++ b/kernel/generic/ztrmm_ltcopy_8.c @@ -801,32 +801,20 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON i = (m & 1); if (i > 0) { - if (X > posY) { - /* a01 += 2; - a02 += 2; */ - b += 4; - } else - if (X < posY) { - b[ 0] = *(a01 + 0); - b[ 1] = *(a01 + 1); - b[ 2] = *(a01 + 2); - b[ 3] = *(a01 + 3); - - /* a01 += lda; - a02 += lda; */ - b += 4; - } else { #ifdef UNIT - b[ 0] = ONE; - b[ 1] = ZERO; -#else - b[ 0] = *(a01 + 0); - b[ 1] = *(a01 + 1); + if (X < posY) { #endif - b[ 2] = *(a01 + 2); - b[ 3] = *(a01 + 3); - b += 4; - } + b[ 0] = *(a01 + 0); + b[ 1] = *(a01 + 1); +#ifdef UNIT + } else { + b[ 0] = ONE; + b[ 1] = ZERO; + } +#endif + b[ 2] = *(a01 + 2); + b[ 3] = *(a01 + 3); + b += 4; } posY += 2; } diff --git a/kernel/x86_64/dtrmm_kernel_4x8_haswell.c b/kernel/x86_64/dtrmm_kernel_4x8_haswell.c index 289af772e..adaa40f19 100644 --- a/kernel/x86_64/dtrmm_kernel_4x8_haswell.c +++ b/kernel/x86_64/dtrmm_kernel_4x8_haswell.c @@ -301,7 +301,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL dtrmm_kernel_4x8( temp, &alpha , ptrba, ptrbb, C0, C1, C2, C3, C4, C5, C6, C7); ptrba = ptrba + temp * 4; - ptrbb = ptrbb + temp * 8; + // ptrbb = ptrbb + temp * 8; /* for (k=0; k blocking) bk = blocking; - range_N[0] = i; - range_N[1] = i + bk; + /* range_N[0] = i; + range_N[1] = i + bk; */ newarg.lda = lda; newarg.ldb = lda; diff --git a/lapack/trtri/trtri_U_parallel.c b/lapack/trtri/trtri_U_parallel.c index fc48a33f1..b527f8158 100644 --- a/lapack/trtri/trtri_U_parallel.c +++ b/lapack/trtri/trtri_U_parallel.c @@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, BLASLONG n, info; BLASLONG bk, i, blocking; int mode; - BLASLONG lda, range_N[2]; + BLASLONG lda; // , range_N[2]; blas_arg_t newarg; FLOAT *a; FLOAT alpha[2] = { ONE, ZERO}; @@ -96,8 +96,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, bk = n - i; if (bk > blocking) bk = blocking; - range_N[0] = i; - range_N[1] = i + bk; + /* range_N[0] = i; + range_N[1] = i + bk; */ newarg.lda = lda; newarg.ldb = lda; From 87e7cd9e19ea181a557074a40150d203f04d2b16 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 27 Feb 2018 21:13:54 +0100 Subject: [PATCH 2/3] take out unreachable branch to re-trigger CI --- lapack/getrf/getrf_parallel.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lapack/getrf/getrf_parallel.c b/lapack/getrf/getrf_parallel.c index db8c836e0..36490d6c2 100644 --- a/lapack/getrf/getrf_parallel.c +++ b/lapack/getrf/getrf_parallel.c @@ -124,13 +124,13 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra min_jj = js + min_j - jjs; if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; - if (0 && GEMM_UNROLL_N <= 8) { + /* if (0 && GEMM_UNROLL_N <= 8) { LASWP_NCOPY(min_jj, off + 1, off + k, c + (- off + jjs * lda) * COMPSIZE, lda, ipiv, sbb + k * (jjs - js) * COMPSIZE); - } else { + } else { */ LASWP_PLUS(min_jj, off + 1, off + k, ZERO, #ifdef COMPLEX @@ -140,7 +140,7 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra GEMM_ONCOPY (k, min_jj, c + jjs * lda * COMPSIZE, lda, sbb + (jjs - js) * k * COMPSIZE); - } + // } for (is = 0; is < k; is += GEMM_P) { min_i = k - is; @@ -251,14 +251,14 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG * min_jj = MIN(n_to, xxx + div_n) - jjs; if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; - if (0 && GEMM_UNROLL_N <= 8) { + /* if (0 && GEMM_UNROLL_N <= 8) { printf("helllo\n"); LASWP_NCOPY(min_jj, off + 1, off + k, b + (- off + jjs * lda) * COMPSIZE, lda, ipiv, buffer[bufferside] + (jjs - xxx) * k * COMPSIZE); - } else { + } else { */ LASWP_PLUS(min_jj, off + 1, off + k, ZERO, #ifdef COMPLEX @@ -268,7 +268,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG * GEMM_ONCOPY (k, min_jj, b + jjs * lda * COMPSIZE, lda, buffer[bufferside] + (jjs - xxx) * k * COMPSIZE); - } + // } for (is = 0; is < k; is += GEMM_P) { min_i = k - is; From f1aaf0777a9616b8008268ff9c2d345f55a0acb2 Mon Sep 17 00:00:00 2001 From: Andrew Date: Tue, 27 Feb 2018 23:12:24 +0100 Subject: [PATCH 3/3] Retrigger CI and fix #1474 --- benchmark/gemm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/gemm.c b/benchmark/gemm.c index 809813c92..72db0c56d 100644 --- a/benchmark/gemm.c +++ b/benchmark/gemm.c @@ -237,7 +237,7 @@ int main(int argc, char *argv[]){ timeg = time1/loops; fprintf(stderr, " %10.2f MFlops %10.6f sec\n", - COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1); + COMPSIZE * COMPSIZE * (2.*(double)k+2.) * (double)m * (double)n / timeg * 1.e-6, time1); }