This commit is contained in:
Andrew 2018-02-28 17:48:10 +00:00 committed by GitHub
commit 83b0712602
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 35 additions and 47 deletions

View File

@ -237,7 +237,7 @@ int main(int argc, char *argv[]){
timeg = time1/loops; timeg = time1/loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1); COMPSIZE * COMPSIZE * (2.*(double)k+2.) * (double)m * (double)n / timeg * 1.e-6, time1);
} }

View File

@ -801,33 +801,21 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i = (m & 1); i = (m & 1);
if (i > 0) { if (i > 0) {
if (X > posY) { #ifdef UNIT
/* a01 += 2;
a02 += 2; */
b += 4;
} else
if (X < posY) { if (X < posY) {
#endif
b[ 0] = *(a01 + 0); b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1); b[ 1] = *(a01 + 1);
b[ 2] = *(a01 + 2);
b[ 3] = *(a01 + 3);
/* a01 += lda;
a02 += lda; */
b += 4;
} else {
#ifdef UNIT #ifdef UNIT
} else {
b[ 0] = ONE; b[ 0] = ONE;
b[ 1] = ZERO; b[ 1] = ZERO;
#else }
b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1);
#endif #endif
b[ 2] = *(a01 + 2); b[ 2] = *(a01 + 2);
b[ 3] = *(a01 + 3); b[ 3] = *(a01 + 3);
b += 4; b += 4;
} }
}
posY += 2; posY += 2;
} }

View File

@ -301,7 +301,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
dtrmm_kernel_4x8( temp, &alpha , ptrba, ptrbb, C0, C1, C2, C3, C4, C5, C6, C7); dtrmm_kernel_4x8( temp, &alpha , ptrba, ptrbb, C0, C1, C2, C3, C4, C5, C6, C7);
ptrba = ptrba + temp * 4; ptrba = ptrba + temp * 4;
ptrbb = ptrbb + temp * 8; // ptrbb = ptrbb + temp * 8;
/* /*
for (k=0; k<temp; k++) for (k=0; k<temp; k++)
@ -446,7 +446,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
temp - 8; // number of values in B temp - 8; // number of values in B
ptrba += temp*4; // number of values in A ptrba += temp*4; // number of values in A
ptrbb += temp*8; // number of values in B // ptrbb += temp*8; // number of values in B
} }
#ifdef LEFT #ifdef LEFT
off += 4; // number of values in A off += 4; // number of values in A
@ -709,14 +709,14 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A off += 1; // number of values in A
#endif #endif
C0 = C0+1; /* C0 = C0+1;
C1 = C1+1; C1 = C1+1;
C2 = C2+1; C2 = C2+1;
C3 = C3+1; C3 = C3+1;
C4 = C4+1; C4 = C4+1;
C5 = C5+1; C5 = C5+1;
C6 = C6+1; C6 = C6+1;
C7 = C7+1; C7 = C7+1; */
} }
@ -862,7 +862,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
temp - 4; // number of values in B */ temp - 4; // number of values in B */
ptrba += temp*4; // number of values in A ptrba += temp*4; // number of values in A
ptrbb += temp*4; // number of values in B // ptrbb += temp*4; // number of values in B
} }
#ifdef LEFT #ifdef LEFT
off += 4; // number of values in A off += 4; // number of values in A
@ -1049,10 +1049,10 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A off += 1; // number of values in A
#endif #endif
C0 = C0+1; /* C0 = C0+1;
C1 = C1+1; C1 = C1+1;
C2 = C2+1; C2 = C2+1;
C3 = C3+1; C3 = C3+1; */
} }
@ -1311,8 +1311,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A off += 1; // number of values in A
#endif #endif
C0 = C0+1; /* C0 = C0+1;
C1 = C1+1; C1 = C1+1; */
} }
@ -1532,7 +1532,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A off += 1; // number of values in A
#endif #endif
C0 = C0+1; // C0 = C0+1;
} }

View File

@ -124,13 +124,13 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
min_jj = js + min_j - jjs; min_jj = js + min_j - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
if (0 && GEMM_UNROLL_N <= 8) { /* if (0 && GEMM_UNROLL_N <= 8) {
LASWP_NCOPY(min_jj, off + 1, off + k, LASWP_NCOPY(min_jj, off + 1, off + k,
c + (- off + jjs * lda) * COMPSIZE, lda, c + (- off + jjs * lda) * COMPSIZE, lda,
ipiv, sbb + k * (jjs - js) * COMPSIZE); ipiv, sbb + k * (jjs - js) * COMPSIZE);
} else { } else { */
LASWP_PLUS(min_jj, off + 1, off + k, ZERO, LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
#ifdef COMPLEX #ifdef COMPLEX
@ -140,7 +140,7 @@ static void inner_basic_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *ra
GEMM_ONCOPY (k, min_jj, c + jjs * lda * COMPSIZE, lda, sbb + (jjs - js) * k * COMPSIZE); GEMM_ONCOPY (k, min_jj, c + jjs * lda * COMPSIZE, lda, sbb + (jjs - js) * k * COMPSIZE);
} // }
for (is = 0; is < k; is += GEMM_P) { for (is = 0; is < k; is += GEMM_P) {
min_i = k - is; min_i = k - is;
@ -251,14 +251,14 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
min_jj = MIN(n_to, xxx + div_n) - jjs; min_jj = MIN(n_to, xxx + div_n) - jjs;
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
if (0 && GEMM_UNROLL_N <= 8) { /* if (0 && GEMM_UNROLL_N <= 8) {
printf("helllo\n"); printf("helllo\n");
LASWP_NCOPY(min_jj, off + 1, off + k, LASWP_NCOPY(min_jj, off + 1, off + k,
b + (- off + jjs * lda) * COMPSIZE, lda, b + (- off + jjs * lda) * COMPSIZE, lda,
ipiv, buffer[bufferside] + (jjs - xxx) * k * COMPSIZE); ipiv, buffer[bufferside] + (jjs - xxx) * k * COMPSIZE);
} else { } else { */
LASWP_PLUS(min_jj, off + 1, off + k, ZERO, LASWP_PLUS(min_jj, off + 1, off + k, ZERO,
#ifdef COMPLEX #ifdef COMPLEX
@ -268,7 +268,7 @@ static int inner_advanced_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *
GEMM_ONCOPY (k, min_jj, b + jjs * lda * COMPSIZE, lda, GEMM_ONCOPY (k, min_jj, b + jjs * lda * COMPSIZE, lda,
buffer[bufferside] + (jjs - xxx) * k * COMPSIZE); buffer[bufferside] + (jjs - xxx) * k * COMPSIZE);
} // }
for (is = 0; is < k; is += GEMM_P) { for (is = 0; is < k; is += GEMM_P) {
min_i = k - is; min_i = k - is;

View File

@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
BLASLONG n, info; BLASLONG n, info;
BLASLONG bk, i, blocking, start_i; BLASLONG bk, i, blocking, start_i;
int mode; int mode;
BLASLONG lda, range_N[2]; BLASLONG lda;//, range_N[2];
blas_arg_t newarg; blas_arg_t newarg;
FLOAT *a; FLOAT *a;
FLOAT alpha[2] = { ONE, ZERO}; FLOAT alpha[2] = { ONE, ZERO};
@ -100,8 +100,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
bk = n - i; bk = n - i;
if (bk > blocking) bk = blocking; if (bk > blocking) bk = blocking;
range_N[0] = i; /* range_N[0] = i;
range_N[1] = i + bk; range_N[1] = i + bk; */
newarg.lda = lda; newarg.lda = lda;
newarg.ldb = lda; newarg.ldb = lda;

View File

@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
BLASLONG n, info; BLASLONG n, info;
BLASLONG bk, i, blocking; BLASLONG bk, i, blocking;
int mode; int mode;
BLASLONG lda, range_N[2]; BLASLONG lda; // , range_N[2];
blas_arg_t newarg; blas_arg_t newarg;
FLOAT *a; FLOAT *a;
FLOAT alpha[2] = { ONE, ZERO}; FLOAT alpha[2] = { ONE, ZERO};
@ -96,8 +96,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
bk = n - i; bk = n - i;
if (bk > blocking) bk = blocking; if (bk > blocking) bk = blocking;
range_N[0] = i; /* range_N[0] = i;
range_N[1] = i + bk; range_N[1] = i + bk; */
newarg.lda = lda; newarg.lda = lda;
newarg.ldb = lda; newarg.ldb = lda;