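Minor fixes: simplify the `m & 1` tail branch in the first file, comment out the trailing `ptrbb` and `C0`..`C7` pointer increments in the file that calls `dtrmm_kernel_4x8`, and comment out the `range_N` declaration and assignments in the last two files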

This commit is contained in:
Andrew 2018-02-20 20:31:15 +01:00
parent e3a80e6aa8
commit a8cc6b365d
4 changed files with 28 additions and 40 deletions

View File

@ -801,33 +801,21 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i = (m & 1); i = (m & 1);
if (i > 0) { if (i > 0) {
if (X > posY) { #ifdef UNIT
/* a01 += 2;
a02 += 2; */
b += 4;
} else
if (X < posY) { if (X < posY) {
#endif
b[ 0] = *(a01 + 0); b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1); b[ 1] = *(a01 + 1);
b[ 2] = *(a01 + 2);
b[ 3] = *(a01 + 3);
/* a01 += lda;
a02 += lda; */
b += 4;
} else {
#ifdef UNIT #ifdef UNIT
} else {
b[ 0] = ONE; b[ 0] = ONE;
b[ 1] = ZERO; b[ 1] = ZERO;
#else }
b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1);
#endif #endif
b[ 2] = *(a01 + 2); b[ 2] = *(a01 + 2);
b[ 3] = *(a01 + 3); b[ 3] = *(a01 + 3);
b += 4; b += 4;
} }
}
posY += 2; posY += 2;
} }

View File

@ -301,7 +301,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
dtrmm_kernel_4x8( temp, &alpha , ptrba, ptrbb, C0, C1, C2, C3, C4, C5, C6, C7); dtrmm_kernel_4x8( temp, &alpha , ptrba, ptrbb, C0, C1, C2, C3, C4, C5, C6, C7);
ptrba = ptrba + temp * 4; ptrba = ptrba + temp * 4;
ptrbb = ptrbb + temp * 8; // ptrbb = ptrbb + temp * 8;
/* /*
for (k=0; k<temp; k++) for (k=0; k<temp; k++)
@ -446,7 +446,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
temp - 8; // number of values in B temp - 8; // number of values in B
ptrba += temp*4; // number of values in A ptrba += temp*4; // number of values in A
ptrbb += temp*8; // number of values in B // ptrbb += temp*8; // number of values in B
} }
#ifdef LEFT #ifdef LEFT
off += 4; // number of values in A off += 4; // number of values in A
@ -709,14 +709,14 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A off += 1; // number of values in A
#endif #endif
C0 = C0+1; /* C0 = C0+1;
C1 = C1+1; C1 = C1+1;
C2 = C2+1; C2 = C2+1;
C3 = C3+1; C3 = C3+1;
C4 = C4+1; C4 = C4+1;
C5 = C5+1; C5 = C5+1;
C6 = C6+1; C6 = C6+1;
C7 = C7+1; C7 = C7+1; */
} }
@ -862,7 +862,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
temp - 4; // number of values in B */ temp - 4; // number of values in B */
ptrba += temp*4; // number of values in A ptrba += temp*4; // number of values in A
ptrbb += temp*4; // number of values in B // ptrbb += temp*4; // number of values in B
} }
#ifdef LEFT #ifdef LEFT
off += 4; // number of values in A off += 4; // number of values in A
@ -1049,10 +1049,10 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A off += 1; // number of values in A
#endif #endif
C0 = C0+1; /* C0 = C0+1;
C1 = C1+1; C1 = C1+1;
C2 = C2+1; C2 = C2+1;
C3 = C3+1; C3 = C3+1; */
} }
@ -1311,8 +1311,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A off += 1; // number of values in A
#endif #endif
C0 = C0+1; /* C0 = C0+1;
C1 = C1+1; C1 = C1+1; */
} }
@ -1532,7 +1532,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
off += 1; // number of values in A off += 1; // number of values in A
#endif #endif
C0 = C0+1; // C0 = C0+1;
} }

View File

@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
BLASLONG n, info; BLASLONG n, info;
BLASLONG bk, i, blocking, start_i; BLASLONG bk, i, blocking, start_i;
int mode; int mode;
BLASLONG lda, range_N[2]; BLASLONG lda;//, range_N[2];
blas_arg_t newarg; blas_arg_t newarg;
FLOAT *a; FLOAT *a;
FLOAT alpha[2] = { ONE, ZERO}; FLOAT alpha[2] = { ONE, ZERO};
@ -100,8 +100,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
bk = n - i; bk = n - i;
if (bk > blocking) bk = blocking; if (bk > blocking) bk = blocking;
range_N[0] = i; /* range_N[0] = i;
range_N[1] = i + bk; range_N[1] = i + bk; */
newarg.lda = lda; newarg.lda = lda;
newarg.ldb = lda; newarg.ldb = lda;

View File

@ -54,7 +54,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
BLASLONG n, info; BLASLONG n, info;
BLASLONG bk, i, blocking; BLASLONG bk, i, blocking;
int mode; int mode;
BLASLONG lda, range_N[2]; BLASLONG lda; // , range_N[2];
blas_arg_t newarg; blas_arg_t newarg;
FLOAT *a; FLOAT *a;
FLOAT alpha[2] = { ONE, ZERO}; FLOAT alpha[2] = { ONE, ZERO};
@ -96,8 +96,8 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
bk = n - i; bk = n - i;
if (bk > blocking) bk = blocking; if (bk > blocking) bk = blocking;
range_N[0] = i; /* range_N[0] = i;
range_N[1] = i + bk; range_N[1] = i + bk; */
newarg.lda = lda; newarg.lda = lda;
newarg.ldb = lda; newarg.ldb = lda;