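LAPACK helpers in C that need care too: comment out trailing pointer increments whose results are not used afterwards, drop the unused offsetB variable, and assign the ONE/ZERO defaults for ajj_r/ajj_i only in the #else branch of #ifndef UNIT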

This commit is contained in:
Andrew 2018-01-02 14:38:50 +01:00
parent 8aafa0473c
commit d602b99386
11 changed files with 27 additions and 25 deletions

View File

@ -288,7 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint
i++; i++;
} while (i <= k2); } while (i <= k2);
a += lda; // a += lda;
} }
return 0; return 0;

View File

@ -379,7 +379,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
if (m & 1){ if (m & 1){
ctemp01 = *(aoffset1 + 0); ctemp01 = *(aoffset1 + 0);
*(boffset + 0) = -ctemp01; *(boffset + 0) = -ctemp01;
boffset += 1; // boffset += 1;
} }
} }

View File

@ -719,10 +719,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
if (m & 1){ if (m & 1){
aoffset1 = aoffset; aoffset1 = aoffset;
aoffset += lda; // aoffset += lda;
boffset1 = boffset; boffset1 = boffset;
boffset += 8; // boffset += 8;
i = (n >> 3); i = (n >> 3);
if (i > 0){ if (i > 0){
@ -762,7 +762,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset2 + 1) = -ctemp02; *(boffset2 + 1) = -ctemp02;
*(boffset2 + 2) = -ctemp03; *(boffset2 + 2) = -ctemp03;
*(boffset2 + 3) = -ctemp04; *(boffset2 + 3) = -ctemp04;
boffset2 += 4; // boffset2 += 4;
} }
if (n & 2){ if (n & 2){
@ -772,7 +772,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset3 + 0) = -ctemp01; *(boffset3 + 0) = -ctemp01;
*(boffset3 + 1) = -ctemp02; *(boffset3 + 1) = -ctemp02;
boffset3 += 2; // boffset3 += 2;
} }
if (n & 1){ if (n & 1){

View File

@ -372,7 +372,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint
*(b1 + 0) = A1; *(b1 + 0) = A1;
*(b1 + 1) = A2; *(b1 + 1) = A2;
} }
buffer += 2; // buffer += 2;
} }
} }

View File

@ -702,7 +702,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint
*(b1 + 0) = A1; *(b1 + 0) = A1;
*(b1 + 1) = A2; *(b1 + 1) = A2;
} }
buffer += 2; // buffer += 2;
} }
} }

View File

@ -140,7 +140,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(b_offset1 + 6) = -ctemp11; *(b_offset1 + 6) = -ctemp11;
*(b_offset1 + 7) = -ctemp12; *(b_offset1 + 7) = -ctemp12;
b_offset1 += m * 4; // b_offset1 += m * 4;
a_offset1 += 4; a_offset1 += 4;
a_offset2 += 4; a_offset2 += 4;
} }
@ -204,7 +204,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(b_offset + 2) = -ctemp3; *(b_offset + 2) = -ctemp3;
*(b_offset + 3) = -ctemp4; *(b_offset + 3) = -ctemp4;
b_offset += m * 4; // b_offset += m * 4;
a_offset += 4; a_offset += 4;
} }

View File

@ -233,10 +233,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset3 + 6) = -ctemp07; *(boffset3 + 6) = -ctemp07;
*(boffset3 + 7) = -ctemp08; *(boffset3 + 7) = -ctemp08;
aoffset1 += 2; /* aoffset1 += 2;
aoffset2 += 2; aoffset2 += 2;
aoffset3 += 2; aoffset3 += 2;
aoffset4 += 2; aoffset4 += 2; */
boffset3 += 8; boffset3 += 8;
} }
@ -293,8 +293,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
aoffset1 += 8; aoffset1 += 8;
aoffset2 += 8; aoffset2 += 8;
aoffset3 += 8; /* aoffset3 += 8;
aoffset4 += 8; aoffset4 += 8; */
boffset1 += m * 8; boffset1 += m * 8;
i --; i --;
@ -338,8 +338,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset3 + 2) = -ctemp03; *(boffset3 + 2) = -ctemp03;
*(boffset3 + 3) = -ctemp04; *(boffset3 + 3) = -ctemp04;
aoffset1 += 2; /* aoffset1 += 2;
aoffset2 += 2; aoffset2 += 2; */
boffset3 += 4; boffset3 += 4;
} }
} }
@ -387,7 +387,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset2 + 3) = -ctemp04; *(boffset2 + 3) = -ctemp04;
aoffset1 += 4; aoffset1 += 4;
boffset2 += 4; // boffset2 += 4;
} }
if (n & 1){ if (n & 1){

View File

@ -324,7 +324,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
if (n & 1){ if (n & 1){
aoffset1 = aoffset; aoffset1 = aoffset;
aoffset2 = aoffset + lda; aoffset2 = aoffset + lda;
aoffset += 2; // aoffset += 2;
i = (m >> 1); i = (m >> 1);
if (i > 0){ if (i > 0){
@ -353,7 +353,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, FLOAT *b){
*(boffset + 0) = -ctemp01; *(boffset + 0) = -ctemp01;
*(boffset + 1) = -ctemp02; *(boffset + 1) = -ctemp02;
boffset += 2; // boffset += 2;
} }
} }

View File

@ -51,7 +51,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
BLASLONG jjs, min_jj; BLASLONG jjs, min_jj;
blasint *ipiv, iinfo, info; blasint *ipiv, iinfo, info;
BLASLONG jb, mn, blocking; BLASLONG jb, mn, blocking;
FLOAT *a, *offsetA, *offsetB; FLOAT *a, *offsetA; //, *offsetB;
BLASLONG range_N[2]; BLASLONG range_N[2];
FLOAT *sbb; FLOAT *sbb;
@ -99,7 +99,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
if (jb > blocking) jb = blocking; if (jb > blocking) jb = blocking;
offsetA = a + j * lda * COMPSIZE; offsetA = a + j * lda * COMPSIZE;
offsetB = a + (j + jb) * lda * COMPSIZE; // offsetB = a + (j + jb) * lda * COMPSIZE;
range_N[0] = offset + j; range_N[0] = offset + j;
range_N[1] = offset + j + jb; range_N[1] = offset + j + jb;

View File

@ -67,8 +67,6 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
for (j = n - 1; j >= 0; j--) { for (j = n - 1; j >= 0; j--) {
ajj_r = ONE;
ajj_i = ZERO;
#ifndef UNIT #ifndef UNIT
ajj_r = *(a + (j + j * lda) * COMPSIZE + 0); ajj_r = *(a + (j + j * lda) * COMPSIZE + 0);
@ -88,6 +86,9 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
*(a + (j + j * lda) * COMPSIZE + 0) = ajj_r; *(a + (j + j * lda) * COMPSIZE + 0) = ajj_r;
*(a + (j + j * lda) * COMPSIZE + 1) = ajj_i; *(a + (j + j * lda) * COMPSIZE + 1) = ajj_i;
#else
ajj_r = ONE;
ajj_i = ZERO;
#endif #endif
ZTRMV (n - j - 1, ZTRMV (n - j - 1,

View File

@ -67,8 +67,6 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
for (j = 0; j < n; j++) { for (j = 0; j < n; j++) {
ajj_r = ONE;
ajj_i = ZERO;
#ifndef UNIT #ifndef UNIT
ajj_r = *(a + (j + j * lda) * COMPSIZE + 0); ajj_r = *(a + (j + j * lda) * COMPSIZE + 0);
@ -89,6 +87,9 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
*(a + (j + j * lda) * COMPSIZE + 0) = ajj_r; *(a + (j + j * lda) * COMPSIZE + 0) = ajj_r;
*(a + (j + j * lda) * COMPSIZE + 1) = ajj_i; *(a + (j + j * lda) * COMPSIZE + 1) = ajj_i;
#else
ajj_r = ONE;
ajj_i = ZERO;
#endif #endif
ZTRMV (j, ZTRMV (j,