Revert changes from PR#1419

At least one of these changes is apparently an oversimplification, leading to TRMM breakage on some platforms, as observed in #1563.
This commit is contained in:
Martin Kroeker 2018-05-17 11:40:08 +02:00 committed by GitHub
parent 6791294312
commit 7a7619af6d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 147 additions and 84 deletions

View File

@ -116,22 +116,34 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) { if (m & 1) {
if (X > posY) { if (X > posY) {
/* ao1 += 1; ao1 += 1;
ao2 += 1; */ ao2 += 1;
b += 2; b += 2;
} else } else
#ifdef UNIT
if (X < posY) { if (X < posY) {
#endif data01 = *(ao1 + 0);
b[ 0] = *(ao1 + 0); data02 = *(ao1 + 1);
#ifdef UNIT
b[ 0] = data01;
b[ 1] = data02;
ao1 += lda;
b += 2;
} else { } else {
#ifdef UNIT
data02 = *(ao1 + 1);
b[ 0] = ONE; b[ 0] = ONE;
b[ 1] = data02;
#else
data01 = *(ao1 + 0);
data02 = *(ao1 + 1);
b[ 0] = data01;
b[ 1] = data02;
#endif
ao1 += 2;
b += 2;
} }
#endif
b[ 1] = *(ao1 + 1);
b += 2;
} }
posY += 2; posY += 2;
@ -178,7 +190,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }
// posY += 1; posY += 1;
} }
return 0; return 0;

View File

@ -518,7 +518,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i = (m & 15); i = (m & 15);
if (i > 0) { if (i > 0) {
if (X < posY) { if (X < posY) {
/* a01 += i; a01 += i;
a02 += i; a02 += i;
a03 += i; a03 += i;
a04 += i; a04 += i;
@ -533,7 +533,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
a13 += i; a13 += i;
a14 += i; a14 += i;
a15 += i; a15 += i;
a16 += i; */ a16 += i;
b += 16 * i; b += 16 * i;
} else } else
if (X > posY) { if (X > posY) {
@ -1130,14 +1130,14 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i = (m & 7); i = (m & 7);
if (i > 0) { if (i > 0) {
if (X < posY) { if (X < posY) {
/* a01 += i; a01 += i;
a02 += i; a02 += i;
a03 += i; a03 += i;
a04 += i; a04 += i;
a05 += i; a05 += i;
a06 += i; a06 += i;
a07 += i; a07 += i;
a08 += i; */ a08 += i;
b += 8 * i; b += 8 * i;
} else } else
if (X > posY) { if (X > posY) {
@ -1156,13 +1156,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b += 8; b += 8;
} }
/* a02 += i * lda; a02 += i * lda;
a03 += i * lda; a03 += i * lda;
a04 += i * lda; a04 += i * lda;
a05 += i * lda; a05 += i * lda;
a06 += i * lda; a06 += i * lda;
a07 += i * lda; a07 += i * lda;
a08 += i * lda; */ a08 += i * lda;
} else { } else {
#ifdef UNIT #ifdef UNIT
b[ 0] = ONE; b[ 0] = ONE;
@ -1371,10 +1371,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i = (m & 3); i = (m & 3);
if (i > 0) { if (i > 0) {
if (X < posY) { if (X < posY) {
/* a01 += i; a01 += i;
a02 += i; a02 += i;
a03 += i; a03 += i;
a04 += i; */ a04 += i;
b += 4 * i; b += 4 * i;
} else } else
if (X > posY) { if (X > posY) {
@ -1387,9 +1387,9 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
a01 += lda; a01 += lda;
b += 4; b += 4;
} }
/* a02 += lda; a02 += lda;
a03 += lda; a03 += lda;
a04 += lda; */ a04 += lda;
} else { } else {
#ifdef UNIT #ifdef UNIT
@ -1487,19 +1487,23 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) { if (X < posY) {
a01 ++; a01 ++;
a02 ++; a02 ++;
} else { b += 2;
#ifdef UNIT } else
if (X > posY) { if (X > posY) {
#endif
b[ 0] = *(a01 + 0); b[ 0] = *(a01 + 0);
#ifdef UNIT b[ 1] = *(a01 + 1);
a01 += lda;
b += 2;
} else { } else {
#ifdef UNIT
b[ 0] = ONE; b[ 0] = ONE;
} b[ 1] = *(a01 + 1);
#else
b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1);
#endif #endif
b[ 1] = *(a01 + 1); b += 2;
} }
b += 2;
} }
posY += 2; posY += 2;
} }
@ -1518,25 +1522,28 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (i > 0) { if (i > 0) {
do { do {
if (X < posY) { if (X < posY) {
a01 ++; a01 += 1;
} else { b ++;
#ifdef UNIT } else
if (X > posY) { if (X > posY) {
#endif
b[ 0] = *(a01 + 0); b[ 0] = *(a01 + 0);
#ifdef UNIT a01 += lda;
b ++;
} else { } else {
#ifdef UNIT
b[ 0] = ONE; b[ 0] = ONE;
} #else
b[ 0] = *(a01 + 0);
#endif #endif
a01 += lda; a01 += lda;
} b ++;
b ++; }
X ++;
i --; X += 1;
i --;
} while (i > 0); } while (i > 0);
} }
// posY += 1; posY += 1;
} }
return 0; return 0;

View File

@ -117,8 +117,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (m & 1) { if (m & 1) {
if (X < posY) { if (X < posY) {
/* ao1 += 1; ao1 += 1;
ao2 += 1; */ ao2 += 1;
b += 2; b += 2;
} else } else
if (X > posY) { if (X > posY) {
@ -127,7 +127,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;
// ao1 += lda; ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@ -139,7 +139,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = ZERO; b[ 1] = ZERO;
#endif #endif
// ao1 += lda; ao1 += lda;
b += 2; b += 2;
} }
} }
@ -161,18 +161,27 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i = m; i = m;
if (m > 0) { if (m > 0) {
do { do {
if (X < posY) {
b += 1;
ao1 += 1;
} else
if (X > posY) {
data01 = *(ao1 + 0);
b[ 0] = data01;
b += 1;
ao1 += lda;
} else {
#ifdef UNIT #ifdef UNIT
if (X > posY) { b[ 0] = ONE;
#else
data01 = *(ao1 + 0);
b[ 0] = data01;
#endif #endif
b[ 0] = *(ao1 + 0); b += 1;
#ifdef UNIT ao1 += lda;
} else { }
b[ 0] = ONE;
} X += 1;
#endif
b ++;
ao1 += lda;
X ++;
i --; i --;
} while (i > 0); } while (i > 0);
} }

View File

@ -201,18 +201,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) { if (X < posY) {
if (m & 2) { if (m & 2) {
/* ao1 += 2; ao1 += 2;
ao2 += 2; ao2 += 2;
ao3 += 2; ao3 += 2;
ao4 += 2; */ ao4 += 2;
b += 8; b += 8;
} }
if (m & 1) { if (m & 1) {
/* ao1 += 1; ao1 += 1;
ao2 += 1; ao2 += 1;
ao3 += 1; ao3 += 1;
ao4 += 1; */ ao4 += 1;
b += 4; b += 4;
} }
@ -238,7 +238,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 7] = data08; b[ 7] = data08;
ao1 += 2 * lda; ao1 += 2 * lda;
// ao2 += 2 * lda; ao2 += 2 * lda;
b += 8; b += 8;
} }
@ -253,7 +253,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 2] = data03; b[ 2] = data03;
b[ 3] = data04; b[ 3] = data04;
// ao1 += lda; ao1 += lda;
b += 4; b += 4;
} }
@ -401,7 +401,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (i) { if (i) {
if (X < posY) { if (X < posY) {
// ao1 += 2; ao1 += 2;
b += 2; b += 2;
} else } else
if (X > posY) { if (X > posY) {
@ -411,7 +411,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
b[ 0] = data01; b[ 0] = data01;
b[ 1] = data02; b[ 1] = data02;
// ao1 += lda; ao1 += lda;
b += 2; b += 2;
} else { } else {
#ifdef UNIT #ifdef UNIT
@ -443,21 +443,26 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
do { do {
if (X < posY) { if (X < posY) {
b += 1;
ao1 += 1; ao1 += 1;
} else { } else
#ifdef UNIT
if (X > posY) { if (X > posY) {
#endif data01 = *(ao1 + 0);
b[ 0] = *(ao1 + 0); b[ 0] = data01;
#ifdef UNIT ao1 += lda;
b += 1;
} else { } else {
#ifdef UNIT
b[ 0] = ONE; b[ 0] = ONE;
} #else
data01 = *(ao1 + 0);
b[ 0] = data01;
#endif #endif
ao1 += lda; ao1 += lda;
} b += 1;
b ++; }
X ++;
X += 1;
i --; i --;
} while (i > 0); } while (i > 0);
} }

View File

@ -206,7 +206,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
} }
a1 += 2 * lda; a1 += 2 * lda;
// a2 += 2 * lda; a2 += 2 * lda;
b += 8; b += 8;
ii += 2; ii += 2;

View File

@ -139,18 +139,48 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} }
if (m & 1) { if (m & 1) {
#ifdef UNIT
if (X > posY) {
ao1 += 2;
ao2 += 2;
b += 4;
} else
if (X < posY) { if (X < posY) {
#endif data1 = *(ao1 + 0);
b[ 0] = *(ao1 + 0); data2 = *(ao1 + 1);
b[ 1] = *(ao1 + 1); data3 = *(ao1 + 2);
#ifdef UNIT data4 = *(ao1 + 3);
b[ 0] = data1;
b[ 1] = data2;
b[ 2] = data3;
b[ 3] = data4;
ao1 += lda;
b += 4;
} else { } else {
#ifdef UNIT
data3 = *(ao1 + 2);
data4 = *(ao1 + 3);
b[ 0] = ONE; b[ 0] = ONE;
b[ 1] = ZERO; b[ 1] = ZERO;
} b[ 2] = data3;
b[ 3] = data4;
#else
data1 = *(ao1 + 0);
data2 = *(ao1 + 1);
data3 = *(ao1 + 2);
data4 = *(ao1 + 3);
b[ 0] = data1;
b[ 1] = data2;
b[ 2] = data3;
b[ 3] = data4;
#endif #endif
b += 4; b += 4;
}
} }
posY += 2; posY += 2;
@ -203,7 +233,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
} while (i > 0); } while (i > 0);
} }
// posY += 1; posY += 1;
} }
return 0; return 0;

View File

@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
BLASLONG i, ii, j, jj; BLASLONG i, ii, j, jj;
FLOAT data01 = 0.0, data02 = 0.0; FLOAT data01, data02;
FLOAT *a1; FLOAT *a1;
lda *= 2; lda *= 2;

View File

@ -43,8 +43,8 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
BLASLONG i, ii, j, jj; BLASLONG i, ii, j, jj;
FLOAT data01 = 0.0, data02 = 0.0, data03, data04; FLOAT data01, data02, data03, data04;
FLOAT data05, data06, data07 = 0.0, data08 = 0.0; FLOAT data05, data06, data07, data08;
FLOAT *a1, *a2; FLOAT *a1, *a2;
lda *= 2; lda *= 2;