Merge pull request #3273 from austinpagan/sbgemm_gcc10_fix
Power10: Fix for SBGEMM
This commit is contained in:
commit
c4b464cac6
|
@ -98,6 +98,30 @@ typedef FLOAT v2sf_t __attribute__ ((vector_size (8)));
|
||||||
rowC = (v2sf_t *) &CO[7* ldc+J]; \
|
rowC = (v2sf_t *) &CO[7* ldc+J]; \
|
||||||
rowC[0] += result[6] * alpha;
|
rowC[0] += result[6] * alpha;
|
||||||
|
|
||||||
|
#define SAVE4x2_ACC_SCALAR(ACC) { \
|
||||||
|
__builtin_mma_disassemble_acc ((void *)result, ACC); \
|
||||||
|
res[0] = result[0] * alpha; \
|
||||||
|
res[1] = result[1] * alpha; \
|
||||||
|
res[2] = result[2] * alpha; \
|
||||||
|
res[3] = result[3] * alpha; \
|
||||||
|
CO[0 * ldc] += res[0][0]; \
|
||||||
|
CO[1 * ldc] += res[1][0]; \
|
||||||
|
CO[2 * ldc] += res[2][0]; \
|
||||||
|
CO[3 * ldc] += res[3][0]; \
|
||||||
|
}
|
||||||
|
|
||||||
|
#define SAVE4x2_ACC1_SCALAR(ACC) { \
|
||||||
|
__builtin_mma_disassemble_acc ((void *)result, ACC); \
|
||||||
|
res[0] = result[0] * alpha; \
|
||||||
|
res[1] = result[1] * alpha; \
|
||||||
|
res[2] = result[2] * alpha; \
|
||||||
|
res[3] = result[3] * alpha; \
|
||||||
|
CO[4 * ldc] += res[0][0]; \
|
||||||
|
CO[5 * ldc] += res[1][0]; \
|
||||||
|
CO[6 * ldc] += res[2][0]; \
|
||||||
|
CO[7 * ldc] += res[3][0]; \
|
||||||
|
}
|
||||||
|
|
||||||
#define MMA __builtin_mma_xvbf16ger2pp
|
#define MMA __builtin_mma_xvbf16ger2pp
|
||||||
|
|
||||||
#define SAVE2x4_ACC(ACC, J) \
|
#define SAVE2x4_ACC(ACC, J) \
|
||||||
|
@ -313,7 +337,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
|
||||||
{
|
{
|
||||||
IFLOAT *BO = B;
|
IFLOAT *BO = B;
|
||||||
v2sf_t *rowC;
|
v2sf_t *rowC;
|
||||||
v2sf_t result[8];
|
v4sf_t result[4], res[4];
|
||||||
__vector_quad acc0, acc1;
|
__vector_quad acc0, acc1;
|
||||||
__builtin_mma_xxsetaccz (&acc0);
|
__builtin_mma_xxsetaccz (&acc0);
|
||||||
__builtin_mma_xxsetaccz (&acc1);
|
__builtin_mma_xxsetaccz (&acc1);
|
||||||
|
@ -335,8 +359,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
|
||||||
MMA (&acc0, MERGE_HIGH (rowB[0], vzero), (vec_t) rowA);
|
MMA (&acc0, MERGE_HIGH (rowB[0], vzero), (vec_t) rowA);
|
||||||
MMA (&acc1, MERGE_LOW (rowB[0], vzero), (vec_t) rowA);
|
MMA (&acc1, MERGE_LOW (rowB[0], vzero), (vec_t) rowA);
|
||||||
}
|
}
|
||||||
SAVE4x2_ACC (&acc0, 0);
|
SAVE4x2_ACC_SCALAR (&acc0);
|
||||||
SAVE4x2_ACC1 (&acc1, 0);
|
SAVE4x2_ACC1_SCALAR (&acc1);
|
||||||
CO += 1;
|
CO += 1;
|
||||||
AO += k;
|
AO += k;
|
||||||
BO += (k << 3);
|
BO += (k << 3);
|
||||||
|
@ -547,7 +571,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
|
||||||
{
|
{
|
||||||
IFLOAT *BO = B;
|
IFLOAT *BO = B;
|
||||||
v2sf_t *rowC;
|
v2sf_t *rowC;
|
||||||
v2sf_t result[8];
|
v4sf_t result[4], res[4];
|
||||||
__vector_quad acc0;
|
__vector_quad acc0;
|
||||||
BLASLONG l = 0;
|
BLASLONG l = 0;
|
||||||
__builtin_mma_xxsetaccz (&acc0);
|
__builtin_mma_xxsetaccz (&acc0);
|
||||||
|
@ -571,7 +595,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
|
||||||
};
|
};
|
||||||
MMA (&acc0, (vec_t)(rowB_mrg), (vec_t) rowA);
|
MMA (&acc0, (vec_t)(rowB_mrg), (vec_t) rowA);
|
||||||
}
|
}
|
||||||
SAVE4x2_ACC (&acc0, 0);
|
SAVE4x2_ACC_SCALAR (&acc0);
|
||||||
AO += k;
|
AO += k;
|
||||||
BO += (k << 2);
|
BO += (k << 2);
|
||||||
CO += 1;
|
CO += 1;
|
||||||
|
|
Loading…
Reference in New Issue