Power10: Fix for SBGEMM
While testing the bfloat16 SBGEMM kernel, some failures were seen for odd-valued inputs because the result was being updated for additional bytes.
This commit is contained in:
parent
dbba381dc3
commit
e6dd44d989
|
@ -98,6 +98,30 @@ typedef FLOAT v2sf_t __attribute__ ((vector_size (8)));
|
|||
rowC = (v2sf_t *) &CO[7* ldc+J]; \
|
||||
rowC[0] += result[6] * alpha;
|
||||
|
||||
/*
 * SAVE4x2_ACC_SCALAR(ACC)
 *
 * Unpacks the accumulator pointed to by ACC into result[0..3], scales each
 * of those four vectors by alpha into res[0..3], and then adds only
 * element 0 of each scaled vector to CO[0 * ldc] .. CO[3 * ldc].
 * Exactly one element of C is updated per ldc stride; no neighbouring
 * element is read or written.
 *
 * The macro expands in the caller's scope and uses the caller's
 * `result`, `res`, `alpha`, `CO` and `ldc`.
 *
 * The body is wrapped in do { } while (0) so that `SAVE4x2_ACC_SCALAR (x);`
 * is a single statement and stays valid as the body of an unbraced
 * if/else.
 */
#define SAVE4x2_ACC_SCALAR(ACC) \
  do { \
    __builtin_mma_disassemble_acc ((void *) result, (ACC)); \
    res[0] = result[0] * alpha; \
    res[1] = result[1] * alpha; \
    res[2] = result[2] * alpha; \
    res[3] = result[3] * alpha; \
    CO[0 * ldc] += res[0][0]; \
    CO[1 * ldc] += res[1][0]; \
    CO[2 * ldc] += res[2][0]; \
    CO[3 * ldc] += res[3][0]; \
  } while (0)
|
||||
|
||||
/*
 * SAVE4x2_ACC1_SCALAR(ACC)
 *
 * Unpacks the accumulator pointed to by ACC into result[0..3], scales each
 * of those four vectors by alpha into res[0..3], and then adds only
 * element 0 of each scaled vector to CO[4 * ldc] .. CO[7 * ldc].
 * Exactly one element of C is updated per ldc stride; no neighbouring
 * element is read or written.
 *
 * The macro expands in the caller's scope and uses the caller's
 * `result`, `res`, `alpha`, `CO` and `ldc`.
 *
 * The body is wrapped in do { } while (0) so that `SAVE4x2_ACC1_SCALAR (x);`
 * is a single statement and stays valid as the body of an unbraced
 * if/else.
 */
#define SAVE4x2_ACC1_SCALAR(ACC) \
  do { \
    __builtin_mma_disassemble_acc ((void *) result, (ACC)); \
    res[0] = result[0] * alpha; \
    res[1] = result[1] * alpha; \
    res[2] = result[2] * alpha; \
    res[3] = result[3] * alpha; \
    CO[4 * ldc] += res[0][0]; \
    CO[5 * ldc] += res[1][0]; \
    CO[6 * ldc] += res[2][0]; \
    CO[7 * ldc] += res[3][0]; \
  } while (0)
|
||||
|
||||
/* MMA: shorthand for the __builtin_mma_xvbf16ger2pp builtin; the kernel
   calls it with an accumulator pointer followed by two vector operands. */
#define MMA __builtin_mma_xvbf16ger2pp
|
||||
|
||||
#define SAVE2x4_ACC(ACC, J) \
|
||||
|
@ -313,7 +337,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
|
|||
{
|
||||
IFLOAT *BO = B;
|
||||
v2sf_t *rowC;
|
||||
v2sf_t result[8];
|
||||
v4sf_t result[4], res[4];
|
||||
__vector_quad acc0, acc1;
|
||||
__builtin_mma_xxsetaccz (&acc0);
|
||||
__builtin_mma_xxsetaccz (&acc1);
|
||||
|
@ -335,8 +359,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
|
|||
MMA (&acc0, MERGE_HIGH (rowB[0], vzero), (vec_t) rowA);
|
||||
MMA (&acc1, MERGE_LOW (rowB[0], vzero), (vec_t) rowA);
|
||||
}
|
||||
SAVE4x2_ACC (&acc0, 0);
|
||||
SAVE4x2_ACC1 (&acc1, 0);
|
||||
SAVE4x2_ACC_SCALAR (&acc0);
|
||||
SAVE4x2_ACC1_SCALAR (&acc1);
|
||||
CO += 1;
|
||||
AO += k;
|
||||
BO += (k << 3);
|
||||
|
@ -547,7 +571,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
|
|||
{
|
||||
IFLOAT *BO = B;
|
||||
v2sf_t *rowC;
|
||||
v2sf_t result[8];
|
||||
v4sf_t result[4], res[4];
|
||||
__vector_quad acc0;
|
||||
BLASLONG l = 0;
|
||||
__builtin_mma_xxsetaccz (&acc0);
|
||||
|
@ -571,7 +595,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT * A,
|
|||
};
|
||||
MMA (&acc0, (vec_t)(rowB_mrg), (vec_t) rowA);
|
||||
}
|
||||
SAVE4x2_ACC (&acc0, 0);
|
||||
SAVE4x2_ACC_SCALAR (&acc0);
|
||||
AO += k;
|
||||
BO += (k << 2);
|
||||
CO += 1;
|
||||
|
|
Loading…
Reference in New Issue