skylakex: Make the sgemm/dgemm beta code robust for an N=0 or M=0 case
In the threading code there are cases where N or M can become 0, and the optimized beta code did not handle this well, leading to a crash. During the audit for the crash, a few edge conditions on the if statements were found and fixed as well.
This commit is contained in:
parent
f5595d0262
commit
dcc5d6291e
|
@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (m == 0 || n == 0)
|
||||
return 0;
|
||||
|
||||
c_offset = c;
|
||||
|
||||
|
@ -69,7 +71,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
|||
|
||||
i = m;
|
||||
|
||||
while (i > 32) {
|
||||
while (i >= 32) {
|
||||
_mm512_storeu_pd(c_offset1, z_zero);
|
||||
_mm512_storeu_pd(c_offset1 + 8, z_zero);
|
||||
_mm512_storeu_pd(c_offset1 + 16, z_zero);
|
||||
|
@ -77,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
|||
c_offset1 += 32;
|
||||
i -= 32;
|
||||
}
|
||||
while (i > 8) {
|
||||
while (i >= 8) {
|
||||
_mm512_storeu_pd(c_offset1, z_zero);
|
||||
c_offset1 += 8;
|
||||
i -= 8;
|
||||
|
|
|
@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (n == 0 || m == 0)
|
||||
return;
|
||||
|
||||
c_offset = c;
|
||||
|
||||
|
@ -71,13 +73,13 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
|||
|
||||
i = m;
|
||||
|
||||
while (i > 32) {
|
||||
while (i >= 32) {
|
||||
_mm512_storeu_ps(c_offset1, z_zero);
|
||||
_mm512_storeu_ps(c_offset1 + 16, z_zero);
|
||||
c_offset1 += 32;
|
||||
i -= 32;
|
||||
}
|
||||
while (i > 8) {
|
||||
while (i >= 8) {
|
||||
_mm256_storeu_ps(c_offset1, y_zero);
|
||||
c_offset1 += 8;
|
||||
i -= 8;
|
||||
|
|
Loading…
Reference in New Issue