skylakex: Make the sgemm/dgemm beta code robust for an N=0 or M=0 case

In the threading code there are cases where N or M can become 0,
and the optimized beta code did not handle this well, leading
to a crash.

During the audit for the crash, a few edge conditions in the while-loop
conditions (`>` where `>=` was intended) were found and fixed as well.
This commit is contained in:
Arjan van de Ven 2018-11-01 01:42:09 +00:00
parent f5595d0262
commit dcc5d6291e
2 changed files with 8 additions and 4 deletions

View File

@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
return 0;
}
if (m == 0 || n == 0)
return 0;
c_offset = c;
@ -69,7 +71,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
i = m;
while (i > 32) {
while (i >= 32) {
_mm512_storeu_pd(c_offset1, z_zero);
_mm512_storeu_pd(c_offset1 + 8, z_zero);
_mm512_storeu_pd(c_offset1 + 16, z_zero);
@ -77,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
c_offset1 += 32;
i -= 32;
}
while (i > 8) {
while (i >= 8) {
_mm512_storeu_pd(c_offset1, z_zero);
c_offset1 += 8;
i -= 8;

View File

@ -55,6 +55,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
return 0;
}
if (n == 0 || m == 0)
return;
c_offset = c;
@ -71,13 +73,13 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
i = m;
while (i > 32) {
while (i >= 32) {
_mm512_storeu_ps(c_offset1, z_zero);
_mm512_storeu_ps(c_offset1 + 16, z_zero);
c_offset1 += 32;
i -= 32;
}
while (i > 8) {
while (i >= 8) {
_mm256_storeu_ps(c_offset1, y_zero);
c_offset1 += 8;
i -= 8;