Small Matrix: skylakex: fix build error with old compilers

This commit is contained in:
Wangyang Guo 2021-08-05 04:43:47 +00:00
parent 76ea8db4da
commit 44d0032f3b
6 changed files with 14 additions and 14 deletions

View File

@ -372,8 +372,8 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
0, 1, 0|8, 1|8, 4, 5, 4|8, 5|8,
2, 3, 2|8, 3|8, 6, 7, 6|8, 7|8,
};
__m512i idx_lo = _mm512_loadu_epi64(permute_table);
__m512i idx_hi = _mm512_loadu_epi64(permute_table + 8);
__m512i idx_lo = _mm512_loadu_si512(permute_table);
__m512i idx_hi = _mm512_loadu_si512(permute_table + 8);
for (; i < m4; i += 4, mi += 4) {
for (j = 0; j < n4; j += 4) {
DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(1, 0); DECLARE_RESULT_512(2, 0); DECLARE_RESULT_512(3, 0);

View File

@ -385,7 +385,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
for (int ii = 0; ii < 8; ii++) {
index_n[ii] = ii * ldc;
}
__m512i vindex_n = _mm512_loadu_epi64(index_n);
__m512i vindex_n = _mm512_loadu_si512(index_n);
for (; i < m4; i += 4) {
for (j = 0; j < n32; j += 32) {
DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(1, 0); DECLARE_RESULT_512(2, 0); DECLARE_RESULT_512(3, 0);

View File

@ -105,8 +105,8 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
0, 1, 0|8, 1|8, 4, 5, 4|8, 5|8,
2, 3, 2|8, 3|8, 6, 7, 6|8, 7|8,
};
__m512i idx_lo = _mm512_loadu_epi64(permute_table);
__m512i idx_hi = _mm512_loadu_epi64(permute_table + 8);
__m512i idx_lo = _mm512_loadu_si512(permute_table);
__m512i idx_hi = _mm512_loadu_si512(permute_table + 8);
for (i = 0; i < m4; i += 4) {
for (j = 0; j < n4; j += 4) {

View File

@ -189,8 +189,8 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
0, 1, 4, 5, 0|8, 1|8, 4|8, 5|8,
2, 3, 6, 7, 2|8, 3|8, 6|8, 7|8,
};
__m512i idx_lo = _mm512_loadu_epi64(permute_table);
__m512i idx_hi = _mm512_loadu_epi64(permute_table + 8);
__m512i idx_lo = _mm512_loadu_si512(permute_table);
__m512i idx_hi = _mm512_loadu_si512(permute_table + 8);
for (i = 0; i < m8; i += 8) {
for (j = 0; j < n16; j += 16) {
@ -235,8 +235,8 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
0, 1, 0|8, 1|8, 4, 5, 4|8, 5|8,
2, 3, 2|8, 3|8, 6, 7, 6|8, 7|8,
};
idx_lo = _mm512_loadu_epi64(permute_table2);
idx_hi = _mm512_loadu_epi64(permute_table2 + 8);
idx_lo = _mm512_loadu_si512(permute_table2);
idx_hi = _mm512_loadu_si512(permute_table2 + 8);
for (j = 0; j < n32; j += 32) {
DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(1, 0); DECLARE_RESULT_512(2, 0); DECLARE_RESULT_512(3, 0);
@ -289,7 +289,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
for (int ii = 0; ii < 8; ii++) {
index_n[ii] = ii * ldc;
}
__m512i vindex_n = _mm512_loadu_epi64(index_n);
__m512i vindex_n = _mm512_loadu_si512(index_n);
#if !defined(B0)
__m512d beta_512 = _mm512_broadcastsd_pd(_mm_load_sd(&beta));
#endif

View File

@ -385,7 +385,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
for (int ii = 0; ii < 16; ii++) {
index_n[ii] = ii * ldc;
}
__m512i vindex_n = _mm512_loadu_epi32(index_n);
__m512i vindex_n = _mm512_loadu_si512(index_n);
for (; i < m4; i += 4) {
for (j = 0; j < n64; j += 64) {
DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(1, 0); DECLARE_RESULT_512(2, 0); DECLARE_RESULT_512(3, 0);

View File

@ -215,8 +215,8 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b,
0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f,
};
__m512i idx_lo = _mm512_loadu_epi32(permute_table);
__m512i idx_hi = _mm512_loadu_epi32(permute_table + 16);
__m512i idx_lo = _mm512_loadu_si512(permute_table);
__m512i idx_hi = _mm512_loadu_si512(permute_table + 16);
__mmask16 kc = 0xcccc;
__mmask16 k3 = 0x3333;
__mmask8 mask8 = 0xff; // force use AVX128 instead of SSE
@ -311,7 +311,7 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp
for (int ii = 0; ii < 16; ii++) {
index_n[ii] = ii * ldc;
}
__m512i vindex_n = _mm512_loadu_epi32(index_n);
__m512i vindex_n = _mm512_loadu_si512(index_n);
#if !defined(B0)
__m512 beta_512 = _mm512_broadcastss_ps(_mm_load_ss(&beta));
#endif