Merge pull request #3542 from martin-frbg/issue3540
Fix compilation for CooperLake on Windows/clang
This commit is contained in:
commit
522f809825
|
@@ -114,10 +114,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc))
|
asm("vmovups %0, (%1, %2, 4)": : "v"(val1), "r"(addr), "r"(ldc))
|
||||||
|
|
||||||
#define _MASK_STORE_C_2nx16(addr, val0, val1) \
|
#define _MASK_STORE_C_2nx16(addr, val0, val1) \
|
||||||
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \
|
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \
|
||||||
asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "k"(mmask)); \
|
asm("vfmadd213ps (%1, %3, 4), %2, %0 %{%4%}": "+v"(val1) : "r"(addr), "v"(alpha_512), "r"(ldc), "Yk"(mmask)); \
|
||||||
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask)); \
|
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask)); \
|
||||||
asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "k"(mmask))
|
asm("vmovups %0, (%1, %2, 4) %{%3%}": : "v"(val1), "r"(addr), "r"(ldc), "Yk"(mmask))
|
||||||
|
|
||||||
#define _REORDER_C_2X(result_0, result_1) { \
|
#define _REORDER_C_2X(result_0, result_1) { \
|
||||||
__m512 tmp0, tmp1; \
|
__m512 tmp0, tmp1; \
|
||||||
|
@@ -154,8 +154,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
asm("vmovups %0, (%1)": : "v"(val0), "r"(addr));
|
asm("vmovups %0, (%1)": : "v"(val0), "r"(addr));
|
||||||
|
|
||||||
#define _MASK_STORE_C_16(addr, val0) \
|
#define _MASK_STORE_C_16(addr, val0) \
|
||||||
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "k"(mmask)); \
|
asm("vfmadd213ps (%1), %2, %0 %{%3%} ": "+v"(val0) : "r"(addr), "v"(alpha_512), "Yk"(mmask)); \
|
||||||
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "k"(mmask));
|
asm("vmovups %0, (%1) %{%2%}": : "v"(val0), "r"(addr), "Yk"(mmask));
|
||||||
|
|
||||||
#define N_STORE_4X(A, Bx, By) { \
|
#define N_STORE_4X(A, Bx, By) { \
|
||||||
_REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \
|
_REORDER_C_2X(result_00_##A##Bx##By, result_01_##A##Bx##By); \
|
||||||
|
|
|
@@ -135,7 +135,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
|
||||||
0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b,
|
0x0, 0x1, 0x2, 0x3, 0x10, 0x11, 0x12, 0x13, 0x8, 0x9, 0xa, 0xb, 0x18, 0x19, 0x1a, 0x1b,
|
||||||
0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f,
|
0x4, 0x5, 0x6, 0x7, 0x14, 0x15, 0x16, 0x17, 0xc, 0xd, 0xe, 0xf, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||||
};
|
};
|
||||||
u_int64_t permute_table2[] = {
|
uint64_t permute_table2[] = {
|
||||||
0x00, 0x01, 0x02, 0x03, 8|0x0, 8|0x1, 8|0x2, 8|0x3,
|
0x00, 0x01, 0x02, 0x03, 8|0x0, 8|0x1, 8|0x2, 8|0x3,
|
||||||
0x04, 0x05, 0x06, 0x07, 8|0x4, 8|0x5, 8|0x6, 8|0x7,
|
0x04, 0x05, 0x06, 0x07, 8|0x4, 8|0x5, 8|0x6, 8|0x7,
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in New Issue