Refs #189. Fixed the s/cdot bug that caused invalid NaN reads on x86_64.

This commit is contained in:
Zhang Xianyi 2013-01-25 16:18:27 +08:00
parent 36e0982966
commit d311236dfd
2 changed files with 8 additions and 7 deletions

View File

@ -530,7 +530,7 @@
#endif #endif
movsd -32 * SIZE(Y), %xmm8 movsd -32 * SIZE(Y), %xmm8
pshufd $0x39, %xmm4, %xmm5 pshufd $0x29, %xmm4, %xmm5
mulps %xmm8, %xmm5 mulps %xmm8, %xmm5
addps %xmm5, %xmm3 addps %xmm5, %xmm3
@ -750,7 +750,8 @@
xorps %xmm5, %xmm5 xorps %xmm5, %xmm5
movhlps %xmm4, %xmm5 movhlps %xmm4, %xmm5
mulps -32 * SIZE(Y), %xmm5 movlps -32 * SIZE(Y), %xmm4
mulps %xmm4, %xmm5
addps %xmm5, %xmm0 addps %xmm5, %xmm0
addq $2 * SIZE, X addq $2 * SIZE, X
@ -992,7 +993,7 @@
movsd -32 * SIZE(Y), %xmm8 movsd -32 * SIZE(Y), %xmm8
movss %xmm5, %xmm4 movss %xmm5, %xmm4
shufps $0x93, %xmm5, %xmm4 shufps $0x93, %xmm4, %xmm4
mulps %xmm8, %xmm4 mulps %xmm8, %xmm4
addps %xmm4, %xmm3 addps %xmm4, %xmm3

View File

@ -699,7 +699,7 @@
movsd -32 * SIZE(X), %xmm4 movsd -32 * SIZE(X), %xmm4
pshufd $0xb1, %xmm4, %xmm12 pshufd $0xb1, %xmm4, %xmm12
shufps $0x39, %xmm8, %xmm8 shufps $0x59, %xmm8, %xmm8
mulps %xmm8, %xmm4 mulps %xmm8, %xmm4
addps %xmm4, %xmm0 addps %xmm4, %xmm0
mulps %xmm8, %xmm12 mulps %xmm8, %xmm12
@ -1336,7 +1336,7 @@
movss %xmm9, %xmm8 movss %xmm9, %xmm8
pshufd $0xb1, %xmm4, %xmm12 pshufd $0xb1, %xmm4, %xmm12
shufps $0x93, %xmm8, %xmm8 shufps $0x03, %xmm8, %xmm8
mulps %xmm8, %xmm4 mulps %xmm8, %xmm4
addps %xmm4, %xmm0 addps %xmm4, %xmm0
mulps %xmm8, %xmm12 mulps %xmm8, %xmm12
@ -1697,7 +1697,7 @@
movsd -32 * SIZE(Y), %xmm4 movsd -32 * SIZE(Y), %xmm4
pshufd $0xb1, %xmm4, %xmm12 pshufd $0xb1, %xmm4, %xmm12
shufps $0x39, %xmm8, %xmm8 shufps $0xa9, %xmm8, %xmm8
mulps %xmm8, %xmm4 mulps %xmm8, %xmm4
addps %xmm4, %xmm0 addps %xmm4, %xmm0
mulps %xmm8, %xmm12 mulps %xmm8, %xmm12
@ -2024,7 +2024,7 @@
movss %xmm9, %xmm8 movss %xmm9, %xmm8
pshufd $0xb1, %xmm4, %xmm12 pshufd $0xb1, %xmm4, %xmm12
shufps $0x93, %xmm8, %xmm8 shufps $0x03, %xmm8, %xmm8
mulps %xmm8, %xmm4 mulps %xmm8, %xmm4
addps %xmm4, %xmm0 addps %xmm4, %xmm0
mulps %xmm8, %xmm12 mulps %xmm8, %xmm12