From 36016fe34954d567ccea777ca692b798d5982990 Mon Sep 17 00:00:00 2001 From: Xianyi Date: Wed, 2 Mar 2011 18:45:30 +0800 Subject: [PATCH] On x86 32bits, gcc 4.4.3 generated wrong code (movsd) from movlps in zdot_sse2.S line 191. This would cause zdotu & zdotc failures. Instead, use movlpd to work around it. Fixed #8. Fixed #9. --- Changelog.txt | 3 +++ kernel/x86/zdot_sse2.S | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Changelog.txt b/Changelog.txt index b3c438471..463e03d03 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -13,6 +13,9 @@ common: * Imported GotoBLAS2 1.13 BSD version x86/x86 64: + * On x86 32bits, gcc 4.4.3 generated wrong code (movsd) from movlps + in zdot_sse2.S line 191. This would cause zdotu & zdotc failures. + Instead, use movlpd to work around it. (Refs issue #8 #9 on github) * Modified ?axpy functions to return same netlib BLAS results when incx==0 or incy==0 (Refs issue #7 on github) * Modified ?swap functions to return same netlib BLAS results diff --git a/kernel/x86/zdot_sse2.S b/kernel/x86/zdot_sse2.S index 6304f01a7..0f7acffe4 100644 --- a/kernel/x86/zdot_sse2.S +++ b/kernel/x86/zdot_sse2.S @@ -1188,8 +1188,8 @@ testl $1, N jle .L48 - movlps -16 * SIZE(X), %xmm4 - movlps -16 * SIZE(Y), %xmm6 + movlpd -16 * SIZE(X), %xmm4 + movlpd -16 * SIZE(Y), %xmm6 pshufd $0x4e, %xmm6, %xmm3 mulpd %xmm4, %xmm6