diff --git a/kernel/x86_64/scal_sse.S b/kernel/x86_64/scal_sse.S index 91149af3f..88ef4a3ed 100644 --- a/kernel/x86_64/scal_sse.S +++ b/kernel/x86_64/scal_sse.S @@ -60,8 +60,10 @@ #ifdef WINDOWS_ABI movq 40(%rsp), X movq 48(%rsp), INCX - + movq 64(%rsp), %r9 movaps %xmm3, %xmm0 +#else + movq 24(%rsp), %r9 #endif SAVEREGISTERS @@ -76,6 +78,8 @@ shufps $0, %xmm0, %xmm0 jne .L100 # Alpha != ZERO + + cmpq $1, %r9 je .L100 /* Alpha == ZERO */ cmpq $SIZE, INCX diff --git a/kernel/x86_64/scal_sse2.S b/kernel/x86_64/scal_sse2.S index b778895ba..485e6ef46 100644 --- a/kernel/x86_64/scal_sse2.S +++ b/kernel/x86_64/scal_sse2.S @@ -48,6 +48,7 @@ #define X ARG2 #define INCX ARG3 #endif +#define FLAG %r9 #define XX %r10 #define I %rax @@ -60,8 +61,10 @@ #ifdef WINDOWS_ABI movq 40(%rsp), X movq 48(%rsp), INCX - + movq 64(%rsp), FLAG movaps %xmm3, %xmm0 +#else + movq 24(%rsp), FLAG #endif SAVEREGISTERS @@ -75,6 +78,8 @@ comisd %xmm0, %xmm1 jne .L100 # Alpha != ZERO jp .L100 # For Alpha = NaN + + cmpq $1, FLAG je .L100 # disable the Alpha=zero path as it does not handle x=inf or nan /* Alpha == ZERO */ cmpq $SIZE, INCX