From 46e415b1405044b038586537d213e4f2f04b8536 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 14 Feb 2019 22:43:18 +0100 Subject: [PATCH] Save and restore input argument 8 (lda4) Fixes miscompilation with gcc9 -ftree-vectorize (related to issue #2009) --- kernel/x86_64/sgemv_n_microk_haswell-4.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/x86_64/sgemv_n_microk_haswell-4.c b/kernel/x86_64/sgemv_n_microk_haswell-4.c index 2c90f8aa9..e89a16785 100644 --- a/kernel/x86_64/sgemv_n_microk_haswell-4.c +++ b/kernel/x86_64/sgemv_n_microk_haswell-4.c @@ -26,7 +26,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ - #define HAVE_KERNEL_4x8 1 static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLONG lda4, FLOAT *alpha) __attribute__ ((noinline)); @@ -49,6 +48,8 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "vbroadcastss (%9), %%ymm6 \n\t" // alpha + "movq %8, %%xmm10 \n\t" //save lda + "testq $0x04, %1 \n\t" "jz 2f \n\t" @@ -151,6 +152,7 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "4: \n\t" "vzeroupper \n\t" + "movq %%xmm10, %8 \n\t" //restore lda : "+r" (i), // 0 @@ -170,6 +172,7 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO "%xmm4", "%xmm5", "%xmm6", "%xmm7", "%xmm8", "%xmm9", + "%xmm10", "%xmm12", "%xmm13", "%xmm14", "%xmm15", "memory" ); @@ -177,7 +180,6 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO } - #define HAVE_KERNEL_4x4 1 static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT *alpha) __attribute__ ((noinline)); @@ -196,6 +198,7 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT "vbroadcastss (%8), %%ymm6 \n\t" // alpha + "testq $0x04, %1 \n\t" "jz 2f \n\t"