From a5b164946ccc9dec037d4e0a1cd2f2202b1c918a Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Mon, 5 Oct 2020 22:13:25 +0300 Subject: [PATCH] add fninit to reset fpu registers before assembler routines --- kernel/x86_64/amax.S | 2 ++ kernel/x86_64/asum.S | 3 ++- kernel/x86_64/dot.S | 1 + kernel/x86_64/iamax.S | 1 + kernel/x86_64/izamax.S | 1 + kernel/x86_64/nrm2.S | 1 + kernel/x86_64/qconjg.S | 1 + kernel/x86_64/qdot.S | 2 ++ kernel/x86_64/qgemm_kernel_2x2.S | 2 ++ kernel/x86_64/qgemv_n.S | 2 ++ kernel/x86_64/qgemv_t.S | 1 + kernel/x86_64/qtrsm_kernel_LN_2x2.S | 2 ++ kernel/x86_64/qtrsm_kernel_LT_2x2.S | 2 ++ kernel/x86_64/qtrsm_kernel_RT_2x2.S | 3 +++ kernel/x86_64/sum.S | 2 ++ kernel/x86_64/xdot.S | 3 +++ kernel/x86_64/xgemm3m_kernel_2x2.S | 2 ++ kernel/x86_64/xgemm_kernel_1x1.S | 2 ++ kernel/x86_64/xgemv_n.S | 2 ++ kernel/x86_64/xgemv_t.S | 2 ++ kernel/x86_64/xtrsm_kernel_LT_1x1.S | 2 ++ kernel/x86_64/zamax.S | 2 ++ kernel/x86_64/zasum.S | 2 ++ kernel/x86_64/zdot.S | 2 ++ kernel/x86_64/znrm2.S | 2 ++ kernel/x86_64/zscal.S | 2 ++ kernel/x86_64/zsum.S | 2 ++ 27 files changed, 50 insertions(+), 1 deletion(-) diff --git a/kernel/x86_64/amax.S b/kernel/x86_64/amax.S index 0e9bf4db4..257147dfb 100644 --- a/kernel/x86_64/amax.S +++ b/kernel/x86_64/amax.S @@ -54,6 +54,8 @@ PROLOGUE PROFCODE + + fninit salq $BASE_SHIFT, INCX diff --git a/kernel/x86_64/asum.S b/kernel/x86_64/asum.S index 31f973894..24f57dd11 100644 --- a/kernel/x86_64/asum.S +++ b/kernel/x86_64/asum.S @@ -49,7 +49,8 @@ PROLOGUE PROFCODE - + + fninit fldz testq M, M jle .L999 diff --git a/kernel/x86_64/dot.S b/kernel/x86_64/dot.S index e63d9cd89..2319885f1 100644 --- a/kernel/x86_64/dot.S +++ b/kernel/x86_64/dot.S @@ -49,6 +49,7 @@ PROLOGUE PROFCODE + fninit salq $BASE_SHIFT, INCX salq $BASE_SHIFT, INCY diff --git a/kernel/x86_64/iamax.S b/kernel/x86_64/iamax.S index 79e1bae1d..0c666d623 100644 --- a/kernel/x86_64/iamax.S +++ b/kernel/x86_64/iamax.S @@ -59,6 +59,7 @@ PROLOGUE PROFCODE + fninit salq $BASE_SHIFT, INCX diff --git a/kernel/x86_64/izamax.S b/kernel/x86_64/izamax.S index c066acd62..e450c2cd2 100644 --- a/kernel/x86_64/izamax.S +++ b/kernel/x86_64/izamax.S @@ -59,6 +59,7 @@ PROLOGUE PROFCODE + fninit salq $ZBASE_SHIFT, INCX diff --git a/kernel/x86_64/nrm2.S b/kernel/x86_64/nrm2.S index e9be1262a..548e3b744 100644 --- a/kernel/x86_64/nrm2.S +++ b/kernel/x86_64/nrm2.S @@ -50,6 +50,7 @@ PROLOGUE PROFCODE + fninit fldz testq M, M jle .L999 diff --git a/kernel/x86_64/qconjg.S b/kernel/x86_64/qconjg.S index 49ca76649..bab541831 100644 --- a/kernel/x86_64/qconjg.S +++ b/kernel/x86_64/qconjg.S @@ -41,6 +41,7 @@ PROLOGUE PROFCODE + fninit fldz FLD 1 * SIZE(ARG1) diff --git a/kernel/x86_64/qdot.S b/kernel/x86_64/qdot.S index a48a04fdd..e7d31360b 100644 --- a/kernel/x86_64/qdot.S +++ b/kernel/x86_64/qdot.S @@ -58,6 +58,8 @@ PROLOGUE + fninit + pushl %edi pushl %esi pushl %ebx diff --git a/kernel/x86_64/qgemm_kernel_2x2.S b/kernel/x86_64/qgemm_kernel_2x2.S index 99db3961f..7b5e7707d 100644 --- a/kernel/x86_64/qgemm_kernel_2x2.S +++ b/kernel/x86_64/qgemm_kernel_2x2.S @@ -74,6 +74,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qgemv_n.S b/kernel/x86_64/qgemv_n.S index 630d03ffb..1b65b03f0 100644 --- a/kernel/x86_64/qgemv_n.S +++ b/kernel/x86_64/qgemv_n.S @@ -76,6 +76,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qgemv_t.S b/kernel/x86_64/qgemv_t.S index d7c9cd2a5..00188c257 100644 --- a/kernel/x86_64/qgemv_t.S +++ b/kernel/x86_64/qgemv_t.S @@ -75,6 +75,7 @@ PROLOGUE PROFCODE + fninit subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_LN_2x2.S b/kernel/x86_64/qtrsm_kernel_LN_2x2.S index 536042e65..030eff893 100644 --- a/kernel/x86_64/qtrsm_kernel_LN_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_LN_2x2.S @@ -74,6 +74,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_LT_2x2.S b/kernel/x86_64/qtrsm_kernel_LT_2x2.S index 6e94976c5..d86972c72 100644 --- a/kernel/x86_64/qtrsm_kernel_LT_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_LT_2x2.S @@ -74,6 +74,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_RT_2x2.S b/kernel/x86_64/qtrsm_kernel_RT_2x2.S index caa7de14a..2826a62c9 100644 --- a/kernel/x86_64/qtrsm_kernel_RT_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_RT_2x2.S @@ -74,6 +74,9 @@ PROLOGUE PROFCODE + fninit + + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/sum.S b/kernel/x86_64/sum.S index d075eaa04..3d5fa7cc2 100644 --- a/kernel/x86_64/sum.S +++ b/kernel/x86_64/sum.S @@ -50,6 +50,8 @@ PROLOGUE PROFCODE + fninit + fldz testq M, M jle .L999 diff --git a/kernel/x86_64/xdot.S b/kernel/x86_64/xdot.S index ea97164b2..ec89b799c 100644 --- a/kernel/x86_64/xdot.S +++ b/kernel/x86_64/xdot.S @@ -59,6 +59,9 @@ PROFCODE + fninit + + #define N %ebx #define X %esi #define INCX %ecx diff --git a/kernel/x86_64/xgemm3m_kernel_2x2.S b/kernel/x86_64/xgemm3m_kernel_2x2.S index 843fc243a..e8da78d82 100644 --- a/kernel/x86_64/xgemm3m_kernel_2x2.S +++ b/kernel/x86_64/xgemm3m_kernel_2x2.S @@ -78,6 +78,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/xgemm_kernel_1x1.S b/kernel/x86_64/xgemm_kernel_1x1.S index e0cd1f1df..f04ab07f5 100644 --- a/kernel/x86_64/xgemm_kernel_1x1.S +++ b/kernel/x86_64/xgemm_kernel_1x1.S @@ -97,6 +97,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/xgemv_n.S b/kernel/x86_64/xgemv_n.S index cbde6402d..7d28c118a 100644 --- a/kernel/x86_64/xgemv_n.S +++ b/kernel/x86_64/xgemv_n.S @@ -76,6 +76,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/xgemv_t.S b/kernel/x86_64/xgemv_t.S index 31320f651..e79676088 100644 --- a/kernel/x86_64/xgemv_t.S +++ b/kernel/x86_64/xgemv_t.S @@ -75,6 +75,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/xtrsm_kernel_LT_1x1.S b/kernel/x86_64/xtrsm_kernel_LT_1x1.S index a61a240fd..54d41932f 100644 --- a/kernel/x86_64/xtrsm_kernel_LT_1x1.S +++ b/kernel/x86_64/xtrsm_kernel_LT_1x1.S @@ -90,6 +90,8 @@ PROLOGUE PROFCODE + fninit + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/zamax.S b/kernel/x86_64/zamax.S index 74e127e6c..bfd836193 100644 --- a/kernel/x86_64/zamax.S +++ b/kernel/x86_64/zamax.S @@ -55,6 +55,8 @@ PROLOGUE PROFCODE + fninit + salq $ZBASE_SHIFT, INCX fldz diff --git a/kernel/x86_64/zasum.S b/kernel/x86_64/zasum.S index c372fc5dd..9ea2aadc0 100644 --- a/kernel/x86_64/zasum.S +++ b/kernel/x86_64/zasum.S @@ -50,6 +50,8 @@ PROLOGUE PROFCODE + fninit + fldz testq M, M jle .L999 diff --git a/kernel/x86_64/zdot.S b/kernel/x86_64/zdot.S index 94d1008ff..f7df919b7 100644 --- a/kernel/x86_64/zdot.S +++ b/kernel/x86_64/zdot.S @@ -54,6 +54,8 @@ PROLOGUE PROFCODE + fninit + #ifdef WINDOWS_ABI movq 40(%rsp), INCY #endif diff --git a/kernel/x86_64/znrm2.S b/kernel/x86_64/znrm2.S index 4115eab1d..cb02a5a9f 100644 --- a/kernel/x86_64/znrm2.S +++ b/kernel/x86_64/znrm2.S @@ -50,6 +50,8 @@ PROLOGUE PROFCODE + fninit + fldz testq M, M jle .L999 diff --git a/kernel/x86_64/zscal.S b/kernel/x86_64/zscal.S index 5282e0f72..08c0831a4 100644 --- a/kernel/x86_64/zscal.S +++ b/kernel/x86_64/zscal.S @@ -50,6 +50,8 @@ PROLOGUE PROFCODE + fninit + salq $ZBASE_SHIFT, INCX FLD 8(%rsp) diff --git a/kernel/x86_64/zsum.S b/kernel/x86_64/zsum.S index 45e0ddff5..1c3904839 100644 --- a/kernel/x86_64/zsum.S +++ b/kernel/x86_64/zsum.S @@ -50,6 +50,8 @@ PROLOGUE PROFCODE + fninit + fldz testq M, M jle .L999