From 403eb513a0616020e7238b531bad739f6baef43a Mon Sep 17 00:00:00 2001 From: Matti Picus Date: Mon, 12 Oct 2020 18:15:01 +0300 Subject: [PATCH] use emms instead of fninit, add WINDOWS_ABI guards --- kernel/x86_64/amax.S | 4 +++- kernel/x86_64/asum.S | 5 ++++- kernel/x86_64/dot.S | 5 ++++- kernel/x86_64/iamax.S | 5 ++++- kernel/x86_64/izamax.S | 5 ++++- kernel/x86_64/nrm2.S | 5 ++++- kernel/x86_64/qconjg.S | 5 ++++- kernel/x86_64/qdot.S | 4 +++- kernel/x86_64/qgemm_kernel_2x2.S | 4 +++- kernel/x86_64/qgemv_n.S | 4 +++- kernel/x86_64/qgemv_t.S | 5 ++++- kernel/x86_64/qtrsm_kernel_LN_2x2.S | 4 +++- kernel/x86_64/qtrsm_kernel_LT_2x2.S | 4 +++- kernel/x86_64/qtrsm_kernel_RT_2x2.S | 5 +++-- kernel/x86_64/sum.S | 4 +++- kernel/x86_64/xdot.S | 4 +++- kernel/x86_64/xgemm3m_kernel_2x2.S | 4 +++- kernel/x86_64/xgemm_kernel_1x1.S | 4 +++- kernel/x86_64/xgemv_n.S | 4 +++- kernel/x86_64/xgemv_t.S | 4 +++- kernel/x86_64/xtrsm_kernel_LT_1x1.S | 4 +++- kernel/x86_64/zamax.S | 4 +++- kernel/x86_64/zasum.S | 4 +++- kernel/x86_64/zdot.S | 4 ++-- kernel/x86_64/znrm2.S | 4 +++- kernel/x86_64/zscal.S | 4 +++- kernel/x86_64/zsum.S | 4 +++- 27 files changed, 87 insertions(+), 29 deletions(-) diff --git a/kernel/x86_64/amax.S b/kernel/x86_64/amax.S index 257147dfb..1498bb226 100644 --- a/kernel/x86_64/amax.S +++ b/kernel/x86_64/amax.S @@ -55,7 +55,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif salq $BASE_SHIFT, INCX diff --git a/kernel/x86_64/asum.S b/kernel/x86_64/asum.S index 24f57dd11..a2cbfd480 100644 --- a/kernel/x86_64/asum.S +++ b/kernel/x86_64/asum.S @@ -50,7 +50,10 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif + fldz testq M, M jle .L999 diff --git a/kernel/x86_64/dot.S b/kernel/x86_64/dot.S index 2319885f1..a11d25e5d 100644 --- a/kernel/x86_64/dot.S +++ b/kernel/x86_64/dot.S @@ -49,7 +49,10 @@ PROLOGUE PROFCODE - fninit + +#ifdef WINDOWS_ABI + emms +#endif salq $BASE_SHIFT, INCX salq $BASE_SHIFT, INCY diff --git a/kernel/x86_64/iamax.S b/kernel/x86_64/iamax.S 
index 0c666d623..00999e25f 100644 --- a/kernel/x86_64/iamax.S +++ b/kernel/x86_64/iamax.S @@ -59,7 +59,10 @@ PROLOGUE PROFCODE - fninit + +#ifdef WINDOWS_ABI + emms +#endif salq $BASE_SHIFT, INCX diff --git a/kernel/x86_64/izamax.S b/kernel/x86_64/izamax.S index e450c2cd2..b24b2e692 100644 --- a/kernel/x86_64/izamax.S +++ b/kernel/x86_64/izamax.S @@ -59,7 +59,10 @@ PROLOGUE PROFCODE - fninit + +#ifdef WINDOWS_ABI + emms +#endif salq $ZBASE_SHIFT, INCX diff --git a/kernel/x86_64/nrm2.S b/kernel/x86_64/nrm2.S index 548e3b744..b79ac2adb 100644 --- a/kernel/x86_64/nrm2.S +++ b/kernel/x86_64/nrm2.S @@ -50,7 +50,10 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif + fldz testq M, M jle .L999 diff --git a/kernel/x86_64/qconjg.S b/kernel/x86_64/qconjg.S index bab541831..823a15a84 100644 --- a/kernel/x86_64/qconjg.S +++ b/kernel/x86_64/qconjg.S @@ -41,7 +41,10 @@ PROLOGUE PROFCODE - fninit + +#ifdef WINDOWS_ABI + emms +#endif fldz FLD 1 * SIZE(ARG1) diff --git a/kernel/x86_64/qdot.S b/kernel/x86_64/qdot.S index e7d31360b..2243b6b6d 100644 --- a/kernel/x86_64/qdot.S +++ b/kernel/x86_64/qdot.S @@ -58,7 +58,9 @@ PROLOGUE - fninit +#ifdef WINDOWS_ABI + emms +#endif pushl %edi pushl %esi diff --git a/kernel/x86_64/qgemm_kernel_2x2.S b/kernel/x86_64/qgemm_kernel_2x2.S index 7b5e7707d..c11f3a91d 100644 --- a/kernel/x86_64/qgemm_kernel_2x2.S +++ b/kernel/x86_64/qgemm_kernel_2x2.S @@ -74,7 +74,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/qgemv_n.S b/kernel/x86_64/qgemv_n.S index 1b65b03f0..c9d345cb1 100644 --- a/kernel/x86_64/qgemv_n.S +++ b/kernel/x86_64/qgemv_n.S @@ -76,7 +76,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/qgemv_t.S b/kernel/x86_64/qgemv_t.S index 00188c257..32372ff15 100644 --- a/kernel/x86_64/qgemv_t.S +++ b/kernel/x86_64/qgemv_t.S @@ -75,7 +75,10 @@ PROLOGUE PROFCODE - 
fninit +#ifdef WINDOWS_ABI + emms +#endif + subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) movq %rbp, 8(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_LN_2x2.S b/kernel/x86_64/qtrsm_kernel_LN_2x2.S index 030eff893..0a545faf8 100644 --- a/kernel/x86_64/qtrsm_kernel_LN_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_LN_2x2.S @@ -74,7 +74,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_LT_2x2.S b/kernel/x86_64/qtrsm_kernel_LT_2x2.S index d86972c72..16063fbcd 100644 --- a/kernel/x86_64/qtrsm_kernel_LT_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_LT_2x2.S @@ -74,7 +74,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/qtrsm_kernel_RT_2x2.S b/kernel/x86_64/qtrsm_kernel_RT_2x2.S index 2826a62c9..4c94ac02c 100644 --- a/kernel/x86_64/qtrsm_kernel_RT_2x2.S +++ b/kernel/x86_64/qtrsm_kernel_RT_2x2.S @@ -74,8 +74,9 @@ PROLOGUE PROFCODE - fninit - +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/sum.S b/kernel/x86_64/sum.S index 3d5fa7cc2..9f2cdc1ec 100644 --- a/kernel/x86_64/sum.S +++ b/kernel/x86_64/sum.S @@ -50,7 +50,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif fldz testq M, M diff --git a/kernel/x86_64/xdot.S b/kernel/x86_64/xdot.S index ec89b799c..c4b473494 100644 --- a/kernel/x86_64/xdot.S +++ b/kernel/x86_64/xdot.S @@ -59,7 +59,9 @@ PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif #define N %ebx diff --git a/kernel/x86_64/xgemm3m_kernel_2x2.S b/kernel/x86_64/xgemm3m_kernel_2x2.S index e8da78d82..1d0b23c40 100644 --- a/kernel/x86_64/xgemm3m_kernel_2x2.S +++ b/kernel/x86_64/xgemm3m_kernel_2x2.S @@ -78,7 +78,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/xgemm_kernel_1x1.S b/kernel/x86_64/xgemm_kernel_1x1.S index f04ab07f5..ee67d8d43 100644 --- 
a/kernel/x86_64/xgemm_kernel_1x1.S +++ b/kernel/x86_64/xgemm_kernel_1x1.S @@ -97,7 +97,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/xgemv_n.S b/kernel/x86_64/xgemv_n.S index 7d28c118a..b66f28d58 100644 --- a/kernel/x86_64/xgemv_n.S +++ b/kernel/x86_64/xgemv_n.S @@ -76,7 +76,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/xgemv_t.S b/kernel/x86_64/xgemv_t.S index e79676088..d6d37010d 100644 --- a/kernel/x86_64/xgemv_t.S +++ b/kernel/x86_64/xgemv_t.S @@ -75,7 +75,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/xtrsm_kernel_LT_1x1.S b/kernel/x86_64/xtrsm_kernel_LT_1x1.S index 54d41932f..875206363 100644 --- a/kernel/x86_64/xtrsm_kernel_LT_1x1.S +++ b/kernel/x86_64/xtrsm_kernel_LT_1x1.S @@ -90,7 +90,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif subq $STACKSIZE, %rsp movq %rbx, 0(%rsp) diff --git a/kernel/x86_64/zamax.S b/kernel/x86_64/zamax.S index bfd836193..5cb2f6019 100644 --- a/kernel/x86_64/zamax.S +++ b/kernel/x86_64/zamax.S @@ -55,7 +55,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif salq $ZBASE_SHIFT, INCX diff --git a/kernel/x86_64/zasum.S b/kernel/x86_64/zasum.S index 9ea2aadc0..3460fcea3 100644 --- a/kernel/x86_64/zasum.S +++ b/kernel/x86_64/zasum.S @@ -50,7 +50,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif fldz testq M, M diff --git a/kernel/x86_64/zdot.S b/kernel/x86_64/zdot.S index f7df919b7..87c08d7c8 100644 --- a/kernel/x86_64/zdot.S +++ b/kernel/x86_64/zdot.S @@ -54,9 +54,9 @@ PROLOGUE PROFCODE - fninit - #ifdef WINDOWS_ABI + emms + movq 40(%rsp), INCY #endif diff --git a/kernel/x86_64/znrm2.S b/kernel/x86_64/znrm2.S index cb02a5a9f..0d2aa3480 100644 --- a/kernel/x86_64/znrm2.S +++ b/kernel/x86_64/znrm2.S @@ -50,7 +50,9 @@ PROLOGUE 
PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif fldz testq M, M diff --git a/kernel/x86_64/zscal.S b/kernel/x86_64/zscal.S index 08c0831a4..5ed4c4576 100644 --- a/kernel/x86_64/zscal.S +++ b/kernel/x86_64/zscal.S @@ -50,7 +50,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif salq $ZBASE_SHIFT, INCX diff --git a/kernel/x86_64/zsum.S b/kernel/x86_64/zsum.S index 1c3904839..aa02637e4 100644 --- a/kernel/x86_64/zsum.S +++ b/kernel/x86_64/zsum.S @@ -50,7 +50,9 @@ PROLOGUE PROFCODE - fninit +#ifdef WINDOWS_ABI + emms +#endif fldz testq M, M