From eb4879e04cb93cb112c2ec5ec79cb4a837b5d517 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 17 Jul 2024 23:24:19 +0200 Subject: [PATCH 01/12] make NAN handling depend on the dummy2 parameter --- kernel/arm64/scal.S | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/kernel/arm64/scal.S b/kernel/arm64/scal.S index 5029890f6..33400b630 100644 --- a/kernel/arm64/scal.S +++ b/kernel/arm64/scal.S @@ -33,7 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define X_COPY x5 /* X vector address */ #define INC_X x4 /* X stride */ #define I x1 /* loop variable */ - +#define FLAG x9 /******************************************************************************* * Macro definitions *******************************************************************************/ @@ -168,9 +168,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp N, xzr ble .Lscal_kernel_L999 - //fcmp DA, #0.0 - //beq .Lscal_kernel_zero + ldr FLAG, [sp] + cmp FLAG, #1 + beq .Lscal_kernel_nansafe + fcmp DA, #0.0 + beq .Lscal_kernel_zero + +.Lscal_kernel_nansafe: cmp INC_X, #1 bne .Lscal_kernel_S_BEGIN From b9bfc8ce095754231912686c3a9e41c7a8a2df94 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 17 Jul 2024 23:29:50 +0200 Subject: [PATCH 02/12] make NAN handling depend on dummy2 parameter --- kernel/power/scal.S | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/kernel/power/scal.S b/kernel/power/scal.S index 7d3e23245..5e92a88aa 100644 --- a/kernel/power/scal.S +++ b/kernel/power/scal.S @@ -47,9 +47,11 @@ #ifndef __64BIT__ #define X r6 #define INCX r7 +#define FLAG r11 #else #define X r7 #define INCX r8 +#define FLAG r12 #endif #endif @@ -57,9 +59,11 @@ #if !defined(__64BIT__) && defined(DOUBLE) #define X r8 #define INCX r9 +#define FLAG r13 #else #define X r7 #define INCX r8 +#define FLAG r12 #endif #endif @@ -84,9 +88,12 @@ cmpwi cr0, N, 0 blelr- cr0 -// fcmpu cr0, FZERO, 
ALPHA -// bne- cr0, LL(A1I1) - b LL(A1I1) + fcmpu cr0, FZERO, ALPHA + bne- cr0, LL(A1I1) + + ld FLAG, 48+64+8(SP) + cmpwi cr0, FLAG, 1 + beq- cr0, LL(A1I1) cmpwi cr0, INCX, SIZE bne- cr0, LL(A0IN) From 73751218a42d23017cc0f7899d9e52862867e5a3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 17 Jul 2024 23:41:26 +0200 Subject: [PATCH 03/12] make NAN handling depend on dummy2 parameter --- kernel/arm/scal.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/kernel/arm/scal.c b/kernel/arm/scal.c index 4455d7643..1f96f9b95 100644 --- a/kernel/arm/scal.c +++ b/kernel/arm/scal.c @@ -43,9 +43,22 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( (n <= 0) || (inc_x <= 0)) return(0); + if (dummy2 == 0) + while(j < n) + { - while(j < n) - { + if ( da == 0.0 ) + x[i]=0.0; + else + x[i] = da * x[i] ; + + i += inc_x ; + j++; + } + } else { + + while(j < n) + { if ( da == 0.0 ) if (!isnan(x[i]) && !isinf(x[i])) { @@ -59,6 +72,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS i += inc_x ; j++; + } } return 0; From 7284c533b56748975c1794058f61e3d044c24ec2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 17 Jul 2024 23:50:40 +0200 Subject: [PATCH 04/12] make NAN handling depend on dummy2 parameter --- kernel/riscv64/scal.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/kernel/riscv64/scal.c b/kernel/riscv64/scal.c index 6c713aa18..bebbed67e 100644 --- a/kernel/riscv64/scal.c +++ b/kernel/riscv64/scal.c @@ -43,9 +43,9 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( (n <= 0) || (inc_x <= 0)) return(0); - - while(j < n) - { + if (dummy2 == 0) { + while(j < n) + { if ( da == 0.0 ) if (isfinite(x[i])) @@ -57,7 +57,19 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS i += inc_x ; j++; + } + } else { + while(j < n) + { + if ( da == 0.0 ) + x[i]=0.0; + 
else + x[i] = da * x[i] ; + + i += inc_x ; + j++; + } } return 0; From 3870995f01d731dc80861e7760a8216025c639ab Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 17 Jul 2024 23:54:24 +0200 Subject: [PATCH 05/12] make NAN handling depend on dummy2 parameter --- kernel/riscv64/scal_vector.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/riscv64/scal_vector.c b/kernel/riscv64/scal_vector.c index a1ba41c4f..4792b514c 100644 --- a/kernel/riscv64/scal_vector.c +++ b/kernel/riscv64/scal_vector.c @@ -71,7 +71,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS FLOAT_V_T v0, v1; unsigned int gvl = 0; if(inc_x == 1){ - if (0){ //if(da == 0.0){ + if(dummy2 == 0 && da == 0.0){ memset(&x[0], 0, n * sizeof(FLOAT)); }else{ gvl = VSETVL(n); @@ -96,7 +96,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS } } }else{ - if (0) { //if(da == 0.0){ + if(dummy2 == 0 && da == 0.0){ BLASLONG stride_x = inc_x * sizeof(FLOAT); BLASLONG ix = 0; gvl = VSETVL(n); From 2020569705b4fcf40e5f4e6aa7e600b038f295bc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 17 Jul 2024 23:55:54 +0200 Subject: [PATCH 06/12] fix NAN handling and make it depend on dummy2 parameter --- kernel/riscv64/scal_rvv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/riscv64/scal_rvv.c b/kernel/riscv64/scal_rvv.c index 2c273fb63..827ab120a 100644 --- a/kernel/riscv64/scal_rvv.c +++ b/kernel/riscv64/scal_rvv.c @@ -56,7 +56,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS FLOAT_V_T v0; if(inc_x == 1) { - if(da == 0.0) { + if(dummy2 == 0 && da == 0.0) { int gvl = VSETVL_MAX; v0 = VFMVVF_FLOAT(0.0, gvl); for (size_t vl; n > 0; n -= vl, x += vl) { @@ -75,7 +75,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS } else { BLASLONG stride_x = inc_x * sizeof(FLOAT); - if(da == 0.0) { + if(dummy2 == 0 && da == 0.0) { int gvl 
= VSETVL_MAX; v0 = VFMVVF_FLOAT(0.0, gvl); for (size_t vl; n > 0; n -= vl, x += vl*inc_x) { From dd6c33d34d9925a7c02284c47a86afa4bf117237 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 19 Jul 2024 16:14:55 +0200 Subject: [PATCH 07/12] make NAN handling depend on dummy2 parameter --- kernel/x86/scal.S | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/kernel/x86/scal.S b/kernel/x86/scal.S index b0c232b1b..7e12a52ab 100644 --- a/kernel/x86/scal.S +++ b/kernel/x86/scal.S @@ -57,19 +57,24 @@ #ifdef XDOUBLE movl 44(%esp),%edi movl 48(%esp),%esi + movl 64(%esp),%ecx #elif defined(DOUBLE) movl 36(%esp),%edi movl 40(%esp),%esi + movl 56(%esp),%ecx #else movl 32(%esp),%edi movl 36(%esp),%esi + movl 54(%esp),%ecx #endif ftst fnstsw %ax andb $68, %ah -// je .L300 # Alpha != ZERO - jmp .L300 + je .L300 # Alpha != ZERO + + cmpl $1,%ecx # dummy2 flag + je .L300 /* Alpha == ZERO */ cmpl $1,%esi From c2ffd90e8c2cfc93c61d72bcef6fd19bb169e072 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 20 Jul 2024 17:31:00 +0200 Subject: [PATCH 08/12] make NAN handling depend on dummy2 parameter --- kernel/x86_64/scal_sse.S | 6 +++++- kernel/x86_64/scal_sse2.S | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/kernel/x86_64/scal_sse.S b/kernel/x86_64/scal_sse.S index 91149af3f..88ef4a3ed 100644 --- a/kernel/x86_64/scal_sse.S +++ b/kernel/x86_64/scal_sse.S @@ -60,8 +60,10 @@ #ifdef WINDOWS_ABI movq 40(%rsp), X movq 48(%rsp), INCX - + movq 64(%rsp), %r9 movaps %xmm3, %xmm0 +#else + movq 24(%rsp), %r9 #endif SAVEREGISTERS @@ -76,6 +78,8 @@ shufps $0, %xmm0, %xmm0 jne .L100 # Alpha != ZERO + + cmpq $1, %r9 je .L100 /* Alpha == ZERO */ cmpq $SIZE, INCX diff --git a/kernel/x86_64/scal_sse2.S b/kernel/x86_64/scal_sse2.S index b778895ba..485e6ef46 100644 --- a/kernel/x86_64/scal_sse2.S +++ b/kernel/x86_64/scal_sse2.S @@ -48,6 +48,7 @@ #define X ARG2 #define INCX ARG3 #endif +#define FLAG %r9 #define XX %r10 #define I %rax @@ -60,8 +61,10 
@@ #ifdef WINDOWS_ABI movq 40(%rsp), X movq 48(%rsp), INCX - + movq 64(%rsp), FLAG movaps %xmm3, %xmm0 +#else + movq 24(%rsp), FLAG #endif SAVEREGISTERS @@ -75,6 +78,8 @@ comisd %xmm0, %xmm1 jne .L100 # Alpha != ZERO jp .L100 # For Alpha = NaN + + cmpq $1, FLAG je .L100 # disable the Alpha=zero path as it does not handle x=inf or nan /* Alpha == ZERO */ cmpq $SIZE, INCX From c064319ecb47838babe32999adf2149ed4f5cd83 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 20 Jul 2024 17:42:31 +0200 Subject: [PATCH 09/12] fix alpha=NAN case --- kernel/x86_64/zscal_sse.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/zscal_sse.S b/kernel/x86_64/zscal_sse.S index 8505c67bf..acd6c3654 100644 --- a/kernel/x86_64/zscal_sse.S +++ b/kernel/x86_64/zscal_sse.S @@ -76,7 +76,7 @@ pxor %xmm15, %xmm15 comiss %xmm0, %xmm15 jne .L100 # Alpha_r != ZERO - + jp .L100 # Alpha_r == NAN comiss %xmm1, %xmm15 jne .L100 # Alpha_i != ZERO From dfbc2348a88cbc20ec8016ed65a740639c3e45ae Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 20 Jul 2024 18:27:15 +0200 Subject: [PATCH 10/12] fix NAN handling --- kernel/x86_64/scal_atom.S | 8 +++++++- kernel/x86_64/zscal_atom.S | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/kernel/x86_64/scal_atom.S b/kernel/x86_64/scal_atom.S index 11350ea19..284ea4515 100644 --- a/kernel/x86_64/scal_atom.S +++ b/kernel/x86_64/scal_atom.S @@ -60,8 +60,10 @@ #ifdef WINDOWS_ABI movq 40(%rsp), X movq 48(%rsp), INCX - + movq 64(%rsp), %r9 movaps %xmm3, %xmm0 +#else + movq 24(%rsp), %r9 #endif SAVEREGISTERS @@ -73,6 +75,10 @@ lea (, INCX, SIZE), INCX comisd %xmm0, %xmm1 jne .L100 + jp .L100 + + cmpq $1, %r9 + je .L100 /* Alpha == ZERO */ cmpq $SIZE, INCX diff --git a/kernel/x86_64/zscal_atom.S b/kernel/x86_64/zscal_atom.S index 1649b855b..7713626c9 100644 --- a/kernel/x86_64/zscal_atom.S +++ b/kernel/x86_64/zscal_atom.S @@ -74,7 +74,7 @@ pxor %xmm15, %xmm15 comisd %xmm0, %xmm15 jne .L30 # Alpha_r != ZERO - + jp 
.L30 comisd %xmm1, %xmm15 jne .L30 # Alpha_i != ZERO From 73f8866ffba84ca7c8f75b7e59030ce0611e5528 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 21 Jul 2024 13:42:47 +0200 Subject: [PATCH 11/12] make NAN handling depend on DUMMY2 parameter --- kernel/power/dscal.c | 34 +++++++++++++++++++++++++++++++--- kernel/power/sscal.c | 44 ++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 71 insertions(+), 7 deletions(-) diff --git a/kernel/power/dscal.c b/kernel/power/dscal.c index 2bbc1ea6d..da8083976 100644 --- a/kernel/power/dscal.c +++ b/kernel/power/dscal.c @@ -73,6 +73,15 @@ static void dscal_kernel_8_zero (BLASLONG n, FLOAT *x) for( i=0; i= 16 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3; + if (dummy2 == 0) + for (j = 0; j < align; j++) { + x [j] = 0.0; + } + else for (j = 0; j < align; j++) { if (isfinite(x[j])) x[j] = 0.0; @@ -151,7 +166,13 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS j=n1; } #endif - + if (dummy2 == 0) + while(j < n) + { + x[j]=0.0; + j++; + } + else while(j < n) { if (!isfinite(x[j])) @@ -202,7 +223,14 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( da == 0.0 ) { - + if (dummy2 == 0) + while(j < n) + { + x[i]=0.0; + i += inc_x; + j++; + } + else while(j < n) { if (!isfinite(x[i])) diff --git a/kernel/power/sscal.c b/kernel/power/sscal.c index 12246b0a3..54047a856 100644 --- a/kernel/power/sscal.c +++ b/kernel/power/sscal.c @@ -74,7 +74,24 @@ static void sscal_kernel_16_zero( BLASLONG n, FLOAT *x ) for( i=0; i= 32 ) { BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7; + if (dummy2 == 0) + for (j = 0; j < align; j++){ + x[j] = 0.0; + } + else for (j = 0; j < align; j++) { if (isfinite(x[j])) x[j] = 0.0; @@ -153,9 +176,15 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS j=n1; } #endif - + if (dummy2 == 0) while(j < n) { + x[j] = 0.0; + j++; + } + else + while(j < n) 
+ { if (isfinite(x[j])) x[j]=0.0; else @@ -204,7 +233,14 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( da == 0.0 ) { - + if (dummy2 == 0) + while(j < n) + { + x[i]=0.0; + i += inc_x; + j++; + } + else while(j < n) { if (isfinite(x[i])) From b613754143d68a2635f0232f29a04318f15e34d3 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 24 Jul 2024 14:31:29 +0200 Subject: [PATCH 12/12] Update scal.c --- kernel/arm/scal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/arm/scal.c b/kernel/arm/scal.c index 1f96f9b95..6a2c37631 100644 --- a/kernel/arm/scal.c +++ b/kernel/arm/scal.c @@ -43,7 +43,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS if ( (n <= 0) || (inc_x <= 0)) return(0); - if (dummy2 == 0) + if (dummy2 == 0) { while(j < n) {