From d6be5036d7495d3471d4e7c0cf8bb791b029e811 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 26 Jun 2023 21:19:33 +0200 Subject: [PATCH 1/3] Fix IDAMAX --- kernel/riscv64/iamax_vector.c | 40 +++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/kernel/riscv64/iamax_vector.c b/kernel/riscv64/iamax_vector.c index 9fea522f7..4242af6ea 100644 --- a/kernel/riscv64/iamax_vector.c +++ b/kernel/riscv64/iamax_vector.c @@ -29,8 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #if defined(DOUBLE) - -#define ABS fabs +#define VFMVFS_FLOAT vfmv_f_s_f64m1_f64 #define VSETVL(n) vsetvl_e64m8(n) #define VSETVL_MAX vsetvlmax_e64m1() #define FLOAT_V_T vfloat64m8_t @@ -54,8 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VADDVX_UINT vadd_vx_u64m8 #define VMVVX_UINT vmv_v_x_u64m8 #else - -#define ABS fabsf +#define VFMVFS_FLOAT vfmv_f_s_f32m1_f32 #define VSETVL(n) vsetvl_e32m8(n) #define VSETVL_MAX vsetvlmax_e32m1() #define FLOAT_V_T vfloat32m8_t @@ -85,7 +83,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { BLASLONG i=0, j=0; FLOAT maxf=0.0; +#ifdef DOUBLE + BLASLONG max_index = 0; +#else unsigned int max_index = 0; +#endif if (n <= 0 || inc_x <= 0) return(max_index); FLOAT_V_T vx, v_max; @@ -117,11 +119,14 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) j += gvl; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - maxf = *((FLOAT*)&v_res); + maxf = VFMVFS_FLOAT(v_res); mask = VMFGEVF_FLOAT(v_max, maxf, gvl); max_index = VMFIRSTM(mask,gvl); - max_index = *((unsigned int*)&v_max_index+max_index); - +#ifdef DOUBLE + max_index = *((BLASLONG *)&v_max_index+max_index); +#else + max_index = *((unsigned int *)&v_max_index+max_index); +#endif if(j < n){ gvl = VSETVL(n-j); vx = VLEV_FLOAT(&x[j], gvl); @@ -130,7 +135,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v_max = VFRSUBVF_MASK_FLOAT(mask, vx, vx, 0, gvl); v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - FLOAT cur_maxf = *((FLOAT*)&v_res); + FLOAT cur_maxf = VFMVFS_FLOAT(v_res); if(cur_maxf > maxf){ //tail index v_max_index = VIDV_UINT(gvl); @@ -138,7 +143,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); max_index = VMFIRSTM(mask,gvl); +#ifdef DOUBLE + max_index = *((BLASLONG*)&v_max_index+max_index); +#else max_index = *((unsigned int*)&v_max_index+max_index); +#endif } } }else{ @@ -165,11 +174,14 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) idx += inc_v; } v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - maxf = *((FLOAT*)&v_res); + maxf = VFMVFS_FLOAT(v_res); mask = VMFGEVF_FLOAT(v_max, maxf, gvl); max_index = VMFIRSTM(mask,gvl); +#ifdef DOUBLE + max_index = *((BLASLONG*)&v_max_index+max_index); +#else max_index = *((unsigned int*)&v_max_index+max_index); - +#endif if(j < n){ gvl = VSETVL(n-j); vx = VLSEV_FLOAT(&x[idx], stride_x, gvl); @@ -178,7 +190,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v_max = VFRSUBVF_MASK_FLOAT(mask, vx, vx, 0, gvl); v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl); - FLOAT cur_maxf = *((FLOAT*)&v_res); + FLOAT cur_maxf = VFMVFS_FLOAT(v_res); if(cur_maxf > maxf){ //tail index v_max_index = VIDV_UINT(gvl); @@ -186,11 +198,13 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl); max_index = VMFIRSTM(mask,gvl); +#ifdef DOUBLE + max_index = *((BLASLONG*)&v_max_index+max_index); +#else max_index = *((unsigned int*)&v_max_index+max_index); +#endif } } } return(max_index+1); } - - From 772b0cc71514409511eb4efd4b5770ae40b2f4e7 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 27 Jun 2023 16:12:27 +0200 Subject: [PATCH 2/3] Fix early bailout --- kernel/riscv64/dot_vector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/riscv64/dot_vector.c b/kernel/riscv64/dot_vector.c index f47e0c0b5..cc27d68ed 100644 --- a/kernel/riscv64/dot_vector.c +++ b/kernel/riscv64/dot_vector.c @@ -63,7 +63,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) BLASLONG i=0, j=0; double dot = 0.0 ; - if ( n < 0 ) return(dot); + if ( n < 1 ) return(dot); FLOAT_V_T vr, vx, vy; unsigned int gvl = 0; From ceaee7dc645da97e5e99ee7c0b2a9be8709b32c2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Tue, 27 Jun 2023 16:13:23 +0200 Subject: [PATCH 3/3] remove the limitation to -O1 again --- Makefile.riscv64 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.riscv64 b/Makefile.riscv64 index 0246c0f7d..ce91e03ec 100644 --- a/Makefile.riscv64 +++ b/Makefile.riscv64 @@ -1,4 +1,4 @@ ifeq ($(CORE), C910V) -CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -O1 +CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static endif