From e1afb23811256b231c259ca57d7a5f6e81ac6da5 Mon Sep 17 00:00:00 2001
From: kseniyazaytseva
Date: Fri, 7 Apr 2023 11:13:23 +0300
Subject: [PATCH 1/3] Fix BLAS and LAPACK tests for C910V and RISCV64_ZVL256B
 targets

* Fixed bugs in dgemm, [a]min/max, asum kernels
* Added zero checks for BLAS kernels
* Added dsdot implementation for RVV 0.7.1
* Fixed bugs in _vector files for C910V and RISCV64_ZVL256B targets
* Added additional definitions for RISCV64_ZVL256B target
---
 Makefile.prebuild                       |   4 +
 Makefile.riscv64                        |   4 +
 TargetList.txt                          |   1 +
 getarch.c                               |  14 +++
 kernel/riscv64/KERNEL.C910V             |   1 +
 kernel/riscv64/amin_vector.c            |   6 +-
 kernel/riscv64/asum_vector.c            |   7 +-
 kernel/riscv64/axpby_vector.c           |   2 +-
 kernel/riscv64/dgemm_kernel_8x4_c910v.c |   2 +-
 kernel/riscv64/dsdot_vector.c           | 152 ++++++++++++++++++++++++
 kernel/riscv64/iamin_vector.c           |   4 +-
 kernel/riscv64/izamin_vector.c          |   2 +-
 kernel/riscv64/nrm2_vector.c            |   2 +-
 kernel/riscv64/nrm2_vector_dot.c        |   2 +-
 kernel/riscv64/swap_vector.c            |   2 +-
 kernel/riscv64/zamax_vector.c           |  17 +--
 kernel/riscv64/zamin_vector.c           |  17 +--
 kernel/riscv64/znrm2_vector.c           |   2 +-
 kernel/riscv64/zswap_vector.c           |   2 +-
 19 files changed, 205 insertions(+), 38 deletions(-)
 create mode 100644 kernel/riscv64/dsdot_vector.c

diff --git a/Makefile.prebuild b/Makefile.prebuild
index c4f4a2602..d30275f06 100644
--- a/Makefile.prebuild
+++ b/Makefile.prebuild
@@ -59,6 +59,10 @@ ifeq ($(TARGET), x280)
 TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
 endif
 
+ifeq ($(TARGET), RISCV64_ZVL256B)
+TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
+endif
+
 ifeq ($(TARGET), RISCV64_GENERIC)
 TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
 endif

diff --git a/Makefile.riscv64 b/Makefile.riscv64
index ce7a27141..2239a3676 100644
--- a/Makefile.riscv64
+++ b/Makefile.riscv64
@@ -6,6 +6,10 @@ ifeq ($(CORE), x280)
 CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
 FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
 endif
+ifeq ($(CORE), RISCV64_ZVL256B)
+CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl256b -mabi=lp64d
+FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
+endif
 ifeq ($(CORE), RISCV64_GENERIC)
 CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
 FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static

diff --git a/TargetList.txt b/TargetList.txt
index f76f605cc..f65a18b50 100644
--- a/TargetList.txt
+++ b/TargetList.txt
@@ -121,6 +121,7 @@ Z14
 RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
 C910V
 x280
+RISCV64_ZVL256B
 
 11.LOONGARCH64:
 LOONGSONGENERIC

diff --git a/getarch.c b/getarch.c
index 772836347..12ea72052 100644
--- a/getarch.c
+++ b/getarch.c
@@ -1692,6 +1692,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #else
 #endif
 
+#ifdef FORCE_RISCV64_ZVL256B
+#define FORCE
+#define ARCHITECTURE    "RISCV64"
+#define SUBARCHITECTURE "RISCV64_ZVL256B"
+#define SUBDIRNAME      "riscv64"
+#define ARCHCONFIG   "-DRISCV64_ZVL256B " \
+       "-DL1_DATA_SIZE=64536 -DL1_DATA_LINESIZE=32 " \
+       "-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
+       "-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
+#define LIBNAME   "riscv64_zvl256b"
+#define CORENAME  "RISCV64_ZVL256B"
+#else
+#endif
+
 #if defined(FORCE_E2K) || defined(__e2k__)
 
 #define FORCE

diff --git a/kernel/riscv64/KERNEL.C910V b/kernel/riscv64/KERNEL.C910V
index 0da66fa35..2798a870e 100644
--- a/kernel/riscv64/KERNEL.C910V
+++ b/kernel/riscv64/KERNEL.C910V
@@ -59,6 +59,7 @@ SDOTKERNEL  = dot_vector.c
 DDOTKERNEL  = dot_vector.c
 CDOTKERNEL  = zdot_vector.c
 ZDOTKERNEL  = zdot_vector.c
+DSDOTKERNEL = dsdot_vector.c
 
 SNRM2KERNEL = nrm2_vector.c
 DNRM2KERNEL = nrm2_vector.c

diff --git a/kernel/riscv64/amin_vector.c b/kernel/riscv64/amin_vector.c
index 1c541f0fd..c4578eabf 100644
--- a/kernel/riscv64/amin_vector.c
+++ b/kernel/riscv64/amin_vector.c
@@ -31,15 +31,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # define LMUL m2
 # if defined(DOUBLE)
 #  define ELEN 64
+#  define ABS fabs
 # else
 #  define ELEN 32
+#  define ABS fabsf
 # endif
 #else
 # define LMUL m8
 # if defined(DOUBLE)
 #  define ELEN 64
+#  define ABS fabs
 # else
 #  define ELEN 32
+#  define ABS fabsf
 # endif
 #endif
 
@@ -69,7 +73,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
     FLOAT minf=0.0;
     if (n <= 0 || inc_x <= 0) return(minf);
 
-    minf = *x;
+    minf = ABS(*x);
     x += inc_x;
     --n;
     if (n == 0) return(minf);

diff --git a/kernel/riscv64/asum_vector.c b/kernel/riscv64/asum_vector.c
index 995dbf9a1..a652eafdd 100644
--- a/kernel/riscv64/asum_vector.c
+++ b/kernel/riscv64/asum_vector.c
@@ -67,7 +67,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
     BLASLONG i=0, j=0;
-    BLASLONG ix=0;
     FLOAT asumf=0.0;
     if (n <= 0 || inc_x <= 0) return(asumf);
     unsigned int gvl = 0;
@@ -103,17 +102,15 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
     unsigned int stride_x = inc_x * sizeof(FLOAT);
     if(gvl <= n/2){
         v_sum = VFMVVF_FLOAT(0, gvl);
-        BLASLONG inc_xv = inc_x * gvl;
         for(i=0,j=0; i

diff --git a/kernel/riscv64/dsdot_vector.c b/kernel/riscv64/dsdot_vector.c
new file mode 100644
--- /dev/null
+++ b/kernel/riscv64/dsdot_vector.c
@@ -0,0 +1,152 @@
+#include "common.h"
+
+double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
+{
+    BLASLONG i=0, j=0;
+    double dot = 0.0;
+
+    if (n < 1) return(dot);
+
+    vfloat64m4_t vr;
+    vfloat32m2_t vx, vy;
+    unsigned int gvl = 0;
+    vfloat64m1_t v_res, v_z0;
+    gvl = vsetvlmax_e64m1();
+    v_res = vfmv_v_f_f64m1(0, gvl);
+    v_z0 = vfmv_v_f_f64m1(0, gvl);
+
+    if(inc_x == 1 && inc_y == 1){
+        gvl = vsetvl_e64m4(n);
+        vr = vfmv_v_f_f64m4(0, gvl);
+        for(i=0,j=0; i<n/gvl; i++){
+            vx = vle32_v_f32m2(&x[j], gvl);
+            vy = vle32_v_f32m2(&y[j], gvl);
+            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            j += gvl;
+        }
+        if(j > 0){
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+        }
+        //tail
+        if(j < n){
+            gvl = vsetvl_e64m4(n-j);
+            vx = vle32_v_f32m2(&x[j], gvl);
+            vy = vle32_v_f32m2(&y[j], gvl);
+            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+            //vr = vfdot_vv_f32m2(vx, vy, gvl);
+            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+        }
+    }else if(inc_y == 1){
+        gvl = vsetvl_e64m4(n);
+        vr = vfmv_v_f_f64m4(0, gvl);
+        int stride_x = inc_x * sizeof(FLOAT);
+        for(i=0,j=0; i<n/gvl; i++){
+            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = vle32_v_f32m2(&y[j], gvl);
+            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            j += gvl;
+        }
+        if(j > 0){
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+        }
+        //tail
+        if(j < n){
+            gvl = vsetvl_e64m4(n-j);
+            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = vle32_v_f32m2(&y[j], gvl);
+            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+            //vr = vfdot_vv_f32m2(vx, vy, gvl);
+            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+        }
+    }else if(inc_x == 1){
+        gvl = vsetvl_e64m4(n);
+        vr = vfmv_v_f_f64m4(0, gvl);
+        int stride_y = inc_y * sizeof(FLOAT);
+        for(i=0,j=0; i<n/gvl; i++){
+            vx = vle32_v_f32m2(&x[j], gvl);
+            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            j += gvl;
+        }
+        if(j > 0){
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+        }
+        //tail
+        if(j < n){
+            gvl = vsetvl_e64m4(n-j);
+            vx = vle32_v_f32m2(&x[j], gvl);
+            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+            //vr = vfdot_vv_f32m2(vx, vy, gvl);
+            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+        }
+    }else{
+        gvl = vsetvl_e64m4(n);
+        vr = vfmv_v_f_f64m4(0, gvl);
+        int stride_x = inc_x * sizeof(FLOAT);
+        int stride_y = inc_y * sizeof(FLOAT);
+        for(i=0,j=0; i<n/gvl; i++){
+            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+            j += gvl;
+        }
+        if(j > 0){
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+        }
+        //tail
+        if(j < n){
+            gvl = vsetvl_e64m4(n-j);
+            vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+            vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+            vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+            //vr = vfdot_vv_f32m2(vx, vy, gvl);
+            vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+            v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+            dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+        }
+    }
+    return(dot);
+}

diff --git a/kernel/riscv64/iamin_vector.c b/kernel/riscv64/iamin_vector.c
index a58872960..0e591e697 100644
--- a/kernel/riscv64/iamin_vector.c
+++ b/kernel/riscv64/iamin_vector.c
@@ -139,7 +139,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
         v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
         FLOAT cur_minf = EXTRACT_FLOAT(v_res);
-        if(cur_minf > minf){
+        if(cur_minf < minf){
             //tail index
             v_min_index = VIDV_UINT(gvl);
             v_min_index = VADDVX_UINT(v_min_index, j, gvl);
@@ -185,7 +185,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
         v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
         FLOAT cur_minf = EXTRACT_FLOAT(v_res);
-        if(cur_minf > minf){
+        if(cur_minf < minf){
             //tail index
             v_min_index = VIDV_UINT(gvl);
             v_min_index = VADDVX_UINT(v_min_index, j, gvl);

diff --git a/kernel/riscv64/izamin_vector.c b/kernel/riscv64/izamin_vector.c
index a3877a46c..c76a38099 100644
--- a/kernel/riscv64/izamin_vector.c
+++ b/kernel/riscv64/izamin_vector.c
@@ -156,7 +156,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
         v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
         FLOAT cur_minf = EXTRACT_FLOAT(v_res);
-        if(cur_minf > minf){
+        if(cur_minf < minf){
             //tail index
             v_min_index = VIDV_UINT(gvl);
             v_min_index = VADDVX_UINT(v_min_index, j, gvl);

diff --git a/kernel/riscv64/nrm2_vector.c b/kernel/riscv64/nrm2_vector.c
index 141dffebf..5c03fbec7 100644
--- a/kernel/riscv64/nrm2_vector.c
+++ b/kernel/riscv64/nrm2_vector.c
@@ -104,7 +104,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
     BLASLONG i=0;
 
-    if(n <= 0) return(0.0);
+    if (n <= 0 || inc_x <= 0) return(0.0);
     if(n == 1) return (ABS(x[0]));
 
     unsigned int gvl = 0;

diff --git a/kernel/riscv64/nrm2_vector_dot.c b/kernel/riscv64/nrm2_vector_dot.c
index 06e61d695..dfa13a6f5 100644
--- a/kernel/riscv64/nrm2_vector_dot.c
+++ b/kernel/riscv64/nrm2_vector_dot.c
@@ -61,7 +61,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
     BLASLONG i=0, j=0;
     double len = 0.0 ;
 
-    if ( n < 0 ) return(0.0);
+    if ( n <= 0 ) return(0.0);
     if(n == 1) return (ABS(x[0]));
 
     FLOAT_V_T vr, v0, v1;

diff --git a/kernel/riscv64/swap_vector.c b/kernel/riscv64/swap_vector.c
index 3b467a586..f583f5392 100644
--- a/kernel/riscv64/swap_vector.c
+++ b/kernel/riscv64/swap_vector.c
@@ -67,7 +67,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
     BLASLONG stride_x, stride_y;
     FLOAT_V_T vx0, vx1, vy0, vy1;
 
-    if (n < 0) return(0);
+    if (n <= 0) return(0);
 
     unsigned int gvl = VSETVL((inc_x != 0 && inc_y != 0) ? n : 1);
     if( inc_x == 0 && inc_y == 0 ) { n = n & 1; }

diff --git a/kernel/riscv64/zamax_vector.c b/kernel/riscv64/zamax_vector.c
index 2dee5ab29..ec4a5a1e9 100644
--- a/kernel/riscv64/zamax_vector.c
+++ b/kernel/riscv64/zamax_vector.c
@@ -60,17 +60,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
 #ifdef RISCV_0p10_INTRINSICS
 #define VFREDMAXVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
-#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl)
 #else
 #define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m)
 #endif
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
-#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
 #define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
 #define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 #define VFMAXVV_FLOAT JOIN(RISCV_RVV(vfmax), _vv_f, ELEN, LMUL, _)
 #define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
+#define VFABSV_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
 
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
@@ -91,10 +89,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
     for(; i

From: Andrey Sokolov
Date: Wed, 24 Jan 2024 10:53:13 +0300
Subject: [PATCH 2/3] Fix x280 target include riscv_vector.h

---
 common_riscv64.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/common_riscv64.h b/common_riscv64.h
index 4b5f7dcc4..ab3bfa25a 100644
--- a/common_riscv64.h
+++ b/common_riscv64.h
@@ -91,7 +91,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
 #define BUFFER_SIZE     ( 32 << 20)
 #define SEEK_ADDRESS
 
-#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC))) || defined(RISCV64_ZVL128B)
+#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC))) || defined(RISCV64_ZVL128B) || defined(x280)
 # include <riscv_vector.h>
 #endif

From 73530b03fa6ecd03e7ceb2b37c234a0bb1626445 Mon Sep 17 00:00:00 2001
From: Andrey Sokolov
Date: Wed, 24 Jan 2024 11:38:14 +0300
Subject: [PATCH 3/3] remove RISCV64_ZVL256B additional extensions

---
 Makefile.prebuild | 2 +-
 Makefile.riscv64  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/Makefile.prebuild b/Makefile.prebuild
index 7824e15a8..98acca80e 100644
--- a/Makefile.prebuild
+++ b/Makefile.prebuild
@@ -60,7 +60,7 @@ TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
 endif
 
 ifeq ($(TARGET), RISCV64_ZVL256B)
-TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
+TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
 endif
 
 ifeq ($(TARGET), RISCV64_ZVL128B)

diff --git a/Makefile.riscv64 b/Makefile.riscv64
index 9d314d074..113cc57c5 100644
--- a/Makefile.riscv64
+++ b/Makefile.riscv64
@@ -7,8 +7,8 @@ CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
 FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
 endif
 ifeq ($(CORE), RISCV64_ZVL256B)
-CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl256b -mabi=lp64d
-FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
+CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d
+FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
 endif
 ifeq ($(CORE), RISCV64_ZVL128B)
 CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
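
Reviewer note, not part of the patch series: the new dsdot_vector.c implements DSDOT, a single-precision dot product accumulated in double precision, which is what the widening vfwmacc_vv_f64m4 multiply-accumulate provides in the kernel. Below is a minimal scalar reference for sanity-checking the kernel's output; the helper name dsdot_ref is purely illustrative, and it assumes positive increments, matching how the kernel's tail code indexes x and y.

    /* Scalar DSDOT reference: float inputs, double accumulation.
     * Hypothetical test helper, not part of the patch or of OpenBLAS. */
    #include <stdio.h>

    static double dsdot_ref(long n, const float *x, long inc_x,
                            const float *y, long inc_y)
    {
        double dot = 0.0;
        if (n < 1) return dot;             /* same early-out as the kernel */
        for (long i = 0; i < n; i++)       /* widen each product to double */
            dot += (double)x[i * inc_x] * (double)y[i * inc_y];
        return dot;
    }

    int main(void)
    {
        float x[4] = {1.0f, 2.0f, 3.0f, 4.0f};
        float y[4] = {0.5f, 0.25f, 0.125f, 0.0625f};
        printf("%f\n", dsdot_ref(4, x, 1, y, 1));  /* prints 1.625000 */
        return 0;
    }

Accumulating in double is why the kernel initializes a separate vfloat64m4_t accumulator instead of reducing in f32: the widening keeps the RVV result bit-for-bit comparable to a double-precision scalar loop like the one above.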