diff --git a/common_riscv64.h b/common_riscv64.h
index de79c8cab..f11e8b75d 100644
--- a/common_riscv64.h
+++ b/common_riscv64.h
@@ -91,12 +91,23 @@ static inline int blas_quickdivide(blasint x, blasint y){
 #define BUFFER_SIZE ( 32 << 20)
 #define SEEK_ADDRESS
 
-#if defined(C910V) || defined(RISCV64_ZVL256B) || defined(__riscv_v)
+#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC)))
 # include <riscv_vector.h>
+#endif
+
+#if defined( __riscv_xtheadc ) && defined( __riscv_v ) && ( __riscv_v <= 7000 )
+// t-head toolchain uses obsolete rvv intrinsics, can't build for C910V without this
+#define RISCV_0p10_INTRINSICS
+#define RISCV_RVV(x) x
+#else
+#define RISCV_RVV(x) __riscv_ ## x
+#endif
+
+#if defined(C910V) || defined(RISCV64_ZVL256B)
 # if !defined(DOUBLE)
-#  define EXTRACT_FLOAT(v) __riscv_vfmv_f_s_f32m1_f32(v)
+#  define EXTRACT_FLOAT(v) RISCV_RVV(vfmv_f_s_f32m1_f32)(v)
 # else
-#  define EXTRACT_FLOAT(v) __riscv_vfmv_f_s_f64m1_f64(v)
+#  define EXTRACT_FLOAT(v) RISCV_RVV(vfmv_f_s_f64m1_f64)(v)
 # endif
 #else
 # define EXTRACT_FLOAT(v) (v[0])
diff --git a/cpuid_riscv64.c b/cpuid_riscv64.c
index 15a539c20..c3d674357 100644
--- a/cpuid_riscv64.c
+++ b/cpuid_riscv64.c
@@ -72,12 +72,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define CPU_GENERIC 0
 #define CPU_C910V 1
-#define CPU_RISCV64_ZVL256B 2
-#define CPU_RISCV64_ZVL128B 3
+#define CPU_x280 2
+#define CPU_RISCV64_ZVL256B 3
+#define CPU_RISCV64_ZVL128B 4
 
 static char *cpuname[] = {
   "RISCV64_GENERIC",
   "C910V",
+  "x280",
   "CPU_RISCV64_ZVL256B",
   "CPU_RISCV64_ZVL128B"
 };
diff --git a/kernel/riscv64/amax_rvv.c b/kernel/riscv64/amax_rvv.c
index be0bdbea0..451fbc834 100644
--- a/kernel/riscv64/amax_rvv.c
+++ b/kernel/riscv64/amax_rvv.c
@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
@@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif
@@ -78,7 +78,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
             vx = VLEV_FLOAT(x, vl);
             vx = VFABSV_FLOAT(vx, vl);
 
-            vmax = VFMAXVV_FLOAT(vmax, vx, vl);
+            vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
         }
     }
     else {
@@ -90,7 +90,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
             vx = VLSEV_FLOAT(x, stride_x, vl);
             vx = VFABSV_FLOAT(vx, vl);
 
-            vmax = VFMAXVV_FLOAT(vmax, vx, vl);
+            vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
         }
     }
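Note on the RISCV_RVV() wrapper introduced in common_riscv64.h above: the T-Head toolchain ships the pre-ratification RVV 0.7/0.10 intrinsics, which are spelled without the __riscv_ prefix, so the wrapper either keeps the bare name or pastes the standard prefix on. A minimal standalone sketch of that token-pasting dispatch (stub functions and a hypothetical THEAD_TOOLCHAIN_DEMO switch stand in for the real intrinsics and the __riscv_xtheadc probe):

#include <stdio.h>

/* Stand-ins for the two intrinsic spellings (hypothetical stubs). */
int vfmv_f_s_f32m1_f32(int v)         { return v; }   /* RVV 0.7/0.10 name */
int __riscv_vfmv_f_s_f32m1_f32(int v) { return v; }   /* RVV 1.0 name      */

#ifdef THEAD_TOOLCHAIN_DEMO        /* models __riscv_xtheadc && __riscv_v <= 7000 */
#define RISCV_RVV(x) x             /* old toolchain: keep the bare name */
#else
#define RISCV_RVV(x) __riscv_ ## x /* RVV 1.0: paste the standard prefix */
#endif

int main(void) {
    /* Expands to __riscv_vfmv_f_s_f32m1_f32(7), or to the bare
     * vfmv_f_s_f32m1_f32(7) when THEAD_TOOLCHAIN_DEMO is defined. */
    printf("%d\n", RISCV_RVV(vfmv_f_s_f32m1_f32)(7));
    return 0;
}

Either expansion is a single ordinary call, so the kernels below can use one spelling, RISCV_RVV(name), for both toolchains.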
diff --git a/kernel/riscv64/amax_vector.c b/kernel/riscv64/amax_vector.c
index 81a39af32..b66d4871e 100644
--- a/kernel/riscv64/amax_vector.c
+++ b/kernel/riscv64/amax_vector.c
@@ -49,15 +49,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDMAXVS_FLOAT JOIN(__riscv_vfredmax_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
+#else
+#define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#endif
+#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
diff --git a/kernel/riscv64/amin_rvv.c b/kernel/riscv64/amin_rvv.c
index d4926084b..5186d7b12 100644
--- a/kernel/riscv64/amin_rvv.c
+++ b/kernel/riscv64/amin_rvv.c
@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
@@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif
@@ -78,7 +78,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
             vx = VLEV_FLOAT(x, vl);
             vx = VFABSV_FLOAT(vx, vl);
 
-            vmin = VFMINVV_FLOAT(vmin, vx, vl);
+            vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
         }
     }
     else {
@@ -90,7 +90,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
             vx = VLSEV_FLOAT(x, stride_x, vl);
             vx = VFABSV_FLOAT(vx, vl);
 
-            vmin = VFMINVV_FLOAT(vmin, vx, vl);
+            vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
        }
     }
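The *_rvv.c kernels above switch from plain vfmax/vfmin to the tail-undisturbed (_tu) forms because the last strip-mined iteration can run with vl < VLMAX: a plain op would leave the accumulator's tail lanes unspecified, while _tu keeps the running values there. A scalar model of that semantics as I read the spec (not OpenBLAS code):

#include <math.h>
#include <stdio.h>

#define VLMAX 8

/* vfmax_tu(vd, vs2, vs1, vl): lanes [0, vl) take the max,
 * lanes [vl, VLMAX) keep the destination's previous values. */
void vfmax_tu(double *vd, const double *vs2, const double *vs1, int vl) {
    for (int i = 0; i < vl; i++)
        vd[i] = fmax(vs2[i], vs1[i]);
    /* tail lanes [vl, VLMAX) are left untouched ("tail undisturbed") */
}

int main(void) {
    double vmax[VLMAX] = {5, 5, 5, 5, 5, 5, 5, 5};  /* running maxima   */
    double vx[VLMAX]   = {9, 1, 9, 1, 9, 1, 9, 1};  /* freshly loaded   */
    vfmax_tu(vmax, vmax, vx, 3);   /* short final iteration: vl = 3 */
    for (int i = 0; i < VLMAX; i++)
        printf("%g ", vmax[i]);    /* prints: 9 5 9 5 5 5 5 5 */
    printf("\n");
    return 0;
}

This is also why the call sites gain an extra first argument: the _tu intrinsics take the destination whose tail lanes are preserved, and the kernels pass the accumulator itself.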
diff --git a/kernel/riscv64/amin_vector.c b/kernel/riscv64/amin_vector.c
index c8ba75f4a..1c541f0fd 100644
--- a/kernel/riscv64/amin_vector.c
+++ b/kernel/riscv64/amin_vector.c
@@ -48,15 +48,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDMINVS_FLOAT JOIN(__riscv_vfredmin_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
+#else
+#define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#endif
+#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
diff --git a/kernel/riscv64/asum_rvv.c b/kernel/riscv64/asum_rvv.c
index 691591e22..0ea610cbb 100644
--- a/kernel/riscv64/asum_rvv.c
+++ b/kernel/riscv64/asum_rvv.c
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
@@ -50,7 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
@@ -76,7 +76,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
             vx = VLEV_FLOAT(x, vl);
             vx = VFABSV_FLOAT(vx, vl);
 
-            vsum = VFADDVV_FLOAT(vsum, vx, vl);
+            vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
         }
     }
     else {
@@ -88,7 +88,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
             vx = VLSEV_FLOAT(x, stride_x, vl);
             vx = VFABSV_FLOAT(vx, vl);
 
-            vsum = VFADDVV_FLOAT(vsum, vx, vl);
+            vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
         }
     }
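In the generic *_vector.c kernels the reductions become function-like macros under RISCV_0p10_INTRINSICS because the 0.10 intrinsics take the destination register as an explicit first operand; the macro binds the kernel-local v_res there so the call sites stay identical across both branches. A sketch of the two call shapes with stub types (illustrative, not the real intrinsic signatures):

#include <stdio.h>
#define VL 4

typedef struct { float v[VL]; } vf32;  /* stand-in for a full vector register */
typedef struct { float s; } vf32m1;    /* stand-in for the m1 result register */

/* RVV 1.0 call shape: result = vfredusum(vector, scalar_init, vl) */
vf32m1 redusum_1_0(vf32 va, vf32m1 vb, int vl) {
    vf32m1 r = vb;
    for (int i = 0; i < vl; i++) r.s += va.v[i];
    return r;
}

/* RVV 0.10 call shape: result = vfredusum(dest, vector, scalar_init, vl) */
vf32m1 redusum_0_10(vf32m1 dest, vf32 va, vf32m1 vb, int vl) {
    (void)dest;                        /* the destination operand is explicit */
    return redusum_1_0(va, vb, vl);
}

#define RISCV_0p10_INTRINSICS_DEMO     /* drop this to select the 1.0 branch */
#ifdef RISCV_0p10_INTRINSICS_DEMO
/* same trick as the patch: the macro captures the kernel-local v_res */
#define VFREDSUMVS_FLOAT(va, vb, gvl) redusum_0_10(v_res, va, vb, gvl)
#else
#define VFREDSUMVS_FLOAT(va, vb, gvl) redusum_1_0(va, vb, gvl)
#endif

int main(void) {
    vf32 vsum = {{1, 2, 3, 4}};
    vf32m1 v_res = {0.0f};
    v_res = VFREDSUMVS_FLOAT(vsum, v_res, VL);
    printf("%g\n", v_res.s);           /* prints 10 */
    return 0;
}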
diff --git a/kernel/riscv64/asum_vector.c b/kernel/riscv64/asum_vector.c
index d10bf99e6..995dbf9a1 100644
--- a/kernel/riscv64/asum_vector.c
+++ b/kernel/riscv64/asum_vector.c
@@ -49,16 +49,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDSUMVS_FLOAT JOIN(__riscv_vfredusum_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
-#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUMVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
+#else
+#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#endif
+#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
+#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
 
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
diff --git a/kernel/riscv64/axpby_vector.c b/kernel/riscv64/axpby_vector.c
index b77cb58fb..386c4a5f1 100644
--- a/kernel/riscv64/axpby_vector.c
+++ b/kernel/riscv64/axpby_vector.c
@@ -48,15 +48,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
-#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
-#define VFMACCVF_FLOAT JOIN(__riscv_vfmacc, _vf_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMULVF_FLOAT JOIN(__riscv_vfmul, _vf_f, ELEN, LMUL, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
+#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)
+#define VFMACCVF_FLOAT JOIN(RISCV_RVV(vfmacc), _vf_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMULVF_FLOAT JOIN(RISCV_RVV(vfmul), _vf_f, ELEN, LMUL, _)
 
 int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
 {
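For orientation, here is a toy reconstruction of the JOIN machinery these files rely on to assemble one intrinsic name from ELEN/LMUL tokens; the JOIN2_X indirection forces the macro arguments to expand before ## pastes them (the stub stands in for the real intrinsic):

#include <stdio.h>

#define JOIN2_X(x, y) x ## y
#define JOIN2(x, y)   JOIN2_X(x, y)
#define JOIN(v, w, x, y, z) JOIN2(JOIN2(JOIN2(JOIN2(v, w), x), y), z)

#define ELEN 32
#define LMUL m4

/* Hypothetical stub standing in for the real __riscv_vle32_v_f32m4. */
int __riscv_vle32_v_f32m4(void) { return 42; }

/* Same construction the kernels use for VLEV_FLOAT:
 * JOIN(__riscv_vle, 32, _v_f, 32, m4) -> __riscv_vle32_v_f32m4 */
#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)

int main(void) {
    printf("%d\n", VLEV_FLOAT());   /* calls __riscv_vle32_v_f32m4 */
    return 0;
}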
diff --git a/kernel/riscv64/axpy_vector.c b/kernel/riscv64/axpy_vector.c
index 3447107a6..e99ca8542 100644
--- a/kernel/riscv64/axpy_vector.c
+++ b/kernel/riscv64/axpy_vector.c
@@ -49,13 +49,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
-#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
-#define VFMACCVF_FLOAT JOIN(__riscv_vfmacc, _vf_f, ELEN, LMUL, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
+#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)
+#define VFMACCVF_FLOAT JOIN(RISCV_RVV(vfmacc), _vf_f, ELEN, LMUL, _)
 
 int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
 {
diff --git a/kernel/riscv64/copy_vector.c b/kernel/riscv64/copy_vector.c
index 710e8670a..ccbd6e482 100644
--- a/kernel/riscv64/copy_vector.c
+++ b/kernel/riscv64/copy_vector.c
@@ -47,12 +47,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
 
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
-#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
+#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)
 
 int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
 {
@@ -71,7 +71,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
         stride_x = inc_x * sizeof(FLOAT);
         if(gvl <= n/4){
             BLASLONG inc_xv = inc_x * gvl;
-            BLASLONG gvl3 = gvl * 3;
+            unsigned int gvl3 = gvl * 3;
             BLASLONG inc_xv3 = inc_xv * 3;
             for(i=0,j=0; i
-        if required_regs > 32:
-            raise Exception("{} vector registers needed during accumulation for unrolling {} x {}{} but only 32 are available".format(
-                required_regs, N, M, (" with wide accumulator" if settings['LMUL_ACC'].value > 1 else '')
+        if required_regs > (32 // settings['LMUL_ACC'].value):
+            raise Exception("{} vector registers needed during accumulation for unrolling {} x {}{} but only {} are available".format(
+                required_regs, N, M, (" with wide accumulator" if settings['LMUL_ACC'].value > 1 else ''), 32 // settings['LMUL_ACC'].value
             ))
 
     TRMM = (settings['op'].value == 'trmm')
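The tightened generator check accounts for widened accumulators: with LMUL_ACC > 1 each accumulator occupies a group of LMUL_ACC architectural vector registers, so only 32 // LMUL_ACC register names are addressable. A C rendering of the same arithmetic (the required_regs formula here is a made-up stand-in, not the generator's):

#include <stdio.h>

int main(void) {
    int N = 8, M = 4;                       /* hypothetical unroll factors */
    int LMUL_ACC = 2;                       /* widened accumulator         */
    int required_regs = N + M + N * M / 2;  /* stand-in register count     */
    int available = 32 / LMUL_ACC;          /* register groups, not regs   */
    if (required_regs > available)
        printf("%d vector registers needed for unrolling %d x %d "
               "but only %d are available\n",
               required_regs, N, M, available);
    return 0;
}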
@@ -448,7 +450,8 @@ def generate_M_tails( dest, settings, M, N ):
     M_tail = int(M/2)
     M_tail_min = settings['M_tail_scalar_from'].value
-    vlenmax = int( settings['reg_width_bits'].value / settings['ELEN_PARAM'].value )
+    vlenmax = int(settings['reg_width_bits'].value * settings['LMUL_ACC'].value
+                  / settings['ELEN_PARAM'].value )
     TRMM = (settings['op'].value == 'trmm')
     is_complex = settings['complex'].value
     generate_gemm_kernel_inner = generate_gemm_kernel_inner_complex if is_complex else generate_gemm_kernel_inner_real
@@ -667,4 +670,4 @@ def main():
         ERROR("unsupported kernel type {}".format(settings['op']))
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/kernel/riscv64/iamax_rvv.c b/kernel/riscv64/iamax_rvv.c
index ef7850a55..8362d7cef 100644
--- a/kernel/riscv64/iamax_rvv.c
+++ b/kernel/riscv64/iamax_rvv.c
@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -68,12 +68,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -106,11 +106,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             //index where element greater than v_max
             mask = VMFLTVV_FLOAT(v_max, vx, vl);
-            v_max_index = VIDV_MASK_UINT(mask, vl);
-            v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+            v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+            v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
             //update v_max
-            v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+            v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
         }
     }
     else {
@@ -125,11 +125,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             //index where element greater than v_max
             mask = VMFLTVV_FLOAT(v_max, vx, vl);
-            v_max_index = VIDV_MASK_UINT(mask, vl);
-            v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+            v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+            v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
             //update v_max
-            v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+            v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
         }
     }
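iamax_rvv.c above also moves the masked index bookkeeping from the _m forms to _tumu (tail-undisturbed, mask-undisturbed): the kernel needs inactive lanes to keep the indices of previously seen maxima, which the plain masked forms do not guarantee. A scalar model of that update, assuming the usual tumu semantics:

#include <stdio.h>
#define VLMAX 4

/* vid_tumu: lanes where the mask is set get their lane id;
 * masked-off lanes and lanes >= vl keep their previous values. */
void vid_tumu(const int *mask, unsigned *vd, int vl) {
    for (int i = 0; i < vl; i++)
        if (mask[i]) vd[i] = (unsigned)i;
}

/* vadd_vx_tumu: masked lanes get vs[i] + j, the rest are untouched. */
void vadd_vx_tumu(const int *mask, unsigned *vd, const unsigned *vs,
                  unsigned j, int vl) {
    for (int i = 0; i < vl; i++)
        if (mask[i]) vd[i] = vs[i] + j;
}

int main(void) {
    unsigned idx[VLMAX] = {0, 1, 2, 3};  /* indices of current maxima  */
    int mask[VLMAX]     = {0, 1, 0, 1};  /* lanes where vx > v_max     */
    vid_tumu(mask, idx, VLMAX);
    vadd_vx_tumu(mask, idx, idx, 8, VLMAX);  /* this strip starts at j=8 */
    for (int i = 0; i < VLMAX; i++)
        printf("%u ", idx[i]);           /* prints: 0 9 2 11 */
    printf("\n");
    return 0;
}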
diff --git a/kernel/riscv64/iamax_vector.c b/kernel/riscv64/iamax_vector.c
index 92880fbcf..800312400 100644
--- a/kernel/riscv64/iamax_vector.c
+++ b/kernel/riscv64/iamax_vector.c
@@ -31,50 +31,62 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(DOUBLE)
 
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
 #define FLOAT_V_T vfloat64m4_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m4_f64m1
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) vfredmax_vs_f64m4_f64m1(v_res, va, vb, gvl)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m4_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m4)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f64m4_f64m1)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m4_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m4)
+#endif
 #define MASK_T vbool16_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f64m4_b16
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m4
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m4_b16
-#define VMFIRSTM __riscv_vfirst_m_b16
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f64m4_b16)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f64m4)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f64m4_b16)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b16)
 #define UINT_V_T vuint64m4_t
-#define VIDV_UINT __riscv_vid_v_u64m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_mu
-#define VADDVX_UINT __riscv_vadd_vx_u64m4
-#define VMVVX_UINT __riscv_vmv_v_x_u64m4
-#define VFABS_FLOAT __riscv_vfabs_v_f64m4
-#define VCOMPRESS __riscv_vcompress_vm_u64m4
-#define VMV_X __riscv_vmv_x_s_u64m4_u64
+#define VIDV_UINT RISCV_RVV(vid_v_u64m4)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m4)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m4)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f64m4)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m4_u64)
 #else
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
 #define FLOAT_V_T vfloat32m4_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m4_f32m1
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) vfredmax_vs_f32m4_f32m1(v_res, va, vb, gvl)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m4_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m4)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f32m4_f32m1)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m4_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m4)
+#endif
 #define MASK_T vbool8_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f32m4_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m4
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m4_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f32m4_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f32m4)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f32m4_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
 #define UINT_V_T vuint32m4_t
-#define VIDV_UINT __riscv_vid_v_u32m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_mu
-#define VADDVX_UINT __riscv_vadd_vx_u32m4
-#define VMVVX_UINT __riscv_vmv_v_x_u32m4
-#define VFABS_FLOAT __riscv_vfabs_v_f32m4
-#define VCOMPRESS __riscv_vcompress_vm_u32m4
-#define VMV_X __riscv_vmv_x_s_u32m4_u32
+#define VIDV_UINT RISCV_RVV(vid_v_u32m4)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m4)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m4)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f32m4)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m4_u32)
 #endif
diff --git a/kernel/riscv64/iamin_rvv.c b/kernel/riscv64/iamin_rvv.c
index 56a086fed..f90dbb545 100644
--- a/kernel/riscv64/iamin_rvv.c
+++ b/kernel/riscv64/iamin_rvv.c
@@ -43,12 +43,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -69,12 +69,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -107,11 +107,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             // index where element less than v_min
             mask = VMFLTVV_FLOAT(vx, v_min, vl);
-            v_min_index = VIDV_MASK_UINT(mask, vl);
-            v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+            v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+            v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
             //update v_min and start_index j
-            v_min = VFMINVV_FLOAT(v_min, vx, vl);
+            v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
         }
     }
     else {
@@ -126,11 +126,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             // index where element less than v_min
             mask = VMFLTVV_FLOAT(vx, v_min, vl);
-            v_min_index = VIDV_MASK_UINT(mask, vl);
-            v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+            v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+            v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
             //update v_min and start_index j
-            v_min = VFMINVV_FLOAT(v_min, vx, vl);
+            v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
         }
     }
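The VCOMPRESS macros in the index kernels hide an operand-order difference between the two intrinsic generations: the 1.0 spelling is vcompress(src, mask, vl), while the 0.10 one is vcompress(mask, dest, src, vl) with an explicit destination, which the patch binds to the kernel-local `compressed`. Sketch with stub types (not the real signatures):

#include <stdio.h>
#define VL 4

typedef struct { unsigned v[VL]; } vu32;   /* stand-in vector of indices */

/* RVV 1.0 shape: result = vcompress(src, mask, vl) */
vu32 vcompress_1_0(vu32 src, const int *mask, int vl) {
    vu32 out = {{0}};
    int k = 0;
    for (int i = 0; i < vl; i++)
        if (mask[i]) out.v[k++] = src.v[i];  /* pack selected lanes left */
    return out;
}

/* RVV 0.10 shape: result = vcompress(mask, dest, src, vl) */
vu32 vcompress_0_10(const int *mask, vu32 dest, vu32 src, int vl) {
    (void)dest;                              /* explicit destination operand */
    return vcompress_1_0(src, mask, vl);
}

#define RISCV_0p10_INTRINSICS_DEMO           /* drop for the 1.0 branch */
#ifdef RISCV_0p10_INTRINSICS_DEMO
/* as in the patch: reorder the args and bind `compressed` as the dest */
#define VCOMPRESS(va, vm, gvl) vcompress_0_10(vm, compressed, va, gvl)
#else
#define VCOMPRESS(va, vm, gvl) vcompress_1_0(va, vm, gvl)
#endif

int main(void) {
    vu32 v_max_index = {{10, 11, 12, 13}};
    int mask[VL] = {0, 1, 1, 0};
    vu32 compressed = {{0}};
    compressed = VCOMPRESS(v_max_index, mask, VL);
    printf("%u %u\n", compressed.v[0], compressed.v[1]);  /* prints: 11 12 */
    return 0;
}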
diff --git a/kernel/riscv64/iamin_vector.c b/kernel/riscv64/iamin_vector.c
index 0503f9948..a58872960 100644
--- a/kernel/riscv64/iamin_vector.c
+++ b/kernel/riscv64/iamin_vector.c
@@ -31,52 +31,66 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(DOUBLE)
 
-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) vfredmin_vs_f64m8_f64m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT vid_v_u64m8_m
+#define VADDVX_MASK_UINT vadd_vx_u64m8_m
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m8)(vm, compressed, va, gvl)
+#else
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
-#define MASK_T vbool8_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f64m8_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f64m8_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
-#define UINT_V_T vuint64m8_t
 #define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
-#define VIDV_UINT __riscv_vid_v_u64m8
 #define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u64m8
-#define VMVVX_UINT __riscv_vmv_v_x_u64m8
-#define VFABS_FLOAT __riscv_vfabs_v_f64m8
-#define VCOMPRESS __riscv_vcompress_vm_u64m8
-#define VMV_X __riscv_vmv_x_s_u64m8_u64
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m8)
+#endif
+#define MASK_T vbool8_t
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f64m8_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f64m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f64m8_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
+#define UINT_V_T vuint64m8_t
+#define VIDV_UINT RISCV_RVV(vid_v_u64m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f64m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m8_u64)
 #else
-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) vfredmin_vs_f32m8_f32m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT vid_v_u32m8_m
+#define VADDVX_MASK_UINT vadd_vx_u32m8_m
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m8)(vm, compressed, va, gvl)
+#else
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
-#define MASK_T vbool4_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f32m8_b4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f32m8_b4
-#define VMFIRSTM __riscv_vfirst_m_b4
-#define UINT_V_T vuint32m8_t
 #define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
-#define VIDV_UINT __riscv_vid_v_u32m8
 #define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u32m8
-#define VMVVX_UINT __riscv_vmv_v_x_u32m8
-#define VFABS_FLOAT __riscv_vfabs_v_f32m8
-#define VCOMPRESS __riscv_vcompress_vm_u32m8
-#define VMV_X __riscv_vmv_x_s_u32m8_u32
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m8)
+#endif
+#define MASK_T vbool4_t
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f32m8_b4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f32m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f32m8_b4)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b4)
+#define UINT_V_T vuint32m8_t
+#define VIDV_UINT RISCV_RVV(vid_v_u32m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f32m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m8_u32)
 #endif
diff --git a/kernel/riscv64/imax_rvv.c b/kernel/riscv64/imax_rvv.c
index 5b60a56f7..b1a77b178 100644
--- a/kernel/riscv64/imax_rvv.c
+++ b/kernel/riscv64/imax_rvv.c
@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m8_b8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -67,12 +67,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m8_b4
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -104,11 +104,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             //index where element greater than v_max
             mask = VMFLTVV_FLOAT(v_max, vx, vl);
-            v_max_index = VIDV_MASK_UINT(mask, vl);
-            v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+            v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+            v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
             //update v_max and start_index j
-            v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+            v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
         }
     }
     else {
@@ -122,11 +122,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             //index where element greater than v_max
            mask = VMFLTVV_FLOAT(v_max, vx, vl);
-            v_max_index = VIDV_MASK_UINT(mask, vl);
-            v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+            v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+            v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
             //update v_max and start_index j
-            v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+            v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
         }
     }
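For orientation, the index kernels recover the winning index after the vector loop in three steps: reduce the per-lane maxima to a scalar, build a mask of lanes that reach it (vmfge), and take the first set bit (vfirst) to select that lane's stored index. A scalar model of that post-loop sequence:

#include <stdio.h>
#define VL 4

int main(void) {
    double v_max[VL] = {3.0, 9.0, 9.0, 1.0};  /* per-lane running maxima */
    unsigned idx[VL] = {4, 13, 6, 7};         /* their source indices    */

    double maxf = v_max[0];                   /* vfredmax: scalar max    */
    for (int i = 1; i < VL; i++)
        if (v_max[i] > maxf) maxf = v_max[i];

    int first = -1;                           /* vmfge + vfirst          */
    for (int i = 0; i < VL; i++)
        if (v_max[i] >= maxf) { first = i; break; }

    printf("max %g at index %u\n", maxf, idx[first]); /* max 9 at index 13 */
    return 0;
}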
diff --git a/kernel/riscv64/imax_vector.c b/kernel/riscv64/imax_vector.c
index e24f9fd48..308fa15a4 100644
--- a/kernel/riscv64/imax_vector.c
+++ b/kernel/riscv64/imax_vector.c
@@ -31,50 +31,64 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(DOUBLE)
 
-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) vfredmax_vs_f64m8_f64m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f64m8_f64m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m8)
+#endif
 #define MASK_T vbool8_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f64m8_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m8_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f64m8_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f64m8)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f64m8_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
-#define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u64m8
-#define VMVVX_UINT __riscv_vmv_v_x_u64m8
-#define VCOMPRESS __riscv_vcompress_vm_u64m8
-#define VMV_X __riscv_vmv_x_s_u64m8_u64
+#define VIDV_UINT RISCV_RVV(vid_v_u64m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m8_u64)
 #else
-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) vfredmax_vs_f32m8_f32m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f32m8_f32m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m8)
+#endif
 #define MASK_T vbool4_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f32m8_b4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m8_b4
-#define VMFIRSTM __riscv_vfirst_m_b4
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f32m8_b4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f32m8)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f32m8_b4)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b4)
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
-#define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u32m8
-#define VMVVX_UINT __riscv_vmv_v_x_u32m8
-#define VCOMPRESS __riscv_vcompress_vm_u32m8
-#define VMV_X __riscv_vmv_x_s_u32m8_u32
+#define VIDV_UINT RISCV_RVV(vid_v_u32m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m8_u32)
 #endif
diff --git a/kernel/riscv64/imin_rvv.c b/kernel/riscv64/imin_rvv.c
index b49544a1b..1de7f3233 100644
--- a/kernel/riscv64/imin_rvv.c
+++ b/kernel/riscv64/imin_rvv.c
@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFLEVF_FLOAT __riscv_vmfle_vf_f64m8_b8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -67,12 +67,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFLEVF_FLOAT __riscv_vmfle_vf_f32m8_b4
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -104,11 +104,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             // index where element less than v_min
             mask = VMFLTVV_FLOAT(vx, v_min, vl);
-            v_min_index = VIDV_MASK_UINT(mask, vl);
-            v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+            v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+            v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
             //update v_min and start_index j
-            v_min = VFMINVV_FLOAT(v_min, vx, vl);
+            v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
         }
     }
     else {
@@ -122,11 +122,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             // index where element less than v_min
             mask = VMFLTVV_FLOAT(vx, v_min, vl);
-            v_min_index = VIDV_MASK_UINT(mask, vl);
-            v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+            v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+            v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
             //update v_min and start_index j
-            v_min = VFMINVV_FLOAT(v_min, vx, vl);
+            v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
         }
     }
diff --git a/kernel/riscv64/imin_vector.c b/kernel/riscv64/imin_vector.c
index a60bd3d07..ec36b8eb9 100644
--- a/kernel/riscv64/imin_vector.c
+++ b/kernel/riscv64/imin_vector.c
@@ -31,50 +31,64 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(DOUBLE)
 
-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
-#define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) vfredmin_vs_f64m8_f64m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT(mask, gvl) RISCV_RVV(vid_v_u64m8_m)(mask, v_min_index, gvl)
+#define VADDVX_MASK_UINT(mask, a, b, gvl) RISCV_RVV(vadd_vx_u64m8_m)(mask, a, a, b, gvl)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMINVS_FLOAT RISCV_RVV(vfredmin_vs_f64m8_f64m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_m)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m8)
+#endif
 #define MASK_T vbool8_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f64m8_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f64m8_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f64m8_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f64m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f64m8_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
-#define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
-#define VADDVX_UINT __riscv_vadd_vx_u64m8
-#define VMVVX_UINT __riscv_vmv_v_x_u64m8
-#define VCOMPRESS __riscv_vcompress_vm_u64m8
-#define VMV_X __riscv_vmv_x_s_u64m8_u64
+#define VIDV_UINT RISCV_RVV(vid_v_u64m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m8_u64)
 #else
-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) vfredmin_vs_f32m8_f32m1(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT(mask, gvl) RISCV_RVV(vid_v_u32m8_m)(mask, v_min_index, gvl)
+#define VADDVX_MASK_UINT(mask, a, b, gvl) RISCV_RVV(vadd_vx_u32m8_m)(mask, a, a, b, gvl)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m8)(vm, compressed, va, gvl)
+#else
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
-#define MASK_T vbool4_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f32m8_b4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f32m8_b4
-#define VMFIRSTM __riscv_vfirst_m_b4
-#define UINT_V_T vuint32m8_t
 #define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
-#define VIDV_UINT __riscv_vid_v_u32m8
 #define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
-#define VADDVX_UINT __riscv_vadd_vx_u32m8
-#define VMVVX_UINT __riscv_vmv_v_x_u32m8
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m8)
+#endif
+#define MASK_T vbool4_t
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f32m8_b4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f32m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f32m8_b4)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b4)
+#define UINT_V_T vuint32m8_t
+#define VIDV_UINT RISCV_RVV(vid_v_u32m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m8_u32)
 #endif
diff --git a/kernel/riscv64/izamax_rvv.c b/kernel/riscv64/izamax_rvv.c
index e61d0cbec..e93f0056c 100644
--- a/kernel/riscv64/izamax_rvv.c
+++ b/kernel/riscv64/izamax_rvv.c
@@ -44,13 +44,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m4
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFIRSTM __riscv_vfirst_m_b16
 #define UINT_V_T vuint64m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m4_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m4_tumu
 #define VIDV_UINT __riscv_vid_v_u64m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m4
 #define VMVVX_UINT __riscv_vmv_v_x_u64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -73,13 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m4
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint32m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m4_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m4_tumu
 #define VIDV_UINT __riscv_vid_v_u32m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m4
 #define VMVVX_UINT __riscv_vmv_v_x_u32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -116,11 +116,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             //index where element greater than v_max
             mask = VMFLTVV_FLOAT(v_max, vx0, vl);
-            v_max_index = VIDV_MASK_UINT(mask, vl);
-            v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+            v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+            v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
             //update v_max and start_index j
-            v_max = VFMAXVV_FLOAT(v_max, vx0, vl);
+            v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx0, vl);
         }
     }
     else {
@@ -138,11 +138,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             //index where element greater than v_max
             mask = VMFLTVV_FLOAT(v_max, vx0, vl);
-            v_max_index = VIDV_MASK_UINT(mask, vl);
-            v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
-
+            v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+            v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
+
             //update v_max and start_index j
-            v_max = VFMAXVV_FLOAT(v_max, vx0, vl);
+            v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx0, vl);
         }
     }
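The complex izamax/izamin kernels rank elements by |re| + |im| (the BLAS cabs1 convention), not the Euclidean modulus; that is what the vfabs + vfadd pairs above compute. Scalar shape of the same search:

#include <math.h>
#include <stdio.h>

static double cabs1(double re, double im) { return fabs(re) + fabs(im); }

int main(void) {
    /* interleaved (re, im) pairs, stride 2 as with inc_x == 1 */
    double x[] = {3.0, -4.0, -1.0, 1.0, 5.0, 6.5};
    int n = 3, best = 0;
    for (int i = 1; i < n; i++)
        if (cabs1(x[2*i], x[2*i+1]) > cabs1(x[2*best], x[2*best+1]))
            best = i;
    printf("izamax -> %d\n", best + 1);   /* 1-based like BLAS: prints 3 */
    return 0;
}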
diff --git a/kernel/riscv64/izamax_vector.c b/kernel/riscv64/izamax_vector.c
index 89cd510c1..d33e89c00 100644
--- a/kernel/riscv64/izamax_vector.c
+++ b/kernel/riscv64/izamax_vector.c
@@ -31,58 +31,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(DOUBLE)
 
-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) RISCV_RVV(vfredmax_vs_f64m8_f64m1)(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f64m8_f64m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m8)
+#endif
 #define MASK_T vbool8_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f64m8_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m8_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f64m8_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f64m8)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f64m8_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
 #define UINT_V_T vuint64m8_t
-#define VSEVU_UINT __riscv_vse64_v_u64m8
+#define VSEVU_UINT RISCV_RVV(vse64_v_u64m8)
 #define UINT_T long unsigned int
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
-#define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u64m8
-#define VMVVX_UINT __riscv_vmv_v_x_u64m8
-#define VFABS_FLOAT __riscv_vfabs_v_f64m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
-#define VCOMPRESS __riscv_vcompress_vm_u64m8
-#define VMV_X __riscv_vmv_x_s_u64m8_u64
+#define VIDV_UINT RISCV_RVV(vid_v_u64m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f64m8)
+#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f64m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m8_u64)
 #else
-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAXVS_FLOAT(va, vb, gvl) RISCV_RVV(vfredmax_vs_f32m8_f32m1)(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMAXVS_FLOAT RISCV_RVV(vfredmax_vs_f32m8_f32m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m8)
+#endif
 #define MASK_T vbool4_t
-#define VMFLTVV_FLOAT __riscv_vmflt_vv_f32m8_b4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
-#define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m8_b4
-#define VMFIRSTM __riscv_vfirst_m_b4
+#define VMFLTVV_FLOAT RISCV_RVV(vmflt_vv_f32m8_b4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMAXVV_FLOAT RISCV_RVV(vfmax_vv_f32m8)
+#define VMFGEVF_FLOAT RISCV_RVV(vmfge_vf_f32m8_b4)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b4)
 #define UINT_V_T vuint32m8_t
 #define UINT_T unsigned int
-#define VSEVU_UINT __riscv_vse32_v_u32m8
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
-#define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u32m8
-#define VMVVX_UINT __riscv_vmv_v_x_u32m8
-#define VFABS_FLOAT __riscv_vfabs_v_f32m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
-#define VCOMPRESS __riscv_vcompress_vm_u32m8
-#define VMV_X __riscv_vmv_x_s_u32m8_u32
+#define VSEVU_UINT RISCV_RVV(vse32_v_u32m8)
+#define VIDV_UINT RISCV_RVV(vid_v_u32m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f32m8)
+#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f32m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u32m8_u32)
 #endif
diff --git a/kernel/riscv64/izamin_rvv.c b/kernel/riscv64/izamin_rvv.c
index 297b3c99a..b5bc27404 100644
--- a/kernel/riscv64/izamin_rvv.c
+++ b/kernel/riscv64/izamin_rvv.c
@@ -43,13 +43,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m4
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFIRSTM __riscv_vfirst_m_b16
 #define UINT_V_T vuint64m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m4_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m4_tumu
 #define VIDV_UINT __riscv_vid_v_u64m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m4
 #define VMVVX_UINT __riscv_vmv_v_x_u64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -70,13 +70,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m4
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint32m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m4_m
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m4_tumu
 #define VIDV_UINT __riscv_vid_v_u32m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_m
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m4
 #define VMVVX_UINT __riscv_vmv_v_x_u32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -113,11 +113,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             // index where element less than v_min
             mask = VMFLTVV_FLOAT(vx0, v_min, vl);
-            v_min_index = VIDV_MASK_UINT(mask, vl);
-            v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+            v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+            v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
             //update v_min and start_index j
-            v_min = VFMINVV_FLOAT(v_min, vx0, vl);
+            v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx0, vl);
         }
     }
     else {
@@ -136,11 +136,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
             // index where element less than v_min
             mask = VMFLTVV_FLOAT(vx0, v_min, vl);
-            v_min_index = VIDV_MASK_UINT(mask, vl);
-            v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+            v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+            v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
             //update v_min and start_index j
-            v_min = VFMINVV_FLOAT(v_min, vx0, vl);
+            v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx0, vl);
         }
     }
diff --git a/kernel/riscv64/izamin_vector.c b/kernel/riscv64/izamin_vector.c
index 74daf32b8..a3877a46c 100644
--- a/kernel/riscv64/izamin_vector.c
+++ b/kernel/riscv64/izamin_vector.c
@@ -31,58 +31,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if defined(DOUBLE)
 
-#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n)
 #define FLOAT_V_T vfloat64m8_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m8
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
-#define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) RISCV_RVV(vfredmin_vs_f64m8_f64m1)(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u64m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMINVS_FLOAT RISCV_RVV(vfredmin_vs_f64m8_f64m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u64m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u64m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u64m8)
+#endif
 #define MASK_T vbool8_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f64m8_b8
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f64m8_b8
-#define VMFIRSTM __riscv_vfirst_m_b8
+#define VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f64m8_b8)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f64m8)
+#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f64m8_b8)
+#define VMFIRSTM RISCV_RVV(vfirst_m_b8)
 #define UINT_V_T vuint64m8_t
 #define VSEVU_UINT vse64_v_u64m8
 #define UINT_T long unsigned int
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
-#define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
-#define VADDVX_UINT __riscv_vadd_vx_u64m8
-#define VMVVX_UINT __riscv_vmv_v_x_u64m8
-#define VFABS_FLOAT __riscv_vfabs_v_f64m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
-#define VCOMPRESS __riscv_vcompress_vm_u64m8
-#define VMV_X __riscv_vmv_x_s_u64m8_u64
+#define VIDV_UINT RISCV_RVV(vid_v_u64m8)
+#define VADDVX_UINT RISCV_RVV(vadd_vx_u64m8)
+#define VMVVX_UINT RISCV_RVV(vmv_v_x_u64m8)
+#define VFABS_FLOAT RISCV_RVV(vfabs_v_f64m8)
+#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f64m8)
+#define VMV_X RISCV_RVV(vmv_x_s_u64m8_u64)
 #else
-#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n)
 #define FLOAT_V_T vfloat32m8_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m8
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
-#define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMINVS_FLOAT(va, vb, gvl) RISCV_RVV(vfredmin_vs_f32m8_f32m1)(v_res, va, vb, gvl)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_m)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_m)
+#define VCOMPRESS(va, vm, gvl) RISCV_RVV(vcompress_vm_u32m8)(vm, compressed, va, gvl)
+#else
+#define VFREDMINVS_FLOAT RISCV_RVV(vfredmin_vs_f32m8_f32m1)
+#define VIDV_MASK_UINT RISCV_RVV(vid_v_u32m8_mu)
+#define VADDVX_MASK_UINT RISCV_RVV(vadd_vx_u32m8_mu)
+#define VCOMPRESS RISCV_RVV(vcompress_vm_u32m8)
+#endif
 #define MASK_T vbool4_t
-#define VMFGTVV_FLOAT __riscv_vmfgt_vv_f32m8_b4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
-#define VMFLEVF_FLOAT __riscv_vmfle_vf_f32m8_b4
-#define VMFIRSTM __riscv_vfirst_m_b4
VMFGTVV_FLOAT RISCV_RVV(vmfgt_vv_f32m8_b4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1) +#define VFMINVV_FLOAT RISCV_RVV(vfmin_vv_f32m8) +#define VMFLEVF_FLOAT RISCV_RVV(vmfle_vf_f32m8_b4) +#define VMFIRSTM RISCV_RVV(vfirst_m_b4) #define UINT_V_T vuint32m8_t #define UINT_T unsigned int -#define VSEVU_UINT __riscv_vse32_v_u32m8 -#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu -#define VIDV_UINT __riscv_vid_v_u32m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu -#define VADDVX_UINT __riscv_vadd_vx_u32m8 -#define VMVVX_UINT __riscv_vmv_v_x_u32m8 -#define VFABS_FLOAT __riscv_vfabs_v_f32m8 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8 -#define VCOMPRESS __riscv_vcompress_vm_u32m8 -#define VMV_X __riscv_vmv_x_s_u32m8_u32 +#define VSEVU_UINT RISCV_RVV(vse32_v_u32m8) +#define VIDV_UINT RISCV_RVV(vid_v_u32m8) +#define VADDVX_UINT RISCV_RVV(vadd_vx_u32m8) +#define VMVVX_UINT RISCV_RVV(vmv_v_x_u32m8) +#define VFABS_FLOAT RISCV_RVV(vfabs_v_f32m8) +#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f32m8) +#define VMV_X RISCV_RVV(vmv_x_s_u32m8_u32) #endif diff --git a/kernel/riscv64/max_rvv.c b/kernel/riscv64/max_rvv.c index 9315321f4..745c27bf4 100644 --- a/kernel/riscv64/max_rvv.c +++ b/kernel/riscv64/max_rvv.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 #else #define VSETVL(n) __riscv_vsetvl_e64m8(n) @@ -52,7 +52,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 #endif @@ -75,7 +75,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLEV_FLOAT(x, vl); - vmax = VFMAXVV_FLOAT(vmax, vx, vl); + vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl); } } else { @@ -86,7 +86,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLSEV_FLOAT(x, stride_x, vl); - vmax = VFMAXVV_FLOAT(vmax, vx, vl); + vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl); } } diff --git a/kernel/riscv64/max_vector.c b/kernel/riscv64/max_vector.c index 97f602e51..ee9920cd2 100644 --- a/kernel/riscv64/max_vector.c +++ b/kernel/riscv64/max_vector.c @@ -54,17 +54,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
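A note on the function-like 0p10 macros above (VFREDMINVS_FLOAT, VIDV_MASK_UINT, VCOMPRESS): the 0.7/0.10-era intrinsics used by the T-Head toolchain differ from the ratified v1.0 (__riscv_-prefixed) ones in more than the prefix. Reductions take an explicit destination operand, vcompress takes (mask, dest, src, vl) rather than (src, mask, vl), and masked ops are spelled _m where v1.0 uses _mu. The 0p10 variants therefore splice the caller's locals (v_res, compressed) into the argument list so the kernel bodies compile unchanged under either toolchain. Below is a minimal sketch of the v1.0 calls with the 0.10 forms in comments; it is a hypothetical helper for illustration, not OpenBLAS code.

#include <riscv_vector.h>
#include <stdint.h>

/* Hypothetical helper: return the vector minimum and pack the indices of
   non-positive lanes, written against the v1.0 intrinsics. */
static uint64_t demo_step(const double *x, double *min_out,
                          uint64_t *idx_out, size_t vl)
{
    vfloat64m8_t vx = __riscv_vle64_v_f64m8(x, vl);

    /* v1.0 reductions have no destination operand;
       v0.10: vfredmin_vs_f64m8_f64m1(v_res, vx, vz, vl) */
    vfloat64m1_t vz    = __riscv_vfmv_v_f_f64m1(__builtin_inf(), 1);
    vfloat64m1_t v_res = __riscv_vfredmin_vs_f64m8_f64m1(vx, vz, vl);
    *min_out = __riscv_vfmv_f_s_f64m1_f64(v_res);

    vbool8_t    keep = __riscv_vmfle_vf_f64m8_b8(vx, 0.0, vl);
    vuint64m8_t vid  = __riscv_vid_v_u64m8(vl);

    /* v1.0: (src, mask, vl); v0.10: vcompress_vm_u64m8(mask, dest, src, vl),
       hence the 0p10 macro injecting the caller's `compressed` variable. */
    vuint64m8_t compressed = __riscv_vcompress_vm_u64m8(vid, keep, vl);
    __riscv_vse64_v_u64m8(idx_out, compressed, vl);   /* only the packed prefix is meaningful */
    return __riscv_vmv_x_s_u64m8_u64(compressed);
}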
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDMAXVS_FLOAT JOIN(__riscv_vfredmax_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDMAXVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl) +#else +#define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#endif #define MASK_T JOIN(vbool, MLEN, _t, _, _) -#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) -#define VFMAXVV_FLOAT JOIN(__riscv_vfmax, _vv_f, ELEN, LMUL, _) +#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) +#define VFMAXVV_FLOAT JOIN(RISCV_RVV(vfmax), _vv_f, ELEN, LMUL, _) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/min_rvv.c b/kernel/riscv64/min_rvv.c index 158b682fd..78528fef9 100644 --- a/kernel/riscv64/min_rvv.c +++ b/kernel/riscv64/min_rvv.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 #else #define VSETVL(n) __riscv_vsetvl_e64m8(n) @@ -52,7 +52,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 #endif @@ -75,7 +75,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLEV_FLOAT(x, vl); - vmin = VFMINVV_FLOAT(vmin, vx, vl); + vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl); } } else { @@ -86,7 +86,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLSEV_FLOAT(x, stride_x, vl); - vmin = VFMINVV_FLOAT(vmin, vx, vl); + vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl); } } diff --git a/kernel/riscv64/min_vector.c b/kernel/riscv64/min_vector.c index 77bf19b9d..2001840bb 100644 --- a/kernel/riscv64/min_vector.c +++ b/kernel/riscv64/min_vector.c @@ -54,17 +54,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
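The _TU renames in max_rvv.c and min_rvv.c above (and in the other *_rvv.c kernels in this patch) are load-bearing, not cosmetic. These kernels accumulate per-lane partial results across a strip-mined loop and reduce once at the end over all vlmax lanes; when the final iteration runs with vl < vlmax, a tail-agnostic vfmax may clobber the accumulator's tail lanes, and the closing reduction would fold garbage into the result. The tail-undisturbed (_tu) form takes the accumulator as an explicit destination and leaves lanes at index >= vl untouched, which is why every call site gains a duplicated first argument. A minimal sketch of the pattern, assuming the v1.0 intrinsics (hypothetical helper, not the actual kernel):

#include <riscv_vector.h>

float max_f32(const float *x, size_t n)   /* assumes n > 0 */
{
    size_t vlmax = __riscv_vsetvlmax_e32m8();
    vfloat32m8_t vmax = __riscv_vfmv_v_f_f32m8(-__builtin_inff(), vlmax);

    for (size_t vl; n > 0; n -= vl, x += vl) {
        vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t vx = __riscv_vle32_v_f32m8(x, vl);
        /* dest == first source: lanes >= vl keep their previous maxima */
        vmax = __riscv_vfmax_vv_f32m8_tu(vmax, vmax, vx, vl);
    }

    /* the reduction scans all vlmax lanes, so the tail had to stay valid */
    vfloat32m1_t vz   = __riscv_vfmv_v_f_f32m1(-__builtin_inff(), 1);
    vfloat32m1_t vres = __riscv_vfredmax_vs_f32m8_f32m1(vmax, vz, vlmax);
    return __riscv_vfmv_f_s_f32m1_f32(vres);
}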
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDMINVS_FLOAT JOIN(__riscv_vfredmin_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDMINVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl) +#else +#define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#endif #define MASK_T JOIN(vbool, MLEN, _t, _, _) -#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) -#define VFMINVV_FLOAT JOIN(__riscv_vfmin, _vv_f, ELEN, LMUL, _) +#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) +#define VFMINVV_FLOAT JOIN(RISCV_RVV(vfmin), _vv_f, ELEN, LMUL, _) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/nrm2_rvv.c b/kernel/riscv64/nrm2_rvv.c index 42abfa119..994fadb70 100644 --- a/kernel/riscv64/nrm2_rvv.c +++ b/kernel/riscv64/nrm2_rvv.c @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLEV_FLOAT __riscv_vle32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLEV_FLOAT __riscv_vle64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -79,7 +79,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v0 = VLEV_FLOAT(x, vl); - vr = VFMACCVV_FLOAT(vr, v0, v0, vl); + vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl); } } else { @@ -91,7 +91,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v0 = VLSEV_FLOAT(x, stride_x, vl); - vr = VFMACCVV_FLOAT(vr, v0, v0, vl); + vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl); } } diff --git a/kernel/riscv64/nrm2_vector.c b/kernel/riscv64/nrm2_vector.c index 205f07eb7..141dffebf 100644 --- a/kernel/riscv64/nrm2_vector.c +++ b/kernel/riscv64/nrm2_vector.c @@ -52,38 +52,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
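For readers puzzling over the JOIN chains: JOIN pastes five tokens, and the RISCV_RVV(...) argument is expanded before pasting because JOIN2 forwards to JOIN2_X, so argument prescan runs at that extra level of indirection. The lone `_` arguments appear to rely on these files defining `_` as an empty macro earlier (outside the excerpted hunks), so they paste to nothing. An illustrative expansion under those assumed definitions:

/* Illustrative only; mirrors the kernels' macro scaffolding. */
#define _                            /* assumed: defined empty in the full files */
#define JOIN2_X(x, y) x ## y
#define JOIN2(x, y) JOIN2_X(x, y)
#define JOIN(v, w, x, y, z) JOIN2(JOIN2(JOIN2(JOIN2(v, w), x), y), z)
#define RISCV_RVV(x) __riscv_ ## x   /* v1.0 branch of common_riscv64.h */
#define ELEN 32
#define LMUL m8

/* JOIN(RISCV_RVV(vle),  ELEN, _v_f,   ELEN, LMUL) -> __riscv_vle32_v_f32m8  */
/* JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)    -> __riscv_vfmv_v_f_f32m8 */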
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVSF_FLOAT JOIN(__riscv_vfmv, _s_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVSF_FLOAT JOIN(RISCV_RVV(vfmv), _s_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) #define MASK_T JOIN(vbool, MLEN, _t, _, _) -#define VFABS JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _) -#define VMFNE JOIN(__riscv_vmfne_vf_f,ELEN, LMUL, _b, MLEN) -#define VMFGT JOIN(__riscv_vmfgt_vv_f,ELEN, LMUL, _b, MLEN) -#define VMFEQ JOIN(__riscv_vmfeq_vf_f,ELEN, LMUL, _b, MLEN) -#define VCPOP JOIN(__riscv_vcpop, _m_b, MLEN, _, _) -#define VFREDMAX JOIN(__riscv_vfredmax_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1) -#define VFREDMIN JOIN(__riscv_vfredmin_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1) -#define VFIRST JOIN(__riscv_vfirst, _m_b, MLEN, _, _) -#define VRGATHER JOIN(__riscv_vrgather, _vx_f, ELEN, LMUL, _) -#define VFDIV JOIN(__riscv_vfdiv, _vv_f, ELEN, LMUL, _) -#define VFDIV_M JOIN(__riscv_vfdiv, _vv_f, ELEN, LMUL, _mu) -#define VFMUL JOIN(__riscv_vfmul, _vv_f, ELEN, LMUL, _) -#define VFMUL_M JOIN(__riscv_vfmul, _vv_f, ELEN, LMUL, _mu) -#define VFMACC JOIN(__riscv_vfmacc, _vv_f, ELEN, LMUL, _) -#define VFMACC_M JOIN(__riscv_vfmacc, _vv_f, ELEN, LMUL, _mu) -#define VMSBF JOIN(__riscv_vmsbf, _m_b, MLEN, _, _) -#define VMSOF JOIN(__riscv_vmsof, _m_b, MLEN, _, _) -#define VMAND JOIN(__riscv_vmand, _mm_b, MLEN, _, _) -#define VMANDN JOIN(__riscv_vmandn, _mm_b, MLEN, _, _) -#define VFREDSUM JOIN(__riscv_vfredusum_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1) -#define VMERGE JOIN(__riscv_vmerge, _vvm_f, ELEN, LMUL, _) +#define VFABS JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _) +#define VMFNE JOIN(RISCV_RVV(vmfne_vf_f),ELEN, LMUL, _b, MLEN) +#define VMFGT JOIN(RISCV_RVV(vmfgt_vv_f),ELEN, LMUL, _b, MLEN) +#define VMFEQ JOIN(RISCV_RVV(vmfeq_vf_f),ELEN, LMUL, _b, MLEN) +#define VCPOP JOIN(RISCV_RVV(vcpop), _m_b, MLEN, _, _) +#ifdef RISCV_0p10_INTRINSICS +#define VFDIV_M JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _m) +#define VFMUL_M JOIN(RISCV_RVV(vfmul), _vv_f, ELEN, LMUL, _m) +#define VFMACC_M JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _m) +#define VMERGE(a, b, mask, gvl) JOIN(RISCV_RVV(vmerge), _vvm_f, ELEN, LMUL, _)(mask, a, b, gvl) +#else +#define VFDIV_M JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _mu) +#define VFMUL_M JOIN(RISCV_RVV(vfmul), _vv_f, ELEN, LMUL, _mu) +#define VFMACC_M JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _mu) +#define VMERGE JOIN(RISCV_RVV(vmerge), _vvm_f, ELEN, LMUL, _) +#endif +#define VFIRST JOIN(RISCV_RVV(vfirst), _m_b, MLEN, _, _) +#define VRGATHER JOIN(RISCV_RVV(vrgather), _vx_f, ELEN, LMUL, _) +#define VFDIV JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _) +#define VFMUL JOIN(RISCV_RVV(vfmul), _vv_f, ELEN, LMUL, _) +#define VFMACC JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _) +#define VMSBF JOIN(RISCV_RVV(vmsbf), _m_b, MLEN, _, 
_) +#define VMSOF JOIN(RISCV_RVV(vmsof), _m_b, MLEN, _, _) +#define VMAND JOIN(RISCV_RVV(vmand), _mm_b, MLEN, _, _) +#define VMANDN JOIN(RISCV_RVV(vmandn), _mm_b, MLEN, _, _) -#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL) +#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL) #if defined(DOUBLE) #define ABS fabs @@ -91,7 +95,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ABS fabsf #endif -#define EXTRACT_FLOAT0_V(v) JOIN(__riscv_vfmv_f_s_f, ELEN, LMUL, _f, ELEN)(v) +#define EXTRACT_FLOAT0_V(v) JOIN(RISCV_RVV(vfmv_f_s_f), ELEN, LMUL, _f, ELEN)(v) //#define DUMP( label, v0, gvl ) #define DUMP( label, v0, gvl ) do{ FLOAT x[16]; VSEV_FLOAT( x, v0, gvl ); printf ("%s(%d): %s [ ", __FILE__, __LINE__, label); for( int xxx = 0; xxx < gvl; ++xxx ) { printf("%f, ", x[xxx]); } printf(" ]\n"); } while(0) diff --git a/kernel/riscv64/rot_vector.c b/kernel/riscv64/rot_vector.c index f7aa6c3c2..649d9bb94 100644 --- a/kernel/riscv64/rot_vector.c +++ b/kernel/riscv64/rot_vector.c @@ -28,27 +28,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e32m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)() #define FLOAT_V_T vfloat32m4_t -#define VLEV_FLOAT __riscv_vle32_v_f32m4 -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSEV_FLOAT __riscv_vse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 -#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4 -#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f32m4 +#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4) +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4) +#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4) +#define VFMSACVF_FLOAT RISCV_RVV(vfmsac_vf_f32m4) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e64m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)() #define FLOAT_V_T vfloat64m4_t -#define VLEV_FLOAT __riscv_vle64_v_f64m4 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSEV_FLOAT __riscv_vse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 -#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4 -#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f64m4 +#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4) +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) +#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4) +#define VFMSACVF_FLOAT RISCV_RVV(vfmsac_vf_f64m4) #endif int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s) diff --git a/kernel/riscv64/scal_vector.c b/kernel/riscv64/scal_vector.c index f6b87d4f9..8fa9315f6 100644 --- a/kernel/riscv64/scal_vector.c +++ b/kernel/riscv64/scal_vector.c @@ -52,14 +52,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
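In the nrm2_vector.c hunk above, only two signature differences actually separate the toolchains. Masked arithmetic keeps the same (mask, maskedoff, op1, op2, vl) operand order but is spelled _m under 0.10 and _mu (mask-undisturbed) under v1.0; and vmerge's mask moved from the first to the third position, which is why the 0p10 VMERGE macro reorders its arguments while call sites uniformly write VMERGE(a, b, mask, gvl). A short sketch assuming the v1.0 intrinsics (hypothetical function, not the kernel):

#include <riscv_vector.h>

vfloat32m4_t masked_mac_then_select(vbool8_t mask, vfloat32m4_t acc,
                                    vfloat32m4_t a, vfloat32m4_t b, size_t vl)
{
    /* v1.0 _mu: (mask, maskedoff, op1, op2, vl); inactive lanes of acc are
       left undisturbed. The 0.10 _m form has the identical operand order. */
    acc = __riscv_vfmacc_vv_f32m4_mu(mask, acc, a, b, vl);

    /* lane-wise select: mask ? b : acc.
       v1.0:  __riscv_vmerge_vvm_f32m4(acc, b, mask, vl)
       v0.10: vmerge_vvm_f32m4(mask, acc, b, vl)          */
    return __riscv_vmerge_vvm_f32m4(acc, b, mask, vl);
}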
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL) -#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMULVF_FLOAT JOIN(__riscv_vfmul, _vf_f, ELEN, LMUL, _) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL) +#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMULVF_FLOAT JOIN(RISCV_RVV(vfmul), _vf_f, ELEN, LMUL, _) int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) { diff --git a/kernel/riscv64/sum_rvv.c b/kernel/riscv64/sum_rvv.c index 9715faf22..c5629197f 100644 --- a/kernel/riscv64/sum_rvv.c +++ b/kernel/riscv64/sum_rvv.c @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLEV_FLOAT __riscv_vle32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLEV_FLOAT __riscv_vle64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -73,7 +73,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLEV_FLOAT(x, vl); - vsum = VFADDVV_FLOAT(vsum, vx, vl); + vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl); } } else { @@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLSEV_FLOAT(x, stride_x, vl); - vsum = VFADDVV_FLOAT(vsum, vx, vl); + vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl); } } diff --git a/kernel/riscv64/sum_vector.c b/kernel/riscv64/sum_vector.c index 3fd217375..cf734faab 100644 --- a/kernel/riscv64/sum_vector.c +++ b/kernel/riscv64/sum_vector.c @@ -29,27 +29,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m8(n) -#define VSETVL_MAX __riscv_vsetvlmax_e32m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)() #define FLOAT_V_T vfloat32m8_t #define FLOAT_V_T_M1 vfloat32m1_t -#define VLEV_FLOAT __riscv_vle32_v_f32m8 -#define VLSEV_FLOAT __riscv_vlse32_v_f32m8 -#define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8 +#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8) +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8) +#define VFREDSUMVS_FLOAT RISCV_RVV(vfredusum_vs_f32m8_f32m1) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1) +#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f32m8) #else -#define VSETVL(n) __riscv_vsetvl_e64m8(n) -#define VSETVL_MAX __riscv_vsetvlmax_e64m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)() #define FLOAT_V_T vfloat64m8_t #define FLOAT_V_T_M1 vfloat64m1_t -#define VLEV_FLOAT __riscv_vle64_v_f64m8 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m8 -#define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8 +#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8) +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8) +#define VFREDSUMVS_FLOAT RISCV_RVV(vfredusum_vs_f64m8_f64m1) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1) +#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f64m8) #endif FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/swap_vector.c b/kernel/riscv64/swap_vector.c index baf3d8f69..3b467a586 100644 --- a/kernel/riscv64/swap_vector.c +++ b/kernel/riscv64/swap_vector.c @@ -53,12 +53,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL) -#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL) +#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL) int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) { diff --git a/kernel/riscv64/symv_L_rvv.c b/kernel/riscv64/symv_L_rvv.c index b27db2e37..888d628a5 100644 --- a/kernel/riscv64/symv_L_rvv.c +++ b/kernel/riscv64/symv_L_rvv.c @@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VSEV_FLOAT __riscv_vse32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VSSEV_FLOAT __riscv_vsse32_v_f32m8 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8 #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 @@ -56,7 +56,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSEV_FLOAT __riscv_vse64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VSSEV_FLOAT __riscv_vsse64_v_f64m8 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8 #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 @@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSEV_FLOAT(&y[i], vy, vl); vx = VLEV_FLOAT(&x[i], vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); } v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); @@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSSEV_FLOAT(&y[iy], stride_y, vy, vl); vx = VLEV_FLOAT(&x[i], vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); iy += inc_yv; } @@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSEV_FLOAT(&y[i], vy, vl); vx = VLSEV_FLOAT(&x[ix], stride_x, vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); ix += inc_xv; } @@ -201,7 +201,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSSEV_FLOAT(&y[iy], stride_y, vy, vl); vx = VLSEV_FLOAT(&x[ix], stride_x, vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); ix += inc_xv; iy += inc_yv; diff --git a/kernel/riscv64/symv_L_vector.c b/kernel/riscv64/symv_L_vector.c index f3b922195..cd89c63ec 100644 --- a/kernel/riscv64/symv_L_vector.c +++ b/kernel/riscv64/symv_L_vector.c @@ -27,35 +27,43 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e32m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t -#define VLEV_FLOAT __riscv_vle32_v_f32m4 -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSEV_FLOAT __riscv_vse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 +#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4) +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m4_f32m1(v_res, va, vb, gvl) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e64m1() +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4) +#else +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)() #define FLOAT_V_T vfloat64m4_t #define FLOAT_V_T_M1 vfloat64m1_t -#define VLEV_FLOAT __riscv_vle64_v_f64m4 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSEV_FLOAT __riscv_vse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 +#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4) +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f64m4_f64m1(v_res, va, vb, gvl) +#else +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4) #endif int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) diff --git a/kernel/riscv64/symv_U_rvv.c b/kernel/riscv64/symv_U_rvv.c index 7e45b1a01..3cfd3ee4c 100644 --- a/kernel/riscv64/symv_U_rvv.c +++ b/kernel/riscv64/symv_U_rvv.c @@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VSEV_FLOAT __riscv_vse32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VSSEV_FLOAT __riscv_vsse32_v_f32m8 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8 #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 @@ -57,7 +57,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSEV_FLOAT __riscv_vse64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VSSEV_FLOAT __riscv_vsse64_v_f64m8 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8 #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 @@ -101,7 +101,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSEV_FLOAT(&y[i], vy, vl); vx = VLEV_FLOAT(&x[i], vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); } v_res = VFREDSUM_FLOAT(vr, v_z0, vl_max); @@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSSEV_FLOAT(&y[iy], stride_y, vy, vl); vx = VLEV_FLOAT(&x[i], vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); iy += inc_yv; } @@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSEV_FLOAT(&y[i], vy, vl); vx = VLSEV_FLOAT(&x[ix], stride_x, vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); ix += inc_xv; } @@ -200,7 +200,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSSEV_FLOAT(&y[iy], stride_y, vy, vl); vx = VLSEV_FLOAT(&x[ix], stride_x, vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); ix += inc_xv; iy += inc_yv; } diff --git a/kernel/riscv64/symv_U_vector.c b/kernel/riscv64/symv_U_vector.c index 9977e2741..894c6a643 100644 --- a/kernel/riscv64/symv_U_vector.c +++ b/kernel/riscv64/symv_U_vector.c @@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e32m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t -#define VLEV_FLOAT __riscv_vle32_v_f32m4 -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSEV_FLOAT __riscv_vse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFDOTVV_FLOAT __riscv_vfdot_vv_f32m4 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 +#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4) +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m4_f32m1(v_res, va, vb, gvl) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e64m1() +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1) +#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f32m4) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4) +#else +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)() #define FLOAT_V_T vfloat64m4_t #define FLOAT_V_T_M1 vfloat64m1_t -#define VLEV_FLOAT __riscv_vle64_v_f64m4 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSEV_FLOAT __riscv_vse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFDOTVV_FLOAT __riscv_vfdot_vv_f64m4 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 +#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4) +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f64m4_f64m1(v_res, va, vb, gvl) +#else +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1) +#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f64m4) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4) #endif int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) diff --git a/kernel/riscv64/zamax_rvv.c b/kernel/riscv64/zamax_rvv.c index 615b7519c..bbb1e876b 100644 --- a/kernel/riscv64/zamax_rvv.c +++ b/kernel/riscv64/zamax_rvv.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m4_f32m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m4 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -54,7 +54,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m4_f64m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m4 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v1 = VFABSV_FLOAT(v1, vl); v0 = VFADDVV_FLOAT(v0, v1, vl); - vmax = VFMAXVV_FLOAT(vmax, v0, vl); + vmax = VFMAXVV_FLOAT_TU(vmax, vmax, v0, vl); } @@ -101,7 +101,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v1 = VFABSV_FLOAT(v1, vl); v0 = VFADDVV_FLOAT(v0, v1, vl); - vmax = VFMAXVV_FLOAT(vmax, v0, vl); + vmax = VFMAXVV_FLOAT_TU(vmax, vmax, v0, vl); } } diff --git a/kernel/riscv64/zamax_vector.c b/kernel/riscv64/zamax_vector.c index 4301528bd..2dee5ab29 100644 --- a/kernel/riscv64/zamax_vector.c +++ b/kernel/riscv64/zamax_vector.c @@ -53,19 +53,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDMAXVS_FLOAT JOIN(__riscv_vfredmax_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDMAXVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl) +#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl) +#else +#define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) +#endif #define MASK_T JOIN(vbool, MLEN, _t, _, _) -#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) -#define VFRSUBVF_MASK_FLOAT JOIN(__riscv_vfrsub,_vf_f, ELEN, LMUL, _m) -#define VFMAXVV_FLOAT JOIN(__riscv_vfmax, _vv_f, ELEN, LMUL, _) -#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _) +#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) +#define VFMAXVV_FLOAT JOIN(RISCV_RVV(vfmax), _vv_f, ELEN, LMUL, _) +#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git 
a/kernel/riscv64/zamin_rvv.c b/kernel/riscv64/zamin_rvv.c index a0d36d46f..c5453121b 100644 --- a/kernel/riscv64/zamin_rvv.c +++ b/kernel/riscv64/zamin_rvv.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m4_f32m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m4 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -54,7 +54,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m4_f64m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m4 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v1 = VFABSV_FLOAT(v1, vl); v0 = VFADDVV_FLOAT(v0, v1, vl); - vmin = VFMINVV_FLOAT(vmin, v0, vl); + vmin = VFMINVV_FLOAT_TU(vmin, vmin, v0, vl); } } else { @@ -100,7 +100,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v1 = VFABSV_FLOAT(v1, vl); v0 = VFADDVV_FLOAT(v0, v1, vl); - vmin = VFMINVV_FLOAT(vmin, v0, vl); + vmin = VFMINVV_FLOAT_TU(vmin, vmin, v0, vl); } } diff --git a/kernel/riscv64/zamin_vector.c b/kernel/riscv64/zamin_vector.c index 095b1c3df..df9a7a7e1 100644 --- a/kernel/riscv64/zamin_vector.c +++ b/kernel/riscv64/zamin_vector.c @@ -55,19 +55,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDMINVS_FLOAT JOIN(__riscv_vfredmin_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDMINVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl) +#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl) +#else +#define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) +#endif #define MASK_T JOIN(vbool, MLEN, _t, _, _) -#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) -#define VFRSUBVF_MASK_FLOAT JOIN(__riscv_vfrsub,_vf_f, ELEN, LMUL, _m) -#define VFMINVV_FLOAT JOIN(__riscv_vfmin, _vv_f, ELEN, LMUL, _) -#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _) +#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) +#define VFMINVV_FLOAT JOIN(RISCV_RVV(vfmin), _vv_f, ELEN, LMUL, _) +#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/zasum_rvv.c b/kernel/riscv64/zasum_rvv.c index 1d2f0e1fe..ebec1b19c 100644 --- a/kernel/riscv64/zasum_rvv.c +++ b/kernel/riscv64/zasum_rvv.c @@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu #define VFABSV_FLOAT __riscv_vfabs_v_f32m8 #else #define VSETVL(n) __riscv_vsetvl_e64m8(n) @@ -51,7 +51,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu #define VFABSV_FLOAT __riscv_vfabs_v_f64m8 #endif @@ -75,8 +75,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v0 = VFABSV_FLOAT(v0, vl); v1 = VFABSV_FLOAT(v1, vl); - v_sum = VFADDVV_FLOAT(v_sum, v0, vl); - v_sum = VFADDVV_FLOAT(v_sum, v1, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl); } } @@ -93,8 +93,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v0 = VFABSV_FLOAT(v0, vl); v1 = VFABSV_FLOAT(v1, vl); - v_sum = VFADDVV_FLOAT(v_sum, v0, vl); - v_sum = VFADDVV_FLOAT(v_sum, v1, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl); } } diff --git a/kernel/riscv64/zasum_vector.c b/kernel/riscv64/zasum_vector.c index 9136f0037..fca904d6a 100644 --- a/kernel/riscv64/zasum_vector.c +++ b/kernel/riscv64/zasum_vector.c @@ -53,17 +53,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDSUMVS_FLOAT JOIN(__riscv_vfredusum_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) -#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) -#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _) -#define VMFLTVF_FLOAT JOIN(__riscv_vmflt, _vf_f, ELEN, LMUL, MLEN) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUMVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl) +#else +#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#endif +#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) +#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _) +#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt), _vf_f, ELEN, LMUL, MLEN) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/zaxpby_vector.c b/kernel/riscv64/zaxpby_vector.c index 404f51fb3..d5ad974cf 100644 --- a/kernel/riscv64/zaxpby_vector.c +++ b/kernel/riscv64/zaxpby_vector.c @@ -28,25 +28,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) #define FLOAT_V_T vfloat32m4_t -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 -#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4 -#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f32m4 -#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4 +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4) +#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4) +#define VFMSACVF_FLOAT RISCV_RVV(vfmsac_vf_f32m4) +#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) #define FLOAT_V_T vfloat64m4_t -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 -#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4 -#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f64m4 -#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4 +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4) +#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4) +#define VFMSACVF_FLOAT RISCV_RVV(vfmsac_vf_f64m4) +#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4) #endif int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i, FLOAT *y, BLASLONG inc_y) diff --git a/kernel/riscv64/zaxpy_vector.c b/kernel/riscv64/zaxpy_vector.c index 20bfe74ec..d19e51118 100644 --- a/kernel/riscv64/zaxpy_vector.c +++ b/kernel/riscv64/zaxpy_vector.c @@ -28,19 +28,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) #define FLOAT_V_T vfloat32m4_t -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 -#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4 +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4) +#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) #define FLOAT_V_T vfloat64m4_t -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 -#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4 +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) +#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4) #endif int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) diff --git a/kernel/riscv64/zcopy_vector.c b/kernel/riscv64/zcopy_vector.c index 9da60acb0..9e4a67b71 100644 --- a/kernel/riscv64/zcopy_vector.c +++ b/kernel/riscv64/zcopy_vector.c @@ -27,15 +27,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) #define FLOAT_V_T vfloat32m4_t -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) #define FLOAT_V_T vfloat64m4_t -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) #endif diff --git a/kernel/riscv64/zdot_rvv.c b/kernel/riscv64/zdot_rvv.c index 1543c513d..fa0e89353 100644 --- a/kernel/riscv64/zdot_rvv.c +++ b/kernel/riscv64/zdot_rvv.c @@ -36,12 +36,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 #define VFMSACVV_FLOAT __riscv_vfmsac_vv_f32m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4 +#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 #else #define VSETVL(n) __riscv_vsetvl_e64m4(n) @@ -52,12 +52,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4 #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 #define VFMSACVV_FLOAT __riscv_vfmsac_vv_f64m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4 +#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 #endif @@ -86,14 +86,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA VLSEG_FLOAT(&vx0, &vx1, x, vl); VLSEG_FLOAT(&vy0, &vy1, y, vl); - vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl); #if !defined(CONJ) - vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl); #else - vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl); #endif } @@ -107,14 +107,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA VLSEG_FLOAT(&vx0, &vx1, x, vl); VLSSEG_FLOAT(&vy0, &vy1, y, stride_y, vl); - vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl); #if !defined(CONJ) - vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl); #else - vr0 = 
VFMACCVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl); #endif } } else if (inc_y == 1){ @@ -127,14 +127,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA VLSSEG_FLOAT(&vx0, &vx1, x, stride_x, vl); VLSEG_FLOAT(&vy0, &vy1, y, vl); - vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl); #if !defined(CONJ) - vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl); #else - vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl); #endif } }else { @@ -148,14 +148,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA VLSSEG_FLOAT(&vx0, &vx1, x, stride_x, vl); VLSSEG_FLOAT(&vy0, &vy1, y, stride_y, vl); - vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl); #if !defined(CONJ) - vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl); #else - vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl); #endif } } diff --git a/kernel/riscv64/zdot_vector.c b/kernel/riscv64/zdot_vector.c index 57542714a..13b8fe378 100644 --- a/kernel/riscv64/zdot_vector.c +++ b/kernel/riscv64/zdot_vector.c @@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
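The vfmacc/vfnmsac pairing that the zdot_rvv.c hunks above preserve comes straight from complex multiplication, and is worth spelling out once since the _tu rewrite touches every one of these lines:

/* For x = xr + i*xi and y = yr + i*yi:
     x * y       = (xr*yr - xi*yi) + i*(xr*yi + xi*yr)   -- no CONJ
     conj(x) * y = (xr*yr + xi*yi) + i*(xr*yi - xi*yr)   -- CONJ
   vr0/vr1 hold the real/imaginary partial sums: both branches start with
   vr0 += xr*yr and vr1 += xr*yi, then the CONJ setting decides whether
   xi*yi is subtracted (vfnmsac) or added (vfmacc) into vr0, and xi*yr
   added or subtracted into vr1. The _tu forms only change how tail lanes
   of vr0/vr1 are treated, not this arithmetic. */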
#include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e32m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t -#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32 -#define VLEV_FLOAT __riscv_vle32_v_f32m4 -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFDOTVV_FLOAT __riscv_vfdot_vv_f32m4 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 -#define VFMSACVV_FLOAT __riscv_vfmsac_vv_f32m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4 +#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32) +#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4) +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m4_f32m1)(v_res, va, vb, gvl) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e64m1() +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1) +#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f32m4) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4) +#define VFMSACVV_FLOAT RISCV_RVV(vfmsac_vv_f32m4) +#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m4) +#else +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)() #define FLOAT_V_T vfloat64m4_t #define FLOAT_V_T_M1 vfloat64m1_t -#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64 -#define VLEV_FLOAT __riscv_vle64_v_f64m4 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFDOTVV_FLOAT __riscv_vfdot_vv_f64m4 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 -#define VFMSACVV_FLOAT __riscv_vfmsac_vv_f64m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4 +#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64) +#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4) +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m4_f64m1)(v_res, va, vb, gvl) +#else +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1) +#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f64m4) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4) +#define VFMSACVV_FLOAT RISCV_RVV(vfmsac_vv_f64m4) +#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m4) #endif OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) diff --git a/kernel/riscv64/zgemv_n_vector.c b/kernel/riscv64/zgemv_n_vector.c index f4acad770..104d3865d 100644 --- a/kernel/riscv64/zgemv_n_vector.c +++ b/kernel/riscv64/zgemv_n_vector.c @@ -27,23 +27,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) #define FLOAT_V_T vfloat32m4_t -#define VLEV_FLOAT __riscv_vle32_v_f32m4 -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSEV_FLOAT __riscv_vse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 -#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4 +#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4) +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4) +#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) #define FLOAT_V_T vfloat64m4_t -#define VLEV_FLOAT __riscv_vle64_v_f64m4 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSEV_FLOAT __riscv_vse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 -#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4 +#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4) +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) +#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4) #endif int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) diff --git a/kernel/riscv64/zgemv_t_rvv.c b/kernel/riscv64/zgemv_t_rvv.c index 15795cc3a..2f0380530 100644 --- a/kernel/riscv64/zgemv_t_rvv.c +++ b/kernel/riscv64/zgemv_t_rvv.c @@ -35,8 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu +#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 @@ -49,8 +49,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/kernel/riscv64/zgemv_t_rvv.c b/kernel/riscv64/zgemv_t_rvv.c
index 15795cc3a..2f0380530 100644
--- a/kernel/riscv64/zgemv_t_rvv.c
+++ b/kernel/riscv64/zgemv_t_rvv.c
@@ -35,8 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
#define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
@@ -49,8 +49,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
#define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
@@ -90,15 +90,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
            VLSEG_FLOAT(&vx0, &vx1, &x[ix], vl);
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
-            vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-            vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va1, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+            vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl);
#else
-            vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-            vr = VFMACCVV_FLOAT(vr, va1, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-            vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+            vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl);
#endif
            j += vl * 2;
            ix += vl * inc_x * 2;
@@ -134,15 +134,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
            VLSSEG_FLOAT(&vx0, &vx1, &x[ix], stride_x, vl);
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
-            vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-            vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va1, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+            vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl);
#else
-            vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-            vr = VFMACCVV_FLOAT(vr, va1, vx1, vl);
-            vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-            vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl);
+            vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+            vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl);
#endif
            j += vl * 2;
            ix += vl * inc_x * 2;
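The zgemv_t_rvv.c change above swaps the accumulating FMAs for their tail-undisturbed forms. vfmacc/vfnmsac already take the accumulator as their first operand, so the _tu variant keeps the same argument list and only pins the tail policy; this keeps the lanes past a short final vl valid for the full-length reduction that follows the loop. A minimal sketch of the same accumulation pattern, assuming RVV 1.0 intrinsics and a simplified split-complex layout instead of the kernel's vlseg2 loads:

#include <riscv_vector.h>
#include <stddef.h>

/* unconjugated complex dot product, split real/imaginary inputs */
void cdotu_f32(const float *ar, const float *ai,
               const float *xr, const float *xi, size_t n,
               float *dot_r, float *dot_i) {
    size_t vlmax = __riscv_vsetvlmax_e32m4();
    vfloat32m4_t vr = __riscv_vfmv_v_f_f32m4(0.0f, vlmax);
    vfloat32m4_t vi = __riscv_vfmv_v_f_f32m4(0.0f, vlmax);
    for (size_t vl; n > 0; n -= vl, ar += vl, ai += vl, xr += vl, xi += vl) {
        vl = __riscv_vsetvl_e32m4(n);
        vfloat32m4_t a_r = __riscv_vle32_v_f32m4(ar, vl);
        vfloat32m4_t a_i = __riscv_vle32_v_f32m4(ai, vl);
        vfloat32m4_t x_r = __riscv_vle32_v_f32m4(xr, vl);
        vfloat32m4_t x_i = __riscv_vle32_v_f32m4(xi, vl);
        /* re += ar*xr - ai*xi;  im += ar*xi + ai*xr; tails stay undisturbed */
        vr = __riscv_vfmacc_vv_f32m4_tu(vr, a_r, x_r, vl);
        vr = __riscv_vfnmsac_vv_f32m4_tu(vr, a_i, x_i, vl);
        vi = __riscv_vfmacc_vv_f32m4_tu(vi, a_r, x_i, vl);
        vi = __riscv_vfmacc_vv_f32m4_tu(vi, a_i, x_r, vl);
    }
    vfloat32m1_t z = __riscv_vfmv_v_f_f32m1(0.0f, __riscv_vsetvlmax_e32m1());
    *dot_r = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m4_f32m1(vr, z, vlmax));
    *dot_i = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m4_f32m1(vi, z, vlmax));
}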
diff --git a/kernel/riscv64/zgemv_t_vector.c b/kernel/riscv64/zgemv_t_vector.c
index 179454094..5d85ab3a4 100644
--- a/kernel/riscv64/zgemv_t_vector.c
+++ b/kernel/riscv64/zgemv_t_vector.c
@@ -27,31 +27,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"

#if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m2(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m2)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
#define FLOAT_V_T vfloat32m2_t
#define FLOAT_V_T_M1 vfloat32m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m2
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m2_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m2
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m2
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m2
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m2
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m2)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m2_f32m1)(vr, va, vb, gvl)
#else
-#define VSETVL(n) __riscv_vsetvl_e64m2(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m2_f32m1)(va, vb, gvl)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m2)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m2)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m2)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m2)
+#else
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m2)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
#define FLOAT_V_T vfloat64m2_t
#define FLOAT_V_T_M1 vfloat64m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m2
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m2_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m2
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m2
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m2
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m2
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m2)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m2_f64m1)(vr, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m2_f64m1)(va, vb, gvl)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m2)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m2)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m2)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m2)
#endif

int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
@@ -93,8 +101,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
            vr = VFMACCVV_FLOAT(vr, va1, vx1, gvl);
            vi = VFNMSACVV_FLOAT(vi, va1, vx0, gvl);
#endif
-            v_res_r = VFREDSUM_FLOAT(vr, v_res_r, gvl);
-            v_res_i = VFREDSUM_FLOAT(vi, v_res_i, gvl);
+            v_res_r = VFREDSUM_FLOAT(v_res_r, vr, v_res_r, gvl);
+            v_res_i = VFREDSUM_FLOAT(v_res_i, vi, v_res_i, gvl);

            j += inc_av;
            ix += inc_xv;
@@ -117,8 +125,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
            vi = VFNMSACVV_FLOAT(vi, va1, vx0, gvl);
#endif

-            v_res_r = VFREDSUM_FLOAT(vr, v_res_r, gvl);
-            v_res_i = VFREDSUM_FLOAT(vi, v_res_i, gvl);
+            v_res_r = VFREDSUM_FLOAT(v_res_r, vr, v_res_r, gvl);
+            v_res_i = VFREDSUM_FLOAT(v_res_i, vi, v_res_i, gvl);
        }
        temp_r = VFMVFS_FLOAT(v_res_r);
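Unlike the zdot/zhemv shims, zgemv_t_vector.c threads the destination through explicitly, so both branches share one four-argument spelling and the call sites gain a leading destination argument; under 1.0 the wrapper simply drops it. A host-compilable sketch of the expansion, again with hypothetical redsum_* stand-ins for the intrinsics:

#include <assert.h>

static float redsum_0p10(float vd, float vec, float scal, int gvl) { (void)vd; (void)gvl; return vec + scal; }
static float redsum_1p0(float vec, float scal, int gvl) { (void)gvl; return vec + scal; }

#ifdef RISCV_0p10_INTRINSICS
#define VFREDSUM_FLOAT(vr, va, vb, gvl) redsum_0p10(vr, va, vb, gvl)
#else
#define VFREDSUM_FLOAT(vr, va, vb, gvl) redsum_1p0(va, vb, gvl)   /* vr unused on 1.0 */
#endif

int main(void) {
    float v_res_r = 1.0f, vr = 2.0f;
    v_res_r = VFREDSUM_FLOAT(v_res_r, vr, v_res_r, 8);  /* one call shape, both toolchains */
    assert(v_res_r == 3.0f);
    return 0;
}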
diff --git a/kernel/riscv64/zhemv_LM_vector.c b/kernel/riscv64/zhemv_LM_vector.c
index e025120e5..117db7d84 100644
--- a/kernel/riscv64/zhemv_LM_vector.c
+++ b/kernel/riscv64/zhemv_LM_vector.c
@@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"

#if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
#define FLOAT_V_T vfloat32m4_t
#define FLOAT_V_T_M1 vfloat32m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m4_f32m1)(v_res, va, vb, gvl)
#else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m4)
+#else
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
#define FLOAT_V_T vfloat64m4_t
#define FLOAT_V_T_M1 vfloat64m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m4_f64m1)(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m4)
#endif

int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){
diff --git a/kernel/riscv64/zhemv_UV_vector.c b/kernel/riscv64/zhemv_UV_vector.c
index 0e1ea5436..7c6b63bf3 100644
--- a/kernel/riscv64/zhemv_UV_vector.c
+++ b/kernel/riscv64/zhemv_UV_vector.c
@@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"

#if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
#define FLOAT_V_T vfloat32m4_t
#define FLOAT_V_T_M1 vfloat32m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m4_f32m1)(v_res, va, vb, gvl)
#else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m4)
+#else
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
#define FLOAT_V_T vfloat64m4_t
#define FLOAT_V_T_M1 vfloat64m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m4_f64m1)(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m4)
#endif

int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){
diff --git a/kernel/riscv64/znrm2_rvv.c b/kernel/riscv64/znrm2_rvv.c
index 5f7873b5a..d2b27aa8d 100644
--- a/kernel/riscv64/znrm2_rvv.c
+++ b/kernel/riscv64/znrm2_rvv.c
@@ -36,10 +36,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
#define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m4_f32m1
+#define VFREDMAXVS_FLOAT_TU __riscv_vfredmax_vs_f32m4_f32m1_tu
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
#define VFABSV_FLOAT __riscv_vfabs_v_f32m4
#else
@@ -51,10 +51,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
#define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m4_f64m1
+#define VFREDMAXVS_FLOAT_TU __riscv_vfredmax_vs_f64m4_f64m1_tu
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
#define VFABSV_FLOAT __riscv_vfabs_v_f64m4
#endif
@@ -85,11 +85,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
            v0 = VFABSV_FLOAT(v0, vl);
            v1 = VFABSV_FLOAT(v1, vl);

-            v_max = VFREDMAXVS_FLOAT(v0, v_max, vl);
-            vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
+            v_max = VFREDMAXVS_FLOAT_TU(v_max, v0, v_max, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl);

-            v_max = VFREDMAXVS_FLOAT(v1, v_max, vl);
-            vr = VFMACCVV_FLOAT(vr, v1, v1, vl);
+            v_max = VFREDMAXVS_FLOAT_TU(v_max, v1, v_max, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, v1, v1, vl);
        }
    }
    else {
@@ -103,11 +103,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
            v0 = VFABSV_FLOAT(v0, vl);
            v1 = VFABSV_FLOAT(v1, vl);

-            v_max = VFREDMAXVS_FLOAT(v0, v_max, vl);
-            vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
+            v_max = VFREDMAXVS_FLOAT_TU(v_max, v0, v_max, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl);

-            v_max = VFREDMAXVS_FLOAT(v1, v_max, vl);
-            vr = VFMACCVV_FLOAT(vr, v1, v1, vl);
+            v_max = VFREDMAXVS_FLOAT_TU(v_max, v1, v_max, vl);
+            vr = VFMACCVV_FLOAT_TU(vr, v1, v1, vl);
        }
    }
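In znrm2_rvv.c the tail-undisturbed rewrite reaches the reductions as well: the _tu form of vfredmax takes an explicit m1 destination and merges into it, so v_max both feeds and receives the running maximum across iterations. A simplified sketch of the same loop shape, assuming RVV 1.0 intrinsics and skipping the rescaling the real kernel derives from max|x|:

#include <riscv_vector.h>
#include <stddef.h>
#include <math.h>

/* unscaled sketch: sqrt(sum x[i]^2), tracking max|x| the way znrm2_rvv.c does */
float nrm2_sketch(const float *x, size_t n) {
    size_t vlmax = __riscv_vsetvlmax_e32m4();
    vfloat32m4_t vr = __riscv_vfmv_v_f_f32m4(0.0f, vlmax);
    vfloat32m1_t v_max = __riscv_vfmv_v_f_f32m1(0.0f, __riscv_vsetvlmax_e32m1());
    for (size_t vl; n > 0; n -= vl, x += vl) {
        vl = __riscv_vsetvl_e32m4(n);
        vfloat32m4_t v0 = __riscv_vfabs_v_f32m4(__riscv_vle32_v_f32m4(x, vl), vl);
        /* _tu reduction merges into v_max instead of clobbering it */
        v_max = __riscv_vfredmax_vs_f32m4_f32m1_tu(v_max, v0, v_max, vl);
        vr = __riscv_vfmacc_vv_f32m4_tu(vr, v0, v0, vl);
    }
    vfloat32m1_t zero = __riscv_vfmv_v_f_f32m1(0.0f, __riscv_vsetvlmax_e32m1());
    float ssq = __riscv_vfmv_f_s_f32m1_f32(__riscv_vfredusum_vs_f32m4_f32m1(vr, zero, vlmax));
    (void)__riscv_vfmv_f_s_f32m1_f32(v_max);  /* max|x|, used for scaling in the real kernel */
    return sqrtf(ssq);
}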
diff --git a/kernel/riscv64/znrm2_vector.c b/kernel/riscv64/znrm2_vector.c
index 437bf4246..8614f7539 100644
--- a/kernel/riscv64/znrm2_vector.c
+++ b/kernel/riscv64/znrm2_vector.c
@@ -52,37 +52,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define JOIN2(x, y) JOIN2_X(x, y)
#define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
#define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
#define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
#define MASK_T JOIN(vbool, MLEN, _t, _, _)
-#define VFABS JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
-#define VMFNE JOIN(__riscv_vmfne_vf_f,ELEN, LMUL, _b, MLEN)
-#define VMFGT JOIN(__riscv_vmfgt_vv_f,ELEN, LMUL, _b, MLEN)
-#define VMFEQ JOIN(__riscv_vmfeq_vv_f,ELEN, LMUL, _b, MLEN)
-#define VCPOP JOIN(__riscv_vcpop, _m_b, MLEN, _, _)
-#define VFREDMAX JOIN(__riscv_vfredmax_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1)
-#define VFIRST JOIN(__riscv_vfirst, _m_b, MLEN, _, _)
-#define VRGATHER JOIN(__riscv_vrgather, _vx_f, ELEN, LMUL, _)
-#define VFDIV JOIN(__riscv_vfdiv, _vf_f, ELEN, LMUL, _)
-#define VFDIV_M JOIN(__riscv_vfdiv, _vv_f, ELEN, LMUL, _mu)
-#define VFMUL JOIN(__riscv_vfmul, _vv_f, ELEN, LMUL, _)
-#define VFMACC JOIN(__riscv_vfmacc, _vv_f, ELEN, LMUL, _)
-#define VFMACC_M JOIN(__riscv_vfmacc, _vv_f, ELEN, LMUL, _mu)
-#define VMSOF JOIN(__riscv_vmsof, _m_b, MLEN, _, _)
-#define VMANDN JOIN(__riscv_vmandn, _mm_b, MLEN, _, _)
-#define VFREDUSUM JOIN(__riscv_vfredusum_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1)
+#define VFABS JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
+#define VMFNE JOIN(RISCV_RVV(vmfne_vf_f),ELEN, LMUL, _b, MLEN)
+#define VMFGT JOIN(RISCV_RVV(vmfgt_vv_f),ELEN, LMUL, _b, MLEN)
+#define VMFEQ JOIN(RISCV_RVV(vmfeq_vv_f),ELEN, LMUL, _b, MLEN)
+#define VCPOP JOIN(RISCV_RVV(vcpop), _m_b, MLEN, _, _)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAX(va, vb, gvl) JOIN(RISCV_RVV(vfredmax_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)(v_res, va, vb, gvl)
+#define VFREDUSUM(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)(v_res, va, vb, gvl)
+#define VFDIV_M JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _m)
+#define VFMACC_M JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _m)
+#else
+#define VFREDMAX JOIN(RISCV_RVV(vfredmax_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)
+#define VFREDUSUM JOIN(RISCV_RVV(vfredusum_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)
+#define VFDIV_M JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _mu)
+#define VFMACC_M JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _mu)
+#endif
+#define VFIRST JOIN(RISCV_RVV(vfirst), _m_b, MLEN, _, _)
+#define VRGATHER JOIN(RISCV_RVV(vrgather), _vx_f, ELEN, LMUL, _)
+#define VFDIV JOIN(RISCV_RVV(vfdiv), _vf_f, ELEN, LMUL, _)
+#define VFMUL JOIN(RISCV_RVV(vfmul), _vv_f, ELEN, LMUL, _)
+#define VFMACC JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _)
+#define VMSOF JOIN(RISCV_RVV(vmsof), _m_b, MLEN, _, _)
+#define VMANDN JOIN(RISCV_RVV(vmandn), _mm_b, MLEN, _, _)

#if defined(DOUBLE)
#define ABS fabs
#else
#define ABS fabsf
#endif

-#define EXTRACT_FLOAT0_V(v) JOIN(__riscv_vfmv_f_s_f, ELEN, LMUL, _f, ELEN)(v)
+#define EXTRACT_FLOAT0_V(v) JOIN(RISCV_RVV(vfmv_f_s_f), ELEN, LMUL, _f, ELEN)(v)

FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
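The masked macros in znrm2_vector.c pick different suffixes per toolchain: the 0.10 masked intrinsics end in _m (with the merge source passed explicitly), while under 1.0 the patch selects _mu, whose "mask undisturbed" policy keeps the destination value in inactive lanes, reproducing the old behavior. A small sketch of the _mu policy, assuming RVV 1.0 intrinsics (scale_large_f32 and its parameters are illustrative, not from the kernel):

#include <riscv_vector.h>
#include <stddef.h>

/* divide elements greater than cutoff by cutoff; other lanes keep their value */
void scale_large_f32(float *x, size_t n, float cutoff) {
    for (size_t vl; n > 0; n -= vl, x += vl) {
        vl = __riscv_vsetvl_e32m4(n);
        vfloat32m4_t v = __riscv_vle32_v_f32m4(x, vl);
        vbool8_t m = __riscv_vmfgt_vf_f32m4_b8(v, cutoff, vl);
        /* _mu: where the mask is clear, the result keeps vd (here: v itself) */
        v = __riscv_vfdiv_vf_f32m4_mu(m, v, v, cutoff, vl);
        __riscv_vse32_v_f32m4(x, v, vl);
    }
}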
diff --git a/kernel/riscv64/zrot_vector.c b/kernel/riscv64/zrot_vector.c
index c3afbc7cc..50751b343 100644
--- a/kernel/riscv64/zrot_vector.c
+++ b/kernel/riscv64/zrot_vector.c
@@ -27,27 +27,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"

#if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
#define FLOAT_V_T vfloat32m4_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSEV_FLOAT __riscv_vse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
#else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
#define FLOAT_V_T vfloat64m4_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSEV_FLOAT __riscv_vse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
#endif

int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
diff --git a/kernel/riscv64/zscal_vector.c b/kernel/riscv64/zscal_vector.c
index 5d9ab7b28..2034aafaa 100644
--- a/kernel/riscv64/zscal_vector.c
+++ b/kernel/riscv64/zscal_vector.c
@@ -27,25 +27,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"

#if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
#define FLOAT_V_T vfloat32m4_t
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
#else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
#define FLOAT_V_T vfloat64m4_t
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
#endif

int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
diff --git a/kernel/riscv64/zsum_rvv.c b/kernel/riscv64/zsum_rvv.c
index 44df112c6..b41f70eb5 100644
--- a/kernel/riscv64/zsum_rvv.c
+++ b/kernel/riscv64/zsum_rvv.c
@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m4_tu
#else
#define VSETVL(n) __riscv_vsetvl_e64m4(n)
#define VSETVL_MAX __riscv_vsetvlmax_e64m4()
@@ -50,7 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m4_tu
#endif

FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
@@ -69,8 +69,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

            VLSEG_FLOAT(&v0, &v1, x, vl);

-            v_sum = VFADDVV_FLOAT(v_sum, v0, vl);
-            v_sum = VFADDVV_FLOAT(v_sum, v1, vl);
+            v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl);
+            v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl);
        }
    }
    else {
@@ -82,8 +82,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

            VLSSEG_FLOAT(&v0, &v1, x, stride_x, vl);

-            v_sum = VFADDVV_FLOAT(v_sum, v0, vl);
-            v_sum = VFADDVV_FLOAT(v_sum, v1, vl);
+            v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl);
+            v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl);
        }
    }
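The zsum_rvv.c change is the clearest illustration of the _tu rewrite: plain vfadd has no destination operand, so its tail-undisturbed form grows an extra leading argument (the merge source), and the kernel passes the accumulator twice. A standalone sketch of the same running-sum idiom, assuming RVV 1.0 intrinsics and simplified to a real-valued array rather than the kernel's vlseg2 complex loads:

#include <riscv_vector.h>
#include <stddef.h>

float sum_f32(const float *x, size_t n) {
    size_t vlmax = __riscv_vsetvlmax_e32m4();
    vfloat32m4_t v_sum = __riscv_vfmv_v_f_f32m4(0.0f, vlmax);
    for (size_t vl; n > 0; n -= vl, x += vl) {
        vl = __riscv_vsetvl_e32m4(n);
        vfloat32m4_t v0 = __riscv_vle32_v_f32m4(x, vl);
        /* plain vfadd has no vd, so _tu adds one: (vd, vs2, vs1, vl);
           passing v_sum twice keeps lanes >= vl intact on the last pass */
        v_sum = __riscv_vfadd_vv_f32m4_tu(v_sum, v_sum, v0, vl);
    }
    vfloat32m1_t v_res = __riscv_vfmv_v_f_f32m1(0.0f, __riscv_vsetvlmax_e32m1());
    v_res = __riscv_vfredusum_vs_f32m4_f32m1(v_sum, v_res, vlmax);
    return __riscv_vfmv_f_s_f32m1_f32(v_res);
}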
diff --git a/kernel/riscv64/zsum_vector.c b/kernel/riscv64/zsum_vector.c
index 7aab15105..ca0b02b5c 100644
--- a/kernel/riscv64/zsum_vector.c
+++ b/kernel/riscv64/zsum_vector.c
@@ -53,16 +53,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define JOIN2(x, y) JOIN2_X(x, y)
#define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
#define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
#define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDSUMVS_FLOAT JOIN(__riscv_vfredusum_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
-#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _)
-#define VMFLTVF_FLOAT JOIN(__riscv_vmflt, _vf_f, ELEN, LMUL, MLEN)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
+#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
+#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt), _vf_f, ELEN, LMUL, MLEN)

FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
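zsum_vector.c and zswap_vector.c build intrinsic names with the two-level JOIN paster, now applied on top of RISCV_RVV so the leading fragment picks up the right prefix before the ELEN/LMUL pieces are glued on. A host-compilable sketch of why the indirection through JOIN2_X is needed: arguments must be macro-expanded before ## concatenates them (the stringizing here is only for display):

#include <stdio.h>

#define JOIN2_X(x, y) x ## y
#define JOIN2(x, y)   JOIN2_X(x, y)      /* expands x and y first, then pastes */
#define JOIN(v, w, x, y, z) JOIN2(JOIN2(JOIN2(JOIN2(v, w), x), y), z)

#define ELEN 32
#define LMUL m4

#define STR_X(s) #s
#define STR(s) STR_X(s)

int main(void) {
    /* assembles the token vle32_v_f32m4 from the same pieces the kernels use */
    printf("%s\n", STR(JOIN(vle, ELEN, _v_f, ELEN, LMUL)));
    return 0;
}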
diff --git a/kernel/riscv64/zswap_vector.c b/kernel/riscv64/zswap_vector.c
index d8980602d..02c98b588 100644
--- a/kernel/riscv64/zswap_vector.c
+++ b/kernel/riscv64/zswap_vector.c
@@ -53,12 +53,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define JOIN2(x, y) JOIN2_X(x, y)
#define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)

-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
#define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
-#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
+#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)

int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
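For the strided (inc_x != 1) paths that VLSEV/VSSEV serve throughout these kernels, the byte stride for a complex vector is 2 * sizeof(FLOAT) * inc, since each element is a real/imaginary pair. A minimal sketch of a strided complex swap, assuming RVV 1.0 intrinsics; the real kernel additionally dispatches to the unit-stride vle/vse forms when both increments are 1:

#include <riscv_vector.h>
#include <stddef.h>

/* swap n complex floats between x (stride inc_x) and y (stride inc_y) */
void zswap_sketch(size_t n, float *x, ptrdiff_t inc_x, float *y, ptrdiff_t inc_y) {
    ptrdiff_t sx = 2 * (ptrdiff_t)sizeof(float) * inc_x;  /* bytes between pairs */
    ptrdiff_t sy = 2 * (ptrdiff_t)sizeof(float) * inc_y;
    for (size_t vl; n > 0; n -= vl, x += 2 * vl * inc_x, y += 2 * vl * inc_y) {
        vl = __riscv_vsetvl_e32m4(n);
        /* real parts */
        vfloat32m4_t vx = __riscv_vlse32_v_f32m4(x, sx, vl);
        vfloat32m4_t vy = __riscv_vlse32_v_f32m4(y, sy, vl);
        __riscv_vsse32_v_f32m4(x, sx, vy, vl);
        __riscv_vsse32_v_f32m4(y, sy, vx, vl);
        /* imaginary parts */
        vx = __riscv_vlse32_v_f32m4(x + 1, sx, vl);
        vy = __riscv_vlse32_v_f32m4(y + 1, sy, vl);
        __riscv_vsse32_v_f32m4(x + 1, sx, vy, vl);
        __riscv_vsse32_v_f32m4(y + 1, sy, vx, vl);
    }
}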