From e1958eb70529c36d7dc4f3baf9e7bf37524053ab Mon Sep 17 00:00:00 2001 From: Octavian Maghiar Date: Wed, 5 Jul 2023 11:34:00 +0100 Subject: [PATCH 1/6] Fixes RVV masked intrinsics for iamax/iamin/imax/imin kernels Changes masked intrinsics from _m to _mu and reintroduces maskedoff argument. --- kernel/riscv64/iamax_rvv.c | 16 ++++++++-------- kernel/riscv64/iamin_rvv.c | 16 ++++++++-------- kernel/riscv64/imax_rvv.c | 16 ++++++++-------- kernel/riscv64/imin_rvv.c | 16 ++++++++-------- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/kernel/riscv64/iamax_rvv.c b/kernel/riscv64/iamax_rvv.c index ef7850a55..d3508a91d 100644 --- a/kernel/riscv64/iamax_rvv.c +++ b/kernel/riscv64/iamax_rvv.c @@ -45,9 +45,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8 #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint64m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m +#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu #define VIDV_UINT __riscv_vid_v_u64m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu #define VADDVX_UINT __riscv_vadd_vx_u64m8 #define VMVVX_UINT __riscv_vmv_v_x_u64m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -71,9 +71,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8 #define VFIRSTM __riscv_vfirst_m_b4 #define UINT_V_T vuint32m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m +#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu #define VIDV_UINT __riscv_vid_v_u32m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu #define VADDVX_UINT __riscv_vadd_vx_u32m8 #define VMVVX_UINT __riscv_vmv_v_x_u32m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -106,8 +106,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx, vl); - v_max_index = VIDV_MASK_UINT(mask, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); //update v_max v_max = VFMAXVV_FLOAT(v_max, vx, vl); @@ -125,8 +125,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx, vl); - v_max_index = VIDV_MASK_UINT(mask, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); //update v_max v_max = VFMAXVV_FLOAT(v_max, vx, vl); diff --git a/kernel/riscv64/iamin_rvv.c b/kernel/riscv64/iamin_rvv.c index 56a086fed..ae1d4f726 100644 --- a/kernel/riscv64/iamin_rvv.c +++ b/kernel/riscv64/iamin_rvv.c @@ -46,9 +46,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8 #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint64m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m +#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu #define VIDV_UINT __riscv_vid_v_u64m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu #define VADDVX_UINT __riscv_vadd_vx_u64m8 #define VMVVX_UINT __riscv_vmv_v_x_u64m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -72,9 +72,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8 #define VFIRSTM __riscv_vfirst_m_b4 #define UINT_V_T vuint32m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m +#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu #define VIDV_UINT __riscv_vid_v_u32m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu #define VADDVX_UINT __riscv_vadd_vx_u32m8 #define VMVVX_UINT __riscv_vmv_v_x_u32m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -107,8 +107,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j v_min = VFMINVV_FLOAT(v_min, vx, vl); @@ -126,8 +126,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j v_min = VFMINVV_FLOAT(v_min, vx, vl); diff --git a/kernel/riscv64/imax_rvv.c b/kernel/riscv64/imax_rvv.c index 5b60a56f7..33250568d 100644 --- a/kernel/riscv64/imax_rvv.c +++ b/kernel/riscv64/imax_rvv.c @@ -45,9 +45,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8 #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint64m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m +#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu #define VIDV_UINT __riscv_vid_v_u64m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu #define VADDVX_UINT __riscv_vadd_vx_u64m8 #define VMVVX_UINT __riscv_vmv_v_x_u64m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -70,9 +70,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8 #define VFIRSTM __riscv_vfirst_m_b4 #define UINT_V_T vuint32m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m +#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu #define VIDV_UINT __riscv_vid_v_u32m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu #define VADDVX_UINT __riscv_vadd_vx_u32m8 #define VMVVX_UINT __riscv_vmv_v_x_u32m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -104,8 +104,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx, vl); - v_max_index = VIDV_MASK_UINT(mask, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); //update v_max and start_index j v_max = VFMAXVV_FLOAT(v_max, vx, vl); @@ -122,8 +122,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx, vl); - v_max_index = VIDV_MASK_UINT(mask, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); //update v_max and start_index j v_max = VFMAXVV_FLOAT(v_max, vx, vl); diff --git a/kernel/riscv64/imin_rvv.c b/kernel/riscv64/imin_rvv.c index b49544a1b..4ce49c3af 100644 --- a/kernel/riscv64/imin_rvv.c +++ b/kernel/riscv64/imin_rvv.c @@ -45,9 +45,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8 #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint64m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m +#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu #define VIDV_UINT __riscv_vid_v_u64m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu #define VADDVX_UINT __riscv_vadd_vx_u64m8 #define VMVVX_UINT __riscv_vmv_v_x_u64m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -70,9 +70,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8 #define VFIRSTM __riscv_vfirst_m_b4 #define UINT_V_T vuint32m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m +#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu #define VIDV_UINT __riscv_vid_v_u32m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu #define VADDVX_UINT __riscv_vadd_vx_u32m8 #define VMVVX_UINT __riscv_vmv_v_x_u32m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -104,8 +104,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j v_min = VFMINVV_FLOAT(v_min, vx, vl); @@ -122,8 +122,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j v_min = VFMINVV_FLOAT(v_min, vx, vl); From 1e4a3a2b5e111a6a94eb53946fa92c1715c5dd5e Mon Sep 17 00:00:00 2001 From: Octavian Maghiar Date: Wed, 12 Jul 2023 12:55:50 +0100 Subject: [PATCH 2/6] Fixes RVV masked intrinsics for izamax/izamin kernels --- kernel/riscv64/izamax_rvv.c | 18 +++++++++--------- kernel/riscv64/izamin_rvv.c | 16 ++++++++-------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/kernel/riscv64/izamax_rvv.c b/kernel/riscv64/izamax_rvv.c index e61d0cbec..e43ded820 100644 --- a/kernel/riscv64/izamax_rvv.c +++ b/kernel/riscv64/izamax_rvv.c @@ -48,9 +48,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4 #define VFIRSTM __riscv_vfirst_m_b16 #define UINT_V_T vuint64m4_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m4_m +#define VIDV_MASK_UINT __riscv_vid_v_u64m4_mu #define VIDV_UINT __riscv_vid_v_u64m4 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_mu #define VADDVX_UINT __riscv_vadd_vx_u64m4 #define VMVVX_UINT __riscv_vmv_v_x_u64m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -77,9 +77,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4 #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint32m4_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m4_m +#define VIDV_MASK_UINT __riscv_vid_v_u32m4_mu #define VIDV_UINT __riscv_vid_v_u32m4 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_mu #define VADDVX_UINT __riscv_vadd_vx_u32m4 #define VMVVX_UINT __riscv_vmv_v_x_u32m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -116,8 +116,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx0, vl); - v_max_index = VIDV_MASK_UINT(mask, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); //update v_max and start_index j v_max = VFMAXVV_FLOAT(v_max, vx0, vl); @@ -138,9 +138,9 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx0, vl); - v_max_index = VIDV_MASK_UINT(mask, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl); - + v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); + //update v_max and start_index j v_max = VFMAXVV_FLOAT(v_max, vx0, vl); } diff --git a/kernel/riscv64/izamin_rvv.c b/kernel/riscv64/izamin_rvv.c index 297b3c99a..cc3c37c8e 100644 --- a/kernel/riscv64/izamin_rvv.c +++ b/kernel/riscv64/izamin_rvv.c @@ -47,9 +47,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4 #define VFIRSTM __riscv_vfirst_m_b16 #define UINT_V_T vuint64m4_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m4_m +#define VIDV_MASK_UINT __riscv_vid_v_u64m4_mu #define VIDV_UINT __riscv_vid_v_u64m4 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_mu #define VADDVX_UINT __riscv_vadd_vx_u64m4 #define VMVVX_UINT __riscv_vmv_v_x_u64m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -74,9 +74,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4 #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint32m4_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m4_m +#define VIDV_MASK_UINT __riscv_vid_v_u32m4_mu #define VIDV_UINT __riscv_vid_v_u32m4 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_m +#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_mu #define VADDVX_UINT __riscv_vadd_vx_u32m4 #define VMVVX_UINT __riscv_vmv_v_x_u32m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -113,8 +113,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx0, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j v_min = VFMINVV_FLOAT(v_min, vx0, vl); @@ -136,8 +136,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx0, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j v_min = VFMINVV_FLOAT(v_min, vx0, vl); From 8df0289db61ea5a3e461c94c51a5798e2dd18b86 Mon Sep 17 00:00:00 2001 From: Octavian Maghiar Date: Thu, 20 Jul 2023 15:28:35 +0100 Subject: [PATCH 3/6] Adds tail undisturbed for RVV Level 1 operations During the last iteration of some RVV operations, accumulators can get overwritten when VL < VLMAX and tail policy is agnostic. Commit changes intrinsics tail policy to undistrubed. --- kernel/riscv64/amax_rvv.c | 8 +++--- kernel/riscv64/amin_rvv.c | 8 +++--- kernel/riscv64/asum_rvv.c | 8 +++--- kernel/riscv64/dot_rvv.c | 16 +++++------ kernel/riscv64/iamax_rvv.c | 24 ++++++++-------- kernel/riscv64/iamin_rvv.c | 24 ++++++++-------- kernel/riscv64/imax_rvv.c | 24 ++++++++-------- kernel/riscv64/imin_rvv.c | 24 ++++++++-------- kernel/riscv64/izamax_rvv.c | 24 ++++++++-------- kernel/riscv64/izamin_rvv.c | 24 ++++++++-------- kernel/riscv64/max_rvv.c | 8 +++--- kernel/riscv64/min_rvv.c | 8 +++--- kernel/riscv64/nrm2_rvv.c | 8 +++--- kernel/riscv64/sum_rvv.c | 8 +++--- kernel/riscv64/zamax_rvv.c | 8 +++--- kernel/riscv64/zamin_rvv.c | 8 +++--- kernel/riscv64/zasum_rvv.c | 12 ++++---- kernel/riscv64/zdot_rvv.c | 56 ++++++++++++++++++------------------- kernel/riscv64/znrm2_rvv.c | 24 ++++++++-------- kernel/riscv64/zsum_rvv.c | 12 ++++---- 20 files changed, 168 insertions(+), 168 deletions(-) diff --git a/kernel/riscv64/amax_rvv.c b/kernel/riscv64/amax_rvv.c index be0bdbea0..451fbc834 100644 --- a/kernel/riscv64/amax_rvv.c +++ b/kernel/riscv64/amax_rvv.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu #define VFABSV_FLOAT __riscv_vfabs_v_f32m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 #else @@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu #define VFABSV_FLOAT __riscv_vfabs_v_f64m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 #endif @@ -78,7 +78,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vx = VLEV_FLOAT(x, vl); vx = VFABSV_FLOAT(vx, vl); - vmax = VFMAXVV_FLOAT(vmax, vx, vl); + vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl); } } else { @@ -90,7 +90,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vx = VLSEV_FLOAT(x, stride_x, vl); vx = VFABSV_FLOAT(vx, vl); - vmax = VFMAXVV_FLOAT(vmax, vx, vl); + vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl); } } diff --git a/kernel/riscv64/amin_rvv.c b/kernel/riscv64/amin_rvv.c index d4926084b..5186d7b12 100644 --- a/kernel/riscv64/amin_rvv.c +++ b/kernel/riscv64/amin_rvv.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu #define VFABSV_FLOAT __riscv_vfabs_v_f32m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 #else @@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu #define VFABSV_FLOAT __riscv_vfabs_v_f64m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 #endif @@ -78,7 +78,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vx = VLEV_FLOAT(x, vl); vx = VFABSV_FLOAT(vx, vl); - vmin = VFMINVV_FLOAT(vmin, vx, vl); + vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl); } } else { @@ -90,7 +90,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vx = VLSEV_FLOAT(x, stride_x, vl); vx = VFABSV_FLOAT(vx, vl); - vmin = VFMINVV_FLOAT(vmin, vx, vl); + vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl); } } diff --git a/kernel/riscv64/asum_rvv.c b/kernel/riscv64/asum_rvv.c index 691591e22..0ea610cbb 100644 --- a/kernel/riscv64/asum_rvv.c +++ b/kernel/riscv64/asum_rvv.c @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLEV_FLOAT __riscv_vle32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu #define VFABSV_FLOAT __riscv_vfabs_v_f32m8 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 @@ -50,7 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VLEV_FLOAT __riscv_vle64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu #define VFABSV_FLOAT __riscv_vfabs_v_f64m8 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 @@ -76,7 +76,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vx = VLEV_FLOAT(x, vl); vx = VFABSV_FLOAT(vx, vl); - vsum = VFADDVV_FLOAT(vsum, vx, vl); + vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl); } } else { @@ -88,7 +88,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vx = VLSEV_FLOAT(x, stride_x, vl); vx = VFABSV_FLOAT(vx, vl); - vsum = VFADDVV_FLOAT(vsum, vx, vl); + vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl); } } diff --git a/kernel/riscv64/dot_rvv.c b/kernel/riscv64/dot_rvv.c index 3276695b6..837badf41 100644 --- a/kernel/riscv64/dot_rvv.c +++ b/kernel/riscv64/dot_rvv.c @@ -49,12 +49,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) vfloat32m4_t vx = __riscv_vle32_v_f32m4(x, vl); vfloat32m4_t vy = __riscv_vle32_v_f32m4(y, vl); - vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl); + vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl); #else vfloat64m8_t vx = __riscv_vle64_v_f64m8(x, vl); vfloat64m8_t vy = __riscv_vle64_v_f64m8(y, vl); - vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl); + vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl); #endif } @@ -69,12 +69,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) vfloat32m4_t vx = __riscv_vle32_v_f32m4(x, vl); vfloat32m4_t vy = __riscv_vlse32_v_f32m4(y, stride_y, vl); - vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl); + vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl); #else vfloat64m8_t vx = __riscv_vle64_v_f64m8(x, vl); vfloat64m8_t vy = __riscv_vlse64_v_f64m8(y, stride_y, vl); - vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl); + vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl); #endif } } else if (1 == inc_y) { @@ -88,12 +88,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) vfloat32m4_t vx = __riscv_vlse32_v_f32m4(x, stride_x, vl); vfloat32m4_t vy = __riscv_vle32_v_f32m4(y, vl); - vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl); + vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl); #else vfloat64m8_t vx = __riscv_vlse64_v_f64m8(x, stride_x, vl); vfloat64m8_t vy = __riscv_vle64_v_f64m8(y, vl); - vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl); + vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl); #endif } } else { @@ -108,12 +108,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) vfloat32m4_t vx = __riscv_vlse32_v_f32m4(x, stride_x, vl); vfloat32m4_t vy = __riscv_vlse32_v_f32m4(y, stride_y, vl); - vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl); + vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl); #else vfloat64m8_t vx = __riscv_vlse64_v_f64m8(x, stride_x, vl); vfloat64m8_t vy = __riscv_vlse64_v_f64m8(y, stride_y, vl); - vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl); + vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl); #endif } } diff --git a/kernel/riscv64/iamax_rvv.c b/kernel/riscv64/iamax_rvv.c index d3508a91d..8362d7cef 100644 --- a/kernel/riscv64/iamax_rvv.c +++ b/kernel/riscv64/iamax_rvv.c @@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint64m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu #define VIDV_UINT __riscv_vid_v_u64m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu #define VADDVX_UINT __riscv_vadd_vx_u64m8 #define VMVVX_UINT __riscv_vmv_v_x_u64m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -68,12 +68,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu #define VFIRSTM __riscv_vfirst_m_b4 #define UINT_V_T vuint32m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu #define VIDV_UINT __riscv_vid_v_u32m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu #define VADDVX_UINT __riscv_vadd_vx_u32m8 #define VMVVX_UINT __riscv_vmv_v_x_u32m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -106,11 +106,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx, vl); - v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl); //update v_max - v_max = VFMAXVV_FLOAT(v_max, vx, vl); + v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl); } } else { @@ -125,11 +125,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx, vl); - v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl); //update v_max - v_max = VFMAXVV_FLOAT(v_max, vx, vl); + v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl); } } diff --git a/kernel/riscv64/iamin_rvv.c b/kernel/riscv64/iamin_rvv.c index ae1d4f726..f90dbb545 100644 --- a/kernel/riscv64/iamin_rvv.c +++ b/kernel/riscv64/iamin_rvv.c @@ -43,12 +43,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint64m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu #define VIDV_UINT __riscv_vid_v_u64m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu #define VADDVX_UINT __riscv_vadd_vx_u64m8 #define VMVVX_UINT __riscv_vmv_v_x_u64m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -69,12 +69,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu #define VFIRSTM __riscv_vfirst_m_b4 #define UINT_V_T vuint32m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu #define VIDV_UINT __riscv_vid_v_u32m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu #define VADDVX_UINT __riscv_vadd_vx_u32m8 #define VMVVX_UINT __riscv_vmv_v_x_u32m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -107,11 +107,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j - v_min = VFMINVV_FLOAT(v_min, vx, vl); + v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl); } } else { @@ -126,11 +126,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j - v_min = VFMINVV_FLOAT(v_min, vx, vl); + v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl); } } diff --git a/kernel/riscv64/imax_rvv.c b/kernel/riscv64/imax_rvv.c index 33250568d..b1a77b178 100644 --- a/kernel/riscv64/imax_rvv.c +++ b/kernel/riscv64/imax_rvv.c @@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m8_b8 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint64m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu #define VIDV_UINT __riscv_vid_v_u64m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu #define VADDVX_UINT __riscv_vadd_vx_u64m8 #define VMVVX_UINT __riscv_vmv_v_x_u64m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -67,12 +67,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m8_b4 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu #define VFIRSTM __riscv_vfirst_m_b4 #define UINT_V_T vuint32m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu #define VIDV_UINT __riscv_vid_v_u32m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu #define VADDVX_UINT __riscv_vadd_vx_u32m8 #define VMVVX_UINT __riscv_vmv_v_x_u32m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -104,11 +104,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx, vl); - v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl); //update v_max and start_index j - v_max = VFMAXVV_FLOAT(v_max, vx, vl); + v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl); } } else { @@ -122,11 +122,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx, vl); - v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl); //update v_max and start_index j - v_max = VFMAXVV_FLOAT(v_max, vx, vl); + v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl); } } diff --git a/kernel/riscv64/imin_rvv.c b/kernel/riscv64/imin_rvv.c index 4ce49c3af..1de7f3233 100644 --- a/kernel/riscv64/imin_rvv.c +++ b/kernel/riscv64/imin_rvv.c @@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VMFLEVF_FLOAT __riscv_vmfle_vf_f64m8_b8 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint64m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu #define VIDV_UINT __riscv_vid_v_u64m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu #define VADDVX_UINT __riscv_vadd_vx_u64m8 #define VMVVX_UINT __riscv_vmv_v_x_u64m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -67,12 +67,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VMFLEVF_FLOAT __riscv_vmfle_vf_f32m8_b4 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu #define VFIRSTM __riscv_vfirst_m_b4 #define UINT_V_T vuint32m8_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu #define VIDV_UINT __riscv_vid_v_u32m8 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu #define VADDVX_UINT __riscv_vadd_vx_u32m8 #define VMVVX_UINT __riscv_vmv_v_x_u32m8 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -104,11 +104,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j - v_min = VFMINVV_FLOAT(v_min, vx, vl); + v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl); } } else { @@ -122,11 +122,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j - v_min = VFMINVV_FLOAT(v_min, vx, vl); + v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl); } } diff --git a/kernel/riscv64/izamax_rvv.c b/kernel/riscv64/izamax_rvv.c index e43ded820..e93f0056c 100644 --- a/kernel/riscv64/izamax_rvv.c +++ b/kernel/riscv64/izamax_rvv.c @@ -44,13 +44,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m4 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4 #define VFIRSTM __riscv_vfirst_m_b16 #define UINT_V_T vuint64m4_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m4_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m4_tumu #define VIDV_UINT __riscv_vid_v_u64m4 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m4_tumu #define VADDVX_UINT __riscv_vadd_vx_u64m4 #define VMVVX_UINT __riscv_vmv_v_x_u64m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -73,13 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m4 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4 #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint32m4_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m4_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m4_tumu #define VIDV_UINT __riscv_vid_v_u32m4 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m4_tumu #define VADDVX_UINT __riscv_vadd_vx_u32m4 #define VMVVX_UINT __riscv_vmv_v_x_u32m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -116,11 +116,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx0, vl); - v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl); //update v_max and start_index j - v_max = VFMAXVV_FLOAT(v_max, vx0, vl); + v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx0, vl); } } else { @@ -138,11 +138,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) //index where element greater than v_max mask = VMFLTVV_FLOAT(v_max, vx0, vl); - v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl); - v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl); + v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl); + v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl); //update v_max and start_index j - v_max = VFMAXVV_FLOAT(v_max, vx0, vl); + v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx0, vl); } } diff --git a/kernel/riscv64/izamin_rvv.c b/kernel/riscv64/izamin_rvv.c index cc3c37c8e..b5bc27404 100644 --- a/kernel/riscv64/izamin_rvv.c +++ b/kernel/riscv64/izamin_rvv.c @@ -43,13 +43,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m4 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4 #define VFIRSTM __riscv_vfirst_m_b16 #define UINT_V_T vuint64m4_t -#define VIDV_MASK_UINT __riscv_vid_v_u64m4_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m4_tumu #define VIDV_UINT __riscv_vid_v_u64m4 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m4_tumu #define VADDVX_UINT __riscv_vadd_vx_u64m4 #define VMVVX_UINT __riscv_vmv_v_x_u64m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -70,13 +70,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m4 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4 #define VFIRSTM __riscv_vfirst_m_b8 #define UINT_V_T vuint32m4_t -#define VIDV_MASK_UINT __riscv_vid_v_u32m4_mu +#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m4_tumu #define VIDV_UINT __riscv_vid_v_u32m4 -#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_mu +#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m4_tumu #define VADDVX_UINT __riscv_vadd_vx_u32m4 #define VMVVX_UINT __riscv_vmv_v_x_u32m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -113,11 +113,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx0, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j - v_min = VFMINVV_FLOAT(v_min, vx0, vl); + v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx0, vl); } } else { @@ -136,11 +136,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) // index where element less than v_min mask = VMFLTVV_FLOAT(vx0, v_min, vl); - v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl); - v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl); + v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl); + v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl); //update v_min and start_index j - v_min = VFMINVV_FLOAT(v_min, vx0, vl); + v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx0, vl); } } diff --git a/kernel/riscv64/max_rvv.c b/kernel/riscv64/max_rvv.c index 9315321f4..745c27bf4 100644 --- a/kernel/riscv64/max_rvv.c +++ b/kernel/riscv64/max_rvv.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 #else #define VSETVL(n) __riscv_vsetvl_e64m8(n) @@ -52,7 +52,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 #endif @@ -75,7 +75,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLEV_FLOAT(x, vl); - vmax = VFMAXVV_FLOAT(vmax, vx, vl); + vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl); } } else { @@ -86,7 +86,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLSEV_FLOAT(x, stride_x, vl); - vmax = VFMAXVV_FLOAT(vmax, vx, vl); + vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl); } } diff --git a/kernel/riscv64/min_rvv.c b/kernel/riscv64/min_rvv.c index 158b682fd..78528fef9 100644 --- a/kernel/riscv64/min_rvv.c +++ b/kernel/riscv64/min_rvv.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 #else #define VSETVL(n) __riscv_vsetvl_e64m8(n) @@ -52,7 +52,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 #endif @@ -75,7 +75,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLEV_FLOAT(x, vl); - vmin = VFMINVV_FLOAT(vmin, vx, vl); + vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl); } } else { @@ -86,7 +86,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLSEV_FLOAT(x, stride_x, vl); - vmin = VFMINVV_FLOAT(vmin, vx, vl); + vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl); } } diff --git a/kernel/riscv64/nrm2_rvv.c b/kernel/riscv64/nrm2_rvv.c index 42abfa119..994fadb70 100644 --- a/kernel/riscv64/nrm2_rvv.c +++ b/kernel/riscv64/nrm2_rvv.c @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLEV_FLOAT __riscv_vle32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLEV_FLOAT __riscv_vle64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -79,7 +79,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v0 = VLEV_FLOAT(x, vl); - vr = VFMACCVV_FLOAT(vr, v0, v0, vl); + vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl); } } else { @@ -91,7 +91,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v0 = VLSEV_FLOAT(x, stride_x, vl); - vr = VFMACCVV_FLOAT(vr, v0, v0, vl); + vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl); } } diff --git a/kernel/riscv64/sum_rvv.c b/kernel/riscv64/sum_rvv.c index 9715faf22..c5629197f 100644 --- a/kernel/riscv64/sum_rvv.c +++ b/kernel/riscv64/sum_rvv.c @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLEV_FLOAT __riscv_vle32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VLEV_FLOAT __riscv_vle64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -73,7 +73,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLEV_FLOAT(x, vl); - vsum = VFADDVV_FLOAT(vsum, vx, vl); + vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl); } } else { @@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) vl = VSETVL(n); vx = VLSEV_FLOAT(x, stride_x, vl); - vsum = VFADDVV_FLOAT(vsum, vx, vl); + vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl); } } diff --git a/kernel/riscv64/zamax_rvv.c b/kernel/riscv64/zamax_rvv.c index 615b7519c..bbb1e876b 100644 --- a/kernel/riscv64/zamax_rvv.c +++ b/kernel/riscv64/zamax_rvv.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m4_f32m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m4 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -54,7 +54,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m4_f64m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m4 +#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v1 = VFABSV_FLOAT(v1, vl); v0 = VFADDVV_FLOAT(v0, v1, vl); - vmax = VFMAXVV_FLOAT(vmax, v0, vl); + vmax = VFMAXVV_FLOAT_TU(vmax, vmax, v0, vl); } @@ -101,7 +101,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v1 = VFABSV_FLOAT(v1, vl); v0 = VFADDVV_FLOAT(v0, v1, vl); - vmax = VFMAXVV_FLOAT(vmax, v0, vl); + vmax = VFMAXVV_FLOAT_TU(vmax, vmax, v0, vl); } } diff --git a/kernel/riscv64/zamin_rvv.c b/kernel/riscv64/zamin_rvv.c index a0d36d46f..c5453121b 100644 --- a/kernel/riscv64/zamin_rvv.c +++ b/kernel/riscv64/zamin_rvv.c @@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m4_f32m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m4 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -54,7 +54,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m4_f64m1 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m4 +#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m4_tu #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v1 = VFABSV_FLOAT(v1, vl); v0 = VFADDVV_FLOAT(v0, v1, vl); - vmin = VFMINVV_FLOAT(vmin, v0, vl); + vmin = VFMINVV_FLOAT_TU(vmin, vmin, v0, vl); } } else { @@ -100,7 +100,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v1 = VFABSV_FLOAT(v1, vl); v0 = VFADDVV_FLOAT(v0, v1, vl); - vmin = VFMINVV_FLOAT(vmin, v0, vl); + vmin = VFMINVV_FLOAT_TU(vmin, vmin, v0, vl); } } diff --git a/kernel/riscv64/zasum_rvv.c b/kernel/riscv64/zasum_rvv.c index 1d2f0e1fe..ebec1b19c 100644 --- a/kernel/riscv64/zasum_rvv.c +++ b/kernel/riscv64/zasum_rvv.c @@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu #define VFABSV_FLOAT __riscv_vfabs_v_f32m8 #else #define VSETVL(n) __riscv_vsetvl_e64m8(n) @@ -51,7 +51,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu #define VFABSV_FLOAT __riscv_vfabs_v_f64m8 #endif @@ -75,8 +75,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v0 = VFABSV_FLOAT(v0, vl); v1 = VFABSV_FLOAT(v1, vl); - v_sum = VFADDVV_FLOAT(v_sum, v0, vl); - v_sum = VFADDVV_FLOAT(v_sum, v1, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl); } } @@ -93,8 +93,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v0 = VFABSV_FLOAT(v0, vl); v1 = VFABSV_FLOAT(v1, vl); - v_sum = VFADDVV_FLOAT(v_sum, v0, vl); - v_sum = VFADDVV_FLOAT(v_sum, v1, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl); } } diff --git a/kernel/riscv64/zdot_rvv.c b/kernel/riscv64/zdot_rvv.c index 1543c513d..fa0e89353 100644 --- a/kernel/riscv64/zdot_rvv.c +++ b/kernel/riscv64/zdot_rvv.c @@ -36,12 +36,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 #define VFMSACVV_FLOAT __riscv_vfmsac_vv_f32m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4 +#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 #else #define VSETVL(n) __riscv_vsetvl_e64m4(n) @@ -52,12 +52,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4 #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 #define VFMSACVV_FLOAT __riscv_vfmsac_vv_f64m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4 +#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 #endif @@ -86,14 +86,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA VLSEG_FLOAT(&vx0, &vx1, x, vl); VLSEG_FLOAT(&vy0, &vy1, y, vl); - vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl); #if !defined(CONJ) - vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl); #else - vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl); #endif } @@ -107,14 +107,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA VLSEG_FLOAT(&vx0, &vx1, x, vl); VLSSEG_FLOAT(&vy0, &vy1, y, stride_y, vl); - vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl); #if !defined(CONJ) - vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl); #else - vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl); #endif } } else if (inc_y == 1){ @@ -127,14 +127,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA VLSSEG_FLOAT(&vx0, &vx1, x, stride_x, vl); VLSEG_FLOAT(&vy0, &vy1, y, vl); - vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl); #if !defined(CONJ) - vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl); #else - vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl); #endif } }else { @@ -148,14 +148,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA VLSSEG_FLOAT(&vx0, &vx1, x, stride_x, vl); VLSSEG_FLOAT(&vy0, &vy1, y, stride_y, vl); - vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl); + vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl); #if !defined(CONJ) - vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl); #else - vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl); - vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl); + vr0 = 
VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl); + vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl); #endif } } diff --git a/kernel/riscv64/znrm2_rvv.c b/kernel/riscv64/znrm2_rvv.c index 5f7873b5a..d2b27aa8d 100644 --- a/kernel/riscv64/znrm2_rvv.c +++ b/kernel/riscv64/znrm2_rvv.c @@ -36,10 +36,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m4_f32m1 +#define VFREDMAXVS_FLOAT_TU __riscv_vfredmax_vs_f32m4_f32m1_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4 #else @@ -51,10 +51,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4 #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m4_f64m1 +#define VFREDMAXVS_FLOAT_TU __riscv_vfredmax_vs_f64m4_f64m1_tu #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4 #endif @@ -85,11 +85,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v0 = VFABSV_FLOAT(v0, vl); v1 = VFABSV_FLOAT(v1, vl); - v_max = VFREDMAXVS_FLOAT(v0, v_max, vl); - vr = VFMACCVV_FLOAT(vr, v0, v0, vl); + v_max = VFREDMAXVS_FLOAT_TU(v_max, v0, v_max, vl); + vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl); - v_max = VFREDMAXVS_FLOAT(v1, v_max, vl); - vr = VFMACCVV_FLOAT(vr, v1, v1, vl); + v_max = VFREDMAXVS_FLOAT_TU(v_max, v1, v_max, vl); + vr = VFMACCVV_FLOAT_TU(vr, v1, v1, vl); } } else { @@ -103,11 +103,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) v0 = VFABSV_FLOAT(v0, vl); v1 = VFABSV_FLOAT(v1, vl); - v_max = VFREDMAXVS_FLOAT(v0, v_max, vl); - vr = VFMACCVV_FLOAT(vr, v0, v0, vl); + v_max = VFREDMAXVS_FLOAT_TU(v_max, v0, v_max, vl); + vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl); - v_max = VFREDMAXVS_FLOAT(v1, v_max, vl); - vr = VFMACCVV_FLOAT(vr, v1, v1, vl); + v_max = VFREDMAXVS_FLOAT_TU(v_max, v1, v_max, vl); + vr = VFMACCVV_FLOAT_TU(vr, v1, v1, vl); } } diff --git a/kernel/riscv64/zsum_rvv.c b/kernel/riscv64/zsum_rvv.c index 44df112c6..b41f70eb5 100644 --- a/kernel/riscv64/zsum_rvv.c +++ b/kernel/riscv64/zsum_rvv.c @@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m4_tu #else #define VSETVL(n) __riscv_vsetvl_e64m4(n) #define VSETVL_MAX __riscv_vsetvlmax_e64m4() @@ -50,7 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4 +#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m4_tu #endif FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) @@ -69,8 +69,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) VLSEG_FLOAT(&v0, &v1, x, vl); - v_sum = VFADDVV_FLOAT(v_sum, v0, vl); - v_sum = VFADDVV_FLOAT(v_sum, v1, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl); } } else { @@ -82,8 +82,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) VLSSEG_FLOAT(&v0, &v1, x, stride_x, vl); - v_sum = VFADDVV_FLOAT(v_sum, v0, vl); - v_sum = VFADDVV_FLOAT(v_sum, v1, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl); + v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl); } } From 826a9d5fa47f20f23f42c97385e72e121a2efb4f Mon Sep 17 00:00:00 2001 From: Octavian Maghiar Date: Tue, 25 Jul 2023 11:36:23 +0100 Subject: [PATCH 4/6] Adds tail undisturbed for RVV Level 2 operations During the last iteration of some RVV operations, accumulators can get overwritten when VL < VLMAX and tail policy is agnostic. Commit changes intrinsics tail policy to undistrubed. --- kernel/riscv64/gemv_t_rvv.c | 8 ++++---- kernel/riscv64/symv_L_rvv.c | 12 +++++------ kernel/riscv64/symv_U_rvv.c | 12 +++++------ kernel/riscv64/zgemv_t_rvv.c | 40 ++++++++++++++++++------------------ 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/kernel/riscv64/gemv_t_rvv.c b/kernel/riscv64/gemv_t_rvv.c index f0c834866..9c859aa50 100644 --- a/kernel/riscv64/gemv_t_rvv.c +++ b/kernel/riscv64/gemv_t_rvv.c @@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLEV_FLOAT __riscv_vle32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32 @@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLEV_FLOAT __riscv_vle64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64 @@ -79,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO va = VLEV_FLOAT(a_ptr, vl); vx = VLEV_FLOAT(x_ptr, vl); - vr = VFMACCVV_FLOAT(vr, va, vx, vl); + vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl); } v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); @@ -103,7 +103,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO va = VLEV_FLOAT(a_ptr, vl); vx = VLSEV_FLOAT(x_ptr, stride_x, vl); - vr = VFMACCVV_FLOAT(vr, va, vx, vl); + vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl); } v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); diff --git a/kernel/riscv64/symv_L_rvv.c b/kernel/riscv64/symv_L_rvv.c index b27db2e37..888d628a5 100644 --- a/kernel/riscv64/symv_L_rvv.c +++ b/kernel/riscv64/symv_L_rvv.c @@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VSEV_FLOAT __riscv_vse32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VSSEV_FLOAT __riscv_vsse32_v_f32m8 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8 #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 @@ -56,7 +56,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSEV_FLOAT __riscv_vse64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VSSEV_FLOAT __riscv_vsse64_v_f64m8 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8 #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 @@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSEV_FLOAT(&y[i], vy, vl); vx = VLEV_FLOAT(&x[i], vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); } v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax); @@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSSEV_FLOAT(&y[iy], stride_y, vy, vl); vx = VLEV_FLOAT(&x[i], vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); iy += inc_yv; } @@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSEV_FLOAT(&y[i], vy, vl); vx = VLSEV_FLOAT(&x[ix], stride_x, vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); ix += inc_xv; } @@ -201,7 +201,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSSEV_FLOAT(&y[iy], stride_y, vy, vl); vx = VLSEV_FLOAT(&x[ix], stride_x, vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); ix += inc_xv; iy += inc_yv; diff --git a/kernel/riscv64/symv_U_rvv.c b/kernel/riscv64/symv_U_rvv.c index 7e45b1a01..3cfd3ee4c 100644 --- a/kernel/riscv64/symv_U_rvv.c +++ b/kernel/riscv64/symv_U_rvv.c @@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VSEV_FLOAT __riscv_vse32_v_f32m8 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 #define VSSEV_FLOAT __riscv_vsse32_v_f32m8 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8 #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 @@ -57,7 +57,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VSEV_FLOAT __riscv_vse64_v_f64m8 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 #define VSSEV_FLOAT __riscv_vsse64_v_f64m8 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8 #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 @@ -101,7 +101,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSEV_FLOAT(&y[i], vy, vl); vx = VLEV_FLOAT(&x[i], vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); } v_res = VFREDSUM_FLOAT(vr, v_z0, vl_max); @@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSSEV_FLOAT(&y[iy], stride_y, vy, vl); vx = VLEV_FLOAT(&x[i], vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); iy += inc_yv; } @@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSEV_FLOAT(&y[i], vy, vl); vx = VLSEV_FLOAT(&x[ix], stride_x, vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); ix += inc_xv; } @@ -200,7 +200,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA VSSEV_FLOAT(&y[iy], stride_y, vy, vl); vx = VLSEV_FLOAT(&x[ix], stride_x, vl); - vr = VFMACCVV_FLOAT(vr, vx, va, vl); + vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl); ix += inc_xv; iy += inc_yv; } diff --git a/kernel/riscv64/zgemv_t_rvv.c b/kernel/riscv64/zgemv_t_rvv.c index 15795cc3a..2f0380530 100644 --- a/kernel/riscv64/zgemv_t_rvv.c +++ b/kernel/riscv64/zgemv_t_rvv.c @@ -35,8 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu +#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 #define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 @@ -49,8 +49,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4 #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4 +#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu +#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 #define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 @@ -90,15 +90,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, VLSEG_FLOAT(&vx0, &vx1, &x[ix], vl); #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) - vr = VFMACCVV_FLOAT(vr, va0, vx0, vl); - vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl); - vi = VFMACCVV_FLOAT(vi, va0, vx1, vl); - vi = VFMACCVV_FLOAT(vi, va1, vx0, vl); + vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl); + vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl); + vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl); + vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl); #else - vr = VFMACCVV_FLOAT(vr, va0, vx0, vl); - vr = VFMACCVV_FLOAT(vr, va1, vx1, vl); - vi = VFMACCVV_FLOAT(vi, va0, vx1, vl); - vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl); + vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl); + vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl); + vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl); + vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl); #endif j += vl * 2; ix += vl * inc_x * 2; @@ -134,15 +134,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, VLSSEG_FLOAT(&vx0, &vx1, &x[ix], stride_x, vl); #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) - vr = VFMACCVV_FLOAT(vr, va0, vx0, vl); - vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl); - vi = VFMACCVV_FLOAT(vi, va0, vx1, vl); - vi = VFMACCVV_FLOAT(vi, va1, vx0, vl); + vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl); + vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl); + vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl); + vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl); #else - vr = VFMACCVV_FLOAT(vr, va0, vx0, vl); - vr = VFMACCVV_FLOAT(vr, va1, vx1, vl); - vi = VFMACCVV_FLOAT(vi, va0, vx1, vl); - vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl); + vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl); + vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl); + vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl); + vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl); #endif j += vl * 2; ix += vl * inc_x * 2; From 4a12cf53ec116c06e5d74073b54a3bca6046cb17 Mon Sep 17 00:00:00 2001 From: Octavian Maghiar Date: Mon, 4 Dec 2023 11:13:35 +0000 Subject: [PATCH 5/6] [RISC-V] Improve RVV kernel generator LMUL usage The RVV kernel generation script uses the provided LMUL to increase the number of accumulator registers. Since the effect of the LMUL is to group together the vector registers into larger ones, it actually should be used as a multiplier in the calculation of vlenmax. At the moment, no matter what LMUL is provided, the generated kernels would only set the maximum number of vector elements equal to VLEN/SEW. Commit changes the use of LMUL to properly adjust vlenmax. Note that an increase in LMUL results in a decrease in the number of effective vector registers. 
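As a rough illustration (not part of the change itself), the register budget the generator now works with can be sketched as below, assuming the standard 32-entry RVV register file and using reg_width_bits, ELEN_PARAM and LMUL_ACC as stand-ins for the values the script actually reads from its settings table; only the real-valued (non-complex) path is shown, and the helper function itself is hypothetical:

    # Hypothetical sketch of the budget arithmetic; the names mirror the
    # generator's settings keys, but this function does not exist in the script.
    def register_budget(reg_width_bits, elen, lmul, M, N):
        # an LMUL-wide register group holds reg_width_bits*LMUL/ELEN elements
        vlenmax = (reg_width_bits * lmul) // elen
        # registers needed to cover M rows of A, plus N accumulators per A register
        a_regs = max(M // vlenmax, 1)
        accumulation_regs = a_regs * N
        required = accumulation_regs + a_regs
        # grouping by LMUL leaves only 32//LMUL usable register groups
        available = 32 // lmul
        return vlenmax, required, available

    # e.g. VLEN=256, SEW=64, LMUL=2 with an 8x4 unroll:
    # vlenmax = 8, required = 5, available = 16
    print(register_budget(256, 64, 2, 8, 4))
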
--- kernel/riscv64/generate_kernel.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/kernel/riscv64/generate_kernel.py b/kernel/riscv64/generate_kernel.py index e2ce97971..8be7c9f9c 100755 --- a/kernel/riscv64/generate_kernel.py +++ b/kernel/riscv64/generate_kernel.py @@ -197,13 +197,13 @@ def generate_gemm_kernel_inner_complex( settings, dest, M, N, vlen, a_regs ): dest.write("ai += {M}*2;") dest.write() - - accumulation_regs = a_regs * N * settings['LMUL_ACC'].value + # for each vector register loaded from matrix A, we require N registers to hold vector-scalar multiply-accumulate results + accumulation_regs = a_regs * N dest.write("// {a_regs} vector regs to hold A array contents, {accumulation_regs} regs to hold values accumulated over k", a_regs=a_regs*2, accumulation_regs=accumulation_regs*2 ) pass_regs = (accumulation_regs + a_regs)*2 - tmp_regs = 32-pass_regs + tmp_regs = (32 // settings['LMUL_ACC'].value) - pass_regs if tmp_regs < 2: raise RuntimeError("Complex kernel would use too many registers!") @@ -337,10 +337,12 @@ def generate_gemm_kernel( settings, OUTPUT ): M = settings['M'].value N = settings['N'].value - vlenmax = int( settings['reg_width_bits'].value / settings['ELEN_PARAM'].value ) + vlenmax = int(settings['reg_width_bits'].value * settings['LMUL_ACC'].value / + settings['ELEN_PARAM'].value) a_regs = max(int(M/vlenmax), 1) - accumulation_regs = a_regs * N * settings['LMUL_ACC'].value + # for each vector register loaded from matrix A, we require N registers to hold vector-scalar multiply-accumulate results + accumulation_regs = a_regs * N required_regs = accumulation_regs + a_regs if is_complex: required_regs = required_regs * 2 + 2 @@ -380,9 +382,9 @@ def generate_gemm_kernel( settings, OUTPUT ): '''.format(tail_policy=settings['tail_policy'].value)) - if required_regs > 32: - raise Exception("{} vector registers needed during accumulation for unrolling {} x {}{} but only 32 are available".format( - required_regs, N, M, (" with wide accumulator" if settings['LMUL_ACC'].value > 1 else '') + if required_regs > (32 // settings['LMUL_ACC'].value): + raise Exception("{} vector registers needed during accumulation for unrolling {} x {}{} but only {} are available".format( + required_regs, N, M, (" with wide accumulator" if settings['LMUL_ACC'].value > 1 else ''), 32 // settings['LMUL_ACC'].value )) TRMM = (settings['op'].value == 'trmm') @@ -448,7 +450,8 @@ def generate_gemm_kernel( settings, OUTPUT ): def generate_M_tails( dest, settings, M, N ): M_tail = int(M/2) M_tail_min = settings['M_tail_scalar_from'].value - vlenmax = int( settings['reg_width_bits'].value / settings['ELEN_PARAM'].value ) + vlenmax = int(settings['reg_width_bits'].value * settings['LMUL_ACC'].value + / settings['ELEN_PARAM'].value ) TRMM = (settings['op'].value == 'trmm') is_complex = settings['complex'].value generate_gemm_kernel_inner = generate_gemm_kernel_inner_complex if is_complex else generate_gemm_kernel_inner_real @@ -667,4 +670,4 @@ def main(): ERROR("unsupported kernel type {}".format(settings['op'])) if __name__ == "__main__": - main() \ No newline at end of file + main() From 9edb805e645d3530e907864e242a3f64a881b28a Mon Sep 17 00:00:00 2001 From: Sergei Lewis Date: Tue, 16 Jan 2024 14:24:18 +0000 Subject: [PATCH 6/6] fix builds with t-head toolchains that use old versions of the intrinsics spec --- common_riscv64.h | 17 +++++- cpuid_riscv64.c | 4 +- kernel/riscv64/amax_vector.c | 18 +++--- kernel/riscv64/amin_vector.c | 18 +++--- 
kernel/riscv64/asum_vector.c | 20 ++++--- kernel/riscv64/axpby_vector.c | 16 +++--- kernel/riscv64/axpy_vector.c | 12 ++-- kernel/riscv64/copy_vector.c | 14 ++--- kernel/riscv64/dot_vector.c | 44 +++++++++------ kernel/riscv64/gemv_n_vector.c | 24 ++++---- kernel/riscv64/gemv_t_vector.c | 42 ++++++++------ kernel/riscv64/iamax_vector.c | 80 +++++++++++++++------------ kernel/riscv64/iamin_vector.c | 82 ++++++++++++++++------------ kernel/riscv64/imax_vector.c | 82 ++++++++++++++++------------ kernel/riscv64/imin_vector.c | 80 ++++++++++++++++----------- kernel/riscv64/izamax_vector.c | 94 ++++++++++++++++++-------------- kernel/riscv64/izamin_vector.c | 92 ++++++++++++++++++------------- kernel/riscv64/max_vector.c | 20 ++++--- kernel/riscv64/min_vector.c | 20 ++++--- kernel/riscv64/nrm2_vector.c | 62 +++++++++++---------- kernel/riscv64/rot_vector.c | 36 ++++++------ kernel/riscv64/scal_vector.c | 14 ++--- kernel/riscv64/sum_vector.c | 32 +++++------ kernel/riscv64/swap_vector.c | 10 ++-- kernel/riscv64/symv_L_vector.c | 56 +++++++++++-------- kernel/riscv64/symv_U_vector.c | 60 +++++++++++--------- kernel/riscv64/zamax_vector.c | 25 +++++---- kernel/riscv64/zamin_vector.c | 25 +++++---- kernel/riscv64/zasum_vector.c | 22 +++++--- kernel/riscv64/zaxpby_vector.c | 32 +++++------ kernel/riscv64/zaxpy_vector.c | 20 +++---- kernel/riscv64/zcopy_vector.c | 12 ++-- kernel/riscv64/zdot_vector.c | 60 +++++++++++--------- kernel/riscv64/zgemv_n_vector.c | 28 +++++----- kernel/riscv64/zgemv_t_vector.c | 56 +++++++++++-------- kernel/riscv64/zhemv_LM_vector.c | 60 +++++++++++--------- kernel/riscv64/zhemv_UV_vector.c | 60 +++++++++++--------- kernel/riscv64/znrm2_vector.c | 51 +++++++++-------- kernel/riscv64/zrot_vector.c | 36 ++++++------ kernel/riscv64/zscal_vector.c | 32 +++++------ kernel/riscv64/zsum_vector.c | 16 +++--- kernel/riscv64/zswap_vector.c | 10 ++-- 42 files changed, 899 insertions(+), 695 deletions(-) diff --git a/common_riscv64.h b/common_riscv64.h index de79c8cab..f11e8b75d 100644 --- a/common_riscv64.h +++ b/common_riscv64.h @@ -91,12 +91,23 @@ static inline int blas_quickdivide(blasint x, blasint y){ #define BUFFER_SIZE ( 32 << 20) #define SEEK_ADDRESS -#if defined(C910V) || defined(RISCV64_ZVL256B) || defined(__riscv_v) +#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC))) # include +#endif + +#if defined( __riscv_xtheadc ) && defined( __riscv_v ) && ( __riscv_v <= 7000 ) +// t-head toolchain uses obsolete rvv intrinsics, can't build for C910V without this +#define RISCV_0p10_INTRINSICS +#define RISCV_RVV(x) x +#else +#define RISCV_RVV(x) __riscv_ ## x +#endif + +#if defined(C910V) || defined(RISCV64_ZVL256B) # if !defined(DOUBLE) -# define EXTRACT_FLOAT(v) __riscv_vfmv_f_s_f32m1_f32(v) +# define EXTRACT_FLOAT(v) RISCV_RVV(vfmv_f_s_f32m1_f32)(v) # else -# define EXTRACT_FLOAT(v) __riscv_vfmv_f_s_f64m1_f64(v) +# define EXTRACT_FLOAT(v) RISCV_RVV(vfmv_f_s_f64m1_f64)(v) # endif #else # define EXTRACT_FLOAT(v) (v[0]) diff --git a/cpuid_riscv64.c b/cpuid_riscv64.c index 1b6b62f21..928b5ba92 100644 --- a/cpuid_riscv64.c +++ b/cpuid_riscv64.c @@ -72,11 +72,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define CPU_GENERIC 0 #define CPU_C910V 1 -#define CPU_RISCV64_ZVL256B 2 +#define CPU_x280 2 +#define CPU_RISCV64_ZVL256B 3 static char *cpuname[] = { "RISCV64_GENERIC", "C910V", + "x280", "CPU_RISCV64_ZVL256B" }; diff --git a/kernel/riscv64/amax_vector.c b/kernel/riscv64/amax_vector.c index 81a39af32..b66d4871e 100644 --- a/kernel/riscv64/amax_vector.c +++ b/kernel/riscv64/amax_vector.c @@ -49,15 +49,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDMAXVS_FLOAT JOIN(__riscv_vfredmax_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) -#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDMAXVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl) +#else +#define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#endif +#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/amin_vector.c b/kernel/riscv64/amin_vector.c index c8ba75f4a..1c541f0fd 100644 --- a/kernel/riscv64/amin_vector.c +++ b/kernel/riscv64/amin_vector.c @@ -48,15 +48,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDMINVS_FLOAT JOIN(__riscv_vfredmin_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) -#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDMINVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl) +#else +#define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#endif +#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/asum_vector.c b/kernel/riscv64/asum_vector.c index d10bf99e6..995dbf9a1 100644 --- a/kernel/riscv64/asum_vector.c +++ b/kernel/riscv64/asum_vector.c @@ -49,16 +49,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDSUMVS_FLOAT JOIN(__riscv_vfredusum_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) -#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) -#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUMVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl) +#else +#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#endif +#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) +#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/axpby_vector.c b/kernel/riscv64/axpby_vector.c index b77cb58fb..386c4a5f1 100644 --- a/kernel/riscv64/axpby_vector.c +++ b/kernel/riscv64/axpby_vector.c @@ -48,15 +48,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL) -#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL) -#define VFMACCVF_FLOAT JOIN(__riscv_vfmacc, _vf_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMULVF_FLOAT JOIN(__riscv_vfmul, _vf_f, ELEN, LMUL, _) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL) +#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL) +#define VFMACCVF_FLOAT JOIN(RISCV_RVV(vfmacc), _vf_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMULVF_FLOAT JOIN(RISCV_RVV(vfmul), _vf_f, ELEN, LMUL, _) int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y) { diff --git a/kernel/riscv64/axpy_vector.c b/kernel/riscv64/axpy_vector.c index 3447107a6..e99ca8542 100644 --- a/kernel/riscv64/axpy_vector.c +++ b/kernel/riscv64/axpy_vector.c @@ -49,13 +49,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL) -#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL) -#define VFMACCVF_FLOAT JOIN(__riscv_vfmacc, _vf_f, ELEN, LMUL, _) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL) +#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL) +#define VFMACCVF_FLOAT JOIN(RISCV_RVV(vfmacc), _vf_f, ELEN, LMUL, _) int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) { diff --git a/kernel/riscv64/copy_vector.c b/kernel/riscv64/copy_vector.c index 710e8670a..ccbd6e482 100644 --- a/kernel/riscv64/copy_vector.c +++ b/kernel/riscv64/copy_vector.c @@ -47,12 +47,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL) -#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL) +#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL) int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) { @@ -71,7 +71,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) stride_x = inc_x * sizeof(FLOAT); if(gvl <= n/4){ BLASLONG inc_xv = inc_x * gvl; - BLASLONG gvl3 = gvl * 3; + unsigned int gvl3 = gvl * 3; BLASLONG inc_xv3 = inc_xv * 3; for(i=0,j=0; i #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m8(n) -#define VSETVL_MAX __riscv_vsetvlmax_e32m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e32m8)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)() #define FLOAT_V_T vfloat32m8_t #define FLOAT_V_T_M1 vfloat32m1_t -#define VLEV_FLOAT __riscv_vle32_v_f32m8 -#define VLSEV_FLOAT __riscv_vlse32_v_f32m8 -#define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8 +#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m8) +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8) +#define VFREDSUMVS_FLOAT RISCV_RVV(vfredusum_vs_f32m8_f32m1) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1) +#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f32m8) #else -#define VSETVL(n) __riscv_vsetvl_e64m8(n) -#define VSETVL_MAX __riscv_vsetvlmax_e64m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e64m8)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)() #define FLOAT_V_T vfloat64m8_t #define FLOAT_V_T_M1 vfloat64m1_t -#define VLEV_FLOAT __riscv_vle64_v_f64m8 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m8 -#define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8 +#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m8) +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m8) +#define VFREDSUMVS_FLOAT RISCV_RVV(vfredusum_vs_f64m8_f64m1) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m8) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1) +#define VFADDVV_FLOAT RISCV_RVV(vfadd_vv_f64m8) #endif FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/swap_vector.c b/kernel/riscv64/swap_vector.c index baf3d8f69..3b467a586 100644 --- a/kernel/riscv64/swap_vector.c +++ b/kernel/riscv64/swap_vector.c @@ -53,12 +53,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL) -#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL) +#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL) int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) { diff --git a/kernel/riscv64/symv_L_vector.c b/kernel/riscv64/symv_L_vector.c index f3b922195..cd89c63ec 100644 --- a/kernel/riscv64/symv_L_vector.c +++ b/kernel/riscv64/symv_L_vector.c @@ -27,35 +27,43 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e32m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t -#define VLEV_FLOAT __riscv_vle32_v_f32m4 -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSEV_FLOAT __riscv_vse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 +#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4) +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m4_f32m1(v_res, va, vb, gvl) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e64m1() +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4) +#else +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)() #define FLOAT_V_T vfloat64m4_t #define FLOAT_V_T_M1 vfloat64m1_t -#define VLEV_FLOAT __riscv_vle64_v_f64m4 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSEV_FLOAT __riscv_vse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 +#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4) +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) +#ifdef RISCV_0p10_INTRINSICS +#define 
VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f64m4_f64m1(v_res, va, vb, gvl) +#else +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4) #endif int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) diff --git a/kernel/riscv64/symv_U_vector.c b/kernel/riscv64/symv_U_vector.c index 9977e2741..894c6a643 100644 --- a/kernel/riscv64/symv_U_vector.c +++ b/kernel/riscv64/symv_U_vector.c @@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e32m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t -#define VLEV_FLOAT __riscv_vle32_v_f32m4 -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSEV_FLOAT __riscv_vse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFDOTVV_FLOAT __riscv_vfdot_vv_f32m4 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 +#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4) +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f32m4_f32m1(v_res, va, vb, gvl) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e64m1() +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1) +#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f32m4) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4) +#else +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)() #define FLOAT_V_T vfloat64m4_t #define FLOAT_V_T_M1 vfloat64m1_t -#define VLEV_FLOAT __riscv_vle64_v_f64m4 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSEV_FLOAT __riscv_vse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFDOTVV_FLOAT __riscv_vfdot_vv_f64m4 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 +#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4) +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) vfredusum_vs_f64m4_f64m1(v_res, va, vb, gvl) +#else +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) +#define 
VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1) +#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f64m4) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4) #endif int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) diff --git a/kernel/riscv64/zamax_vector.c b/kernel/riscv64/zamax_vector.c index 4301528bd..2dee5ab29 100644 --- a/kernel/riscv64/zamax_vector.c +++ b/kernel/riscv64/zamax_vector.c @@ -53,19 +53,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDMAXVS_FLOAT JOIN(__riscv_vfredmax_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDMAXVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl) +#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl) +#else +#define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) +#endif #define MASK_T JOIN(vbool, MLEN, _t, _, _) -#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) -#define VFRSUBVF_MASK_FLOAT JOIN(__riscv_vfrsub,_vf_f, ELEN, LMUL, _m) -#define VFMAXVV_FLOAT JOIN(__riscv_vfmax, _vv_f, ELEN, LMUL, _) -#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _) +#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) +#define VFMAXVV_FLOAT JOIN(RISCV_RVV(vfmax), _vv_f, ELEN, LMUL, _) +#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/zamin_vector.c b/kernel/riscv64/zamin_vector.c index 095b1c3df..df9a7a7e1 100644 --- a/kernel/riscv64/zamin_vector.c +++ b/kernel/riscv64/zamin_vector.c @@ -55,19 +55,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDMINVS_FLOAT JOIN(__riscv_vfredmin_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDMINVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl) +#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl) +#else +#define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) +#endif #define MASK_T JOIN(vbool, MLEN, _t, _, _) -#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) -#define VFRSUBVF_MASK_FLOAT JOIN(__riscv_vfrsub,_vf_f, ELEN, LMUL, _m) -#define VFMINVV_FLOAT JOIN(__riscv_vfmin, _vv_f, ELEN, LMUL, _) -#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _) +#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) +#define VFMINVV_FLOAT JOIN(RISCV_RVV(vfmin), _vv_f, ELEN, LMUL, _) +#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/zasum_vector.c b/kernel/riscv64/zasum_vector.c index 9136f0037..fca904d6a 100644 --- a/kernel/riscv64/zasum_vector.c +++ b/kernel/riscv64/zasum_vector.c @@ -53,17 +53,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define JOIN2(x, y) JOIN2_X(x, y) #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z) -#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _) +#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _) #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _) #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _) -#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL) -#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL) -#define VFREDSUMVS_FLOAT JOIN(__riscv_vfredusum_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1)) -#define VFABS_FLOAT JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _) -#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _) -#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _) -#define VMFLTVF_FLOAT JOIN(__riscv_vmflt, _vf_f, ELEN, LMUL, MLEN) +#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL) +#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUMVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl) +#else +#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) +#endif +#define VFABS_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _) +#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _) +#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _) +#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt), _vf_f, ELEN, LMUL, MLEN) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) { diff --git a/kernel/riscv64/zaxpby_vector.c b/kernel/riscv64/zaxpby_vector.c index 404f51fb3..d5ad974cf 100644 --- a/kernel/riscv64/zaxpby_vector.c +++ b/kernel/riscv64/zaxpby_vector.c @@ -28,25 +28,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) #define FLOAT_V_T vfloat32m4_t -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 -#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4 -#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f32m4 -#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4 +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4) +#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4) +#define VFMSACVF_FLOAT RISCV_RVV(vfmsac_vf_f32m4) +#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) #define FLOAT_V_T vfloat64m4_t -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 -#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4 -#define VFMSACVF_FLOAT __riscv_vfmsac_vf_f64m4 -#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4 +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4) +#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4) +#define VFMSACVF_FLOAT RISCV_RVV(vfmsac_vf_f64m4) +#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4) #endif int CNAME(BLASLONG n, FLOAT alpha_r, FLOAT alpha_i, FLOAT *x, BLASLONG inc_x, FLOAT beta_r, FLOAT beta_i, FLOAT *y, BLASLONG inc_y) diff --git a/kernel/riscv64/zaxpy_vector.c b/kernel/riscv64/zaxpy_vector.c index 20bfe74ec..d19e51118 100644 --- a/kernel/riscv64/zaxpy_vector.c +++ b/kernel/riscv64/zaxpy_vector.c @@ -28,19 +28,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) #define FLOAT_V_T vfloat32m4_t -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4 -#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4 +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4) +#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) #define FLOAT_V_T vfloat64m4_t -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 -#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4 -#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4 +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) +#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4) +#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4) #endif int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) diff --git a/kernel/riscv64/zcopy_vector.c b/kernel/riscv64/zcopy_vector.c index 9da60acb0..9e4a67b71 100644 --- a/kernel/riscv64/zcopy_vector.c +++ b/kernel/riscv64/zcopy_vector.c @@ -27,15 +27,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) #define FLOAT_V_T vfloat32m4_t -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VSSEV_FLOAT __riscv_vsse32_v_f32m4 +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) #define FLOAT_V_T vfloat64m4_t -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VSSEV_FLOAT __riscv_vsse64_v_f64m4 +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4) #endif diff --git a/kernel/riscv64/zdot_vector.c b/kernel/riscv64/zdot_vector.c index 57542714a..13b8fe378 100644 --- a/kernel/riscv64/zdot_vector.c +++ b/kernel/riscv64/zdot_vector.c @@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" #if !defined(DOUBLE) -#define VSETVL(n) __riscv_vsetvl_e32m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e32m1() +#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)() #define FLOAT_V_T vfloat32m4_t #define FLOAT_V_T_M1 vfloat32m1_t -#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32 -#define VLEV_FLOAT __riscv_vle32_v_f32m4 -#define VLSEV_FLOAT __riscv_vlse32_v_f32m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1 -#define VFDOTVV_FLOAT __riscv_vfdot_vv_f32m4 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4 -#define VFMSACVV_FLOAT __riscv_vfmsac_vv_f32m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4 +#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32) +#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4) +#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m4_f32m1)(v_res, va, vb, gvl) #else -#define VSETVL(n) __riscv_vsetvl_e64m4(n) -#define VSETVL_MAX __riscv_vsetvlmax_e64m1() +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1) +#endif +#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4) +#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1) +#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f32m4) +#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4) +#define VFMSACVV_FLOAT RISCV_RVV(vfmsac_vv_f32m4) +#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m4) +#else +#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n) +#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)() #define FLOAT_V_T vfloat64m4_t #define FLOAT_V_T_M1 vfloat64m1_t -#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64 -#define VLEV_FLOAT __riscv_vle64_v_f64m4 -#define VLSEV_FLOAT __riscv_vlse64_v_f64m4 -#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1 -#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4 -#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 -#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1 -#define VFDOTVV_FLOAT __riscv_vfdot_vv_f64m4 -#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4 -#define VFMSACVV_FLOAT __riscv_vfmsac_vv_f64m4 -#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4 +#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64) +#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4) +#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4) +#ifdef RISCV_0p10_INTRINSICS +#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m4_f64m1)(v_res, va, vb, gvl) +#else +#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1) +#endif 
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFDOTVV_FLOAT RISCV_RVV(vfdot_vv_f64m4)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4)
+#define VFMSACVV_FLOAT RISCV_RVV(vfmsac_vv_f64m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m4)
 #endif
 OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
diff --git a/kernel/riscv64/zgemv_n_vector.c b/kernel/riscv64/zgemv_n_vector.c
index f4acad770..104d3865d 100644
--- a/kernel/riscv64/zgemv_n_vector.c
+++ b/kernel/riscv64/zgemv_n_vector.c
@@ -27,23 +27,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
 #define FLOAT_V_T vfloat32m4_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSEV_FLOAT __riscv_vse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
 #define FLOAT_V_T vfloat64m4_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSEV_FLOAT __riscv_vse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
 #endif
 int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
diff --git a/kernel/riscv64/zgemv_t_vector.c b/kernel/riscv64/zgemv_t_vector.c
index 179454094..5d85ab3a4 100644
--- a/kernel/riscv64/zgemv_t_vector.c
+++ b/kernel/riscv64/zgemv_t_vector.c
@@ -27,31 +27,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m2(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m2)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m2_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m2
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m2_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m2
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m2
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m2
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m2
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m2)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m2_f32m1)(vr, va, vb, gvl)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m2(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m2_f32m1)(va, vb, gvl)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m2)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m2)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m2)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m2)
+#else
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m2)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m2_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m2
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m2_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m2
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m2
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m2
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m2
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m2)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m2_f64m1)(vr, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT(vr, va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m2_f64m1)(va, vb, gvl)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m2)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m2)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m2)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m2)
 #endif
 int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer)
@@ -93,8 +101,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
 vr = VFMACCVV_FLOAT(vr, va1, vx1, gvl);
 vi = VFNMSACVV_FLOAT(vi, va1, vx0, gvl);
 #endif
- v_res_r = VFREDSUM_FLOAT(vr, v_res_r, gvl);
- v_res_i = VFREDSUM_FLOAT(vi, v_res_i, gvl);
+ v_res_r = VFREDSUM_FLOAT(v_res_r, vr, v_res_r, gvl);
+ v_res_i = VFREDSUM_FLOAT(v_res_i, vi, v_res_i, gvl);
 j += inc_av;
 ix += inc_xv;
@@ -117,8 +125,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
 vi = VFNMSACVV_FLOAT(vi, va1, vx0, gvl);
 #endif
- v_res_r = VFREDSUM_FLOAT(vr, v_res_r, gvl);
- v_res_i = VFREDSUM_FLOAT(vi, v_res_i, gvl);
+ v_res_r = VFREDSUM_FLOAT(v_res_r, vr, v_res_r, gvl);
+ v_res_i = VFREDSUM_FLOAT(v_res_i, vi, v_res_i, gvl);
 }
 temp_r = VFMVFS_FLOAT(v_res_r);
diff --git a/kernel/riscv64/zhemv_LM_vector.c b/kernel/riscv64/zhemv_LM_vector.c
index e025120e5..117db7d84 100644
--- a/kernel/riscv64/zhemv_LM_vector.c
+++ b/kernel/riscv64/zhemv_LM_vector.c
@@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m4_f32m1)(v_res, va, vb, gvl)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m4)
+#else
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m4_f64m1)(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m4)
 #endif
 int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){
diff --git a/kernel/riscv64/zhemv_UV_vector.c b/kernel/riscv64/zhemv_UV_vector.c
index 0e1ea5436..7c6b63bf3 100644
--- a/kernel/riscv64/zhemv_UV_vector.c
+++ b/kernel/riscv64/zhemv_UV_vector.c
@@ -27,37 +27,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
 #define FLOAT_V_T_M1 vfloat32m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f32m1_f32
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f32m1_f32)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f32m4_f32m1)(v_res, va, vb, gvl)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f32m4_f32m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f32m4)
+#else
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
 #define FLOAT_V_T_M1 vfloat64m1_t
-#define VFMVFS_FLOAT __riscv_vfmv_f_s_f64m1_f64
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
-#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMVFS_FLOAT RISCV_RVV(vfmv_f_s_f64m1_f64)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDSUM_FLOAT(va, vb, gvl) RISCV_RVV(vfredusum_vs_f64m4_f64m1)(v_res, va, vb, gvl)
+#else
+#define VFREDSUM_FLOAT RISCV_RVV(vfredusum_vs_f64m4_f64m1)
+#endif
+#define VFMACCVV_FLOAT RISCV_RVV(vfmacc_vv_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
+#define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f64m1)
+#define VFMULVV_FLOAT RISCV_RVV(vfmul_vv_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
+#define VFNMSACVV_FLOAT RISCV_RVV(vfnmsac_vv_f64m4)
 #endif
 int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *y, BLASLONG incy, FLOAT *buffer){
diff --git a/kernel/riscv64/znrm2_vector.c b/kernel/riscv64/znrm2_vector.c
index 437bf4246..8614f7539 100644
--- a/kernel/riscv64/znrm2_vector.c
+++ b/kernel/riscv64/znrm2_vector.c
@@ -52,37 +52,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
-#define VFABS JOIN(__riscv_vfabs, _v_f, ELEN, LMUL, _)
-#define VMFNE JOIN(__riscv_vmfne_vf_f,ELEN, LMUL, _b, MLEN)
-#define VMFGT JOIN(__riscv_vmfgt_vv_f,ELEN, LMUL, _b, MLEN)
-#define VMFEQ JOIN(__riscv_vmfeq_vv_f,ELEN, LMUL, _b, MLEN)
-#define VCPOP JOIN(__riscv_vcpop, _m_b, MLEN, _, _)
-#define VFREDMAX JOIN(__riscv_vfredmax_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1)
-#define VFIRST JOIN(__riscv_vfirst, _m_b, MLEN, _, _)
-#define VRGATHER JOIN(__riscv_vrgather, _vx_f, ELEN, LMUL, _)
-#define VFDIV JOIN(__riscv_vfdiv, _vf_f, ELEN, LMUL, _)
-#define VFDIV_M JOIN(__riscv_vfdiv, _vv_f, ELEN, LMUL, _mu)
-#define VFMUL JOIN(__riscv_vfmul, _vv_f, ELEN, LMUL, _)
-#define VFMACC JOIN(__riscv_vfmacc, _vv_f, ELEN, LMUL, _)
-#define VFMACC_M JOIN(__riscv_vfmacc, _vv_f, ELEN, LMUL, _mu)
-#define VMSOF JOIN(__riscv_vmsof, _m_b, MLEN, _, _)
-#define VMANDN JOIN(__riscv_vmandn, _mm_b, MLEN, _, _)
-#define VFREDUSUM JOIN(__riscv_vfredusum_vs_f,ELEN,LMUL, JOIN2(_f, ELEN), m1)
+#define VFABS JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
+#define VMFNE JOIN(RISCV_RVV(vmfne_vf_f),ELEN, LMUL, _b, MLEN)
+#define VMFGT JOIN(RISCV_RVV(vmfgt_vv_f),ELEN, LMUL, _b, MLEN)
+#define VMFEQ JOIN(RISCV_RVV(vmfeq_vv_f),ELEN, LMUL, _b, MLEN)
+#define VCPOP JOIN(RISCV_RVV(vcpop), _m_b, MLEN, _, _)
+#ifdef RISCV_0p10_INTRINSICS
+#define VFREDMAX(va, vb, gvl) JOIN(RISCV_RVV(vfredmax_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)(v_res, va, vb, gvl)
+#define VFREDUSUM(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)(v_res, va, vb, gvl)
+#define VFDIV_M JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _m)
+#define VFMACC_M JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _m)
+#else
+#define VFREDMAX JOIN(RISCV_RVV(vfredmax_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)
+#define VFREDUSUM JOIN(RISCV_RVV(vfredusum_vs_f),ELEN,LMUL, JOIN2(_f, ELEN), m1)
+#define VFDIV_M JOIN(RISCV_RVV(vfdiv), _vv_f, ELEN, LMUL, _mu)
+#define VFMACC_M JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _mu)
+#endif
+#define VFIRST JOIN(RISCV_RVV(vfirst), _m_b, MLEN, _, _)
+#define VRGATHER JOIN(RISCV_RVV(vrgather), _vx_f, ELEN, LMUL, _)
+#define VFDIV JOIN(RISCV_RVV(vfdiv), _vf_f, ELEN, LMUL, _)
+#define VFMUL JOIN(RISCV_RVV(vfmul), _vv_f, ELEN, LMUL, _)
+#define VFMACC JOIN(RISCV_RVV(vfmacc), _vv_f, ELEN, LMUL, _)
+#define VMSOF JOIN(RISCV_RVV(vmsof), _m_b, MLEN, _, _)
+#define VMANDN JOIN(RISCV_RVV(vmandn), _mm_b, MLEN, _, _)
 #if defined(DOUBLE)
 #define ABS fabs
 #else
 #define ABS fabsf
 #endif
-#define EXTRACT_FLOAT0_V(v) JOIN(__riscv_vfmv_f_s_f, ELEN, LMUL, _f, ELEN)(v)
+#define EXTRACT_FLOAT0_V(v) JOIN(RISCV_RVV(vfmv_f_s_f), ELEN, LMUL, _f, ELEN)(v)
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
diff --git a/kernel/riscv64/zrot_vector.c b/kernel/riscv64/zrot_vector.c
index c3afbc7cc..50751b343 100644
--- a/kernel/riscv64/zrot_vector.c
+++ b/kernel/riscv64/zrot_vector.c
@@ -27,27 +27,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
-#define VLEV_FLOAT __riscv_vle32_v_f32m4
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSEV_FLOAT __riscv_vse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
+#define VLEV_FLOAT RISCV_RVV(vle32_v_f32m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSEV_FLOAT RISCV_RVV(vse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
-#define VLEV_FLOAT __riscv_vle64_v_f64m4
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSEV_FLOAT __riscv_vse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
+#define VLEV_FLOAT RISCV_RVV(vle64_v_f64m4)
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSEV_FLOAT RISCV_RVV(vse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
 #endif
 int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT c, FLOAT s)
diff --git a/kernel/riscv64/zscal_vector.c b/kernel/riscv64/zscal_vector.c
index 5d9ab7b28..2034aafaa 100644
--- a/kernel/riscv64/zscal_vector.c
+++ b/kernel/riscv64/zscal_vector.c
@@ -27,25 +27,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include "common.h"
 #if !defined(DOUBLE)
-#define VSETVL(n) __riscv_vsetvl_e32m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e32m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e32m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e32m1)()
 #define FLOAT_V_T vfloat32m4_t
-#define VLSEV_FLOAT __riscv_vlse32_v_f32m4
-#define VSSEV_FLOAT __riscv_vsse32_v_f32m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
+#define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f32m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f32m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m4)
 #else
-#define VSETVL(n) __riscv_vsetvl_e64m4(n)
-#define VSETVL_MAX __riscv_vsetvlmax_e64m1()
+#define VSETVL(n) RISCV_RVV(vsetvl_e64m4)(n)
+#define VSETVL_MAX RISCV_RVV(vsetvlmax_e64m1)()
 #define FLOAT_V_T vfloat64m4_t
-#define VLSEV_FLOAT __riscv_vlse64_v_f64m4
-#define VSSEV_FLOAT __riscv_vsse64_v_f64m4
-#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m4
-#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4
-#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m4
-#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
+#define VLSEV_FLOAT RISCV_RVV(vlse64_v_f64m4)
+#define VSSEV_FLOAT RISCV_RVV(vsse64_v_f64m4)
+#define VFMACCVF_FLOAT RISCV_RVV(vfmacc_vf_f64m4)
+#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f64m4)
+#define VFNMSACVF_FLOAT RISCV_RVV(vfnmsac_vf_f64m4)
+#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f64m4)
 #endif
 int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
diff --git a/kernel/riscv64/zsum_vector.c b/kernel/riscv64/zsum_vector.c
index 7aab15105..ca0b02b5c 100644
--- a/kernel/riscv64/zsum_vector.c
+++ b/kernel/riscv64/zsum_vector.c
@@ -53,16 +53,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
 #define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VFREDSUMVS_FLOAT JOIN(__riscv_vfredusum_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
-#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
-#define VFADDVV_FLOAT JOIN(__riscv_vfadd, _vv_f, ELEN, LMUL, _)
-#define VMFLTVF_FLOAT JOIN(__riscv_vmflt, _vf_f, ELEN, LMUL, MLEN)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
+#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
+#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
+#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
+#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt), _vf_f, ELEN, LMUL, MLEN)
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
diff --git a/kernel/riscv64/zswap_vector.c b/kernel/riscv64/zswap_vector.c
index d8980602d..02c98b588 100644
--- a/kernel/riscv64/zswap_vector.c
+++ b/kernel/riscv64/zswap_vector.c
@@ -53,12 +53,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define JOIN2(x, y) JOIN2_X(x, y)
 #define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
-#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
+#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
 #define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
-#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
-#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
-#define VSEV_FLOAT JOIN(__riscv_vse, ELEN, _v_f, ELEN, LMUL)
-#define VSSEV_FLOAT JOIN(__riscv_vsse, ELEN, _v_f, ELEN, LMUL)
+#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
+#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
+#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
+#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)
 int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dummy4, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
 {