Merge pull request #4125 from OMaghiarIMG/risc-v

Fixes RVV masked intrinsics for iamax/iamin/imax/imin kernels
Martin Kroeker 2023-12-07 14:50:58 +01:00 committed by GitHub
commit 2d316c2920
6 changed files with 49 additions and 49 deletions
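
Why the change works (a sketch, not part of the patch): under the RVV intrinsics v1.0 naming used in these kernels, the plain _m masked forms use the mask-agnostic policy, so destination lanes where the mask is 0 may be left with unspecified values. That corrupted the running index vectors (v_max_index / v_min_index), which must carry the indices found in earlier loop iterations. The _mu ("mask undisturbed") forms take an explicit maskedoff operand and copy inactive lanes from it. The helper below restates the patched two-step index update for the f64m8 case; update_max_index is a hypothetical name used for illustration only.

    #include <stdint.h>
    #include <riscv_vector.h>

    /* Sketch of the patched argmax index update (f64m8 variant): lanes
     * where mask is 0 must keep the index of the maximum seen so far,
     * which the _mu intrinsics guarantee via the maskedoff operand. */
    static inline vuint64m8_t
    update_max_index(vbool8_t mask, vuint64m8_t v_max_index,
                     uint64_t j, size_t vl)
    {
        /* vid under the mask: each active lane gets its element index;
         * inactive lanes keep their old v_max_index value (the maskedoff
         * operand added by this patch). */
        v_max_index = __riscv_vid_v_u64m8_mu(mask, v_max_index, vl);
        /* add the block offset j on active lanes only; inactive lanes
         * are again preserved from v_max_index. */
        v_max_index = __riscv_vadd_vx_u64m8_mu(mask, v_max_index,
                                               v_max_index, j, vl);
        return v_max_index;
    }

With the old _m forms (VIDV_MASK_UINT(mask, vl)), a lane that did not see a new maximum or minimum in the current block could end up with a garbage index instead of the index carried over from earlier blocks, which is the bug fixed here for the iamax/iamin/imax/imin kernels.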

View File

@@ -45,9 +45,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -71,9 +71,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -106,8 +106,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx, vl);
-v_max_index = VIDV_MASK_UINT(mask, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
 //update v_max
 v_max = VFMAXVV_FLOAT(v_max, vx, vl);
@@ -125,8 +125,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx, vl);
-v_max_index = VIDV_MASK_UINT(mask, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
 //update v_max
 v_max = VFMAXVV_FLOAT(v_max, vx, vl);

View File

@@ -46,9 +46,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -72,9 +72,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -107,8 +107,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
 //update v_min and start_index j
 v_min = VFMINVV_FLOAT(v_min, vx, vl);
@@ -126,8 +126,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
 //update v_min and start_index j
 v_min = VFMINVV_FLOAT(v_min, vx, vl);

View File

@@ -45,9 +45,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -70,9 +70,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -104,8 +104,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx, vl);
-v_max_index = VIDV_MASK_UINT(mask, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
 //update v_max and start_index j
 v_max = VFMAXVV_FLOAT(v_max, vx, vl);
@@ -122,8 +122,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx, vl);
-v_max_index = VIDV_MASK_UINT(mask, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
 //update v_max and start_index j
 v_max = VFMAXVV_FLOAT(v_max, vx, vl);

View File

@@ -45,9 +45,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_m
+#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -70,9 +70,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_m
+#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -104,8 +104,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
 //update v_min and start_index j
 v_min = VFMINVV_FLOAT(v_min, vx, vl);
@@ -122,8 +122,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
 //update v_min and start_index j
 v_min = VFMINVV_FLOAT(v_min, vx, vl);

View File

@@ -48,9 +48,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFIRSTM __riscv_vfirst_m_b16
 #define UINT_V_T vuint64m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m4_m
+#define VIDV_MASK_UINT __riscv_vid_v_u64m4_mu
 #define VIDV_UINT __riscv_vid_v_u64m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_mu
 #define VADDVX_UINT __riscv_vadd_vx_u64m4
 #define VMVVX_UINT __riscv_vmv_v_x_u64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -77,9 +77,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint32m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m4_m
+#define VIDV_MASK_UINT __riscv_vid_v_u32m4_mu
 #define VIDV_UINT __riscv_vid_v_u32m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_mu
 #define VADDVX_UINT __riscv_vadd_vx_u32m4
 #define VMVVX_UINT __riscv_vmv_v_x_u32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -116,8 +116,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx0, vl);
-v_max_index = VIDV_MASK_UINT(mask, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
 //update v_max and start_index j
 v_max = VFMAXVV_FLOAT(v_max, vx0, vl);
@@ -138,9 +138,9 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx0, vl);
-v_max_index = VIDV_MASK_UINT(mask, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
 //update v_max and start_index j
 v_max = VFMAXVV_FLOAT(v_max, vx0, vl);
 }

View File

@@ -47,9 +47,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFIRSTM __riscv_vfirst_m_b16
 #define UINT_V_T vuint64m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m4_m
+#define VIDV_MASK_UINT __riscv_vid_v_u64m4_mu
 #define VIDV_UINT __riscv_vid_v_u64m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_mu
 #define VADDVX_UINT __riscv_vadd_vx_u64m4
 #define VMVVX_UINT __riscv_vmv_v_x_u64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -74,9 +74,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint32m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m4_m
+#define VIDV_MASK_UINT __riscv_vid_v_u32m4_mu
 #define VIDV_UINT __riscv_vid_v_u32m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_m
+#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_mu
 #define VADDVX_UINT __riscv_vadd_vx_u32m4
 #define VMVVX_UINT __riscv_vmv_v_x_u32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -113,8 +113,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx0, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
 //update v_min and start_index j
 v_min = VFMINVV_FLOAT(v_min, vx0, vl);
@@ -136,8 +136,8 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx0, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
 //update v_min and start_index j
 v_min = VFMINVV_FLOAT(v_min, vx0, vl);