Merge pull request #4159 from OMaghiarIMG/risc-v-tail-policy
Set tail policy to undisturbed for RVV intrinsics accumulators
commit 1332f8a822
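Why tail policy matters here: these kernels carry a vector accumulator across a strip-mined loop and reduce it only after the loop, over the full vlmax-wide register. On the last iteration vl can be smaller than vlmax, and under the default tail-agnostic policy the accumulator elements past vl become undefined, which would corrupt the final reduction. The tail-undisturbed (_tu) intrinsic variants take the old destination as an extra leading operand and leave elements [vl, vlmax) unchanged; the masked index updates in the iamax/iamin-style kernels likewise move from _mu to _tumu (tail-undisturbed, mask-undisturbed) for the same reason. A minimal sketch of the pattern, illustrative only and not code from this PR (the function name and loop shape are assumptions):

#include <riscv_vector.h>
#include <stddef.h>

/* Strip-mined sum: the accumulator vsum is reduced over vlmax after the
 * loop, so its tail elements must survive iterations where vl < vlmax.
 * The _tu variant keeps elements [vl, vlmax) of vsum undisturbed. */
static float sum_f32(const float *x, size_t n)
{
    size_t vlmax = __riscv_vsetvlmax_e32m8();
    vfloat32m8_t vsum = __riscv_vfmv_v_f_f32m8(0.0f, vlmax);

    while (n > 0) {
        size_t vl = __riscv_vsetvl_e32m8(n);
        vfloat32m8_t vx = __riscv_vle32_v_f32m8(x, vl);
        /* tail-undisturbed form: the destination is passed again as the
         * first (pass-through) operand */
        vsum = __riscv_vfadd_vv_f32m8_tu(vsum, vsum, vx, vl);
        x += vl;
        n -= vl;
    }

    /* the reduction consumes the whole register, tail included */
    vfloat32m1_t vzero = __riscv_vfmv_v_f_f32m1(0.0f, 1);
    vfloat32m1_t vres = __riscv_vfredusum_vs_f32m8_f32m1(vsum, vzero, vlmax);
    return __riscv_vfmv_f_s_f32m1_f32(vres);
}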
@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
@@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif
@@ -78,7 +78,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 vx = VLEV_FLOAT(x, vl);
 vx = VFABSV_FLOAT(vx, vl);
-vmax = VFMAXVV_FLOAT(vmax, vx, vl);
+vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
 }
 
 } else {
@@ -90,7 +90,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 vx = VLSEV_FLOAT(x, stride_x, vl);
 vx = VFABSV_FLOAT(vx, vl);
-vmax = VFMAXVV_FLOAT(vmax, vx, vl);
+vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
 }
 
 }

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
@@ -53,7 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif
@@ -78,7 +78,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 vx = VLEV_FLOAT(x, vl);
 vx = VFABSV_FLOAT(vx, vl);
-vmin = VFMINVV_FLOAT(vmin, vx, vl);
+vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
 }
 
 } else {
@@ -90,7 +90,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 vx = VLSEV_FLOAT(x, stride_x, vl);
 vx = VFABSV_FLOAT(vx, vl);
-vmin = VFMINVV_FLOAT(vmin, vx, vl);
+vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
 }
 
 }

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
@@ -50,7 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
@@ -76,7 +76,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 vx = VLEV_FLOAT(x, vl);
 vx = VFABSV_FLOAT(vx, vl);
-vsum = VFADDVV_FLOAT(vsum, vx, vl);
+vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
 }
 
 } else {
@@ -88,7 +88,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 vx = VLSEV_FLOAT(x, stride_x, vl);
 vx = VFABSV_FLOAT(vx, vl);
-vsum = VFADDVV_FLOAT(vsum, vx, vl);
+vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
 }
 
 }

@@ -49,12 +49,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
 vfloat32m4_t vx = __riscv_vle32_v_f32m4(x, vl);
 vfloat32m4_t vy = __riscv_vle32_v_f32m4(y, vl);
 
-vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
+vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #else
 vfloat64m8_t vx = __riscv_vle64_v_f64m8(x, vl);
 vfloat64m8_t vy = __riscv_vle64_v_f64m8(y, vl);
 
-vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
+vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #endif
 }
 
@@ -69,12 +69,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
 vfloat32m4_t vx = __riscv_vle32_v_f32m4(x, vl);
 vfloat32m4_t vy = __riscv_vlse32_v_f32m4(y, stride_y, vl);
 
-vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
+vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #else
 vfloat64m8_t vx = __riscv_vle64_v_f64m8(x, vl);
 vfloat64m8_t vy = __riscv_vlse64_v_f64m8(y, stride_y, vl);
 
-vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
+vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #endif
 }
 } else if (1 == inc_y) {
@@ -88,12 +88,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
 vfloat32m4_t vx = __riscv_vlse32_v_f32m4(x, stride_x, vl);
 vfloat32m4_t vy = __riscv_vle32_v_f32m4(y, vl);
 
-vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
+vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #else
 vfloat64m8_t vx = __riscv_vlse64_v_f64m8(x, stride_x, vl);
 vfloat64m8_t vy = __riscv_vle64_v_f64m8(y, vl);
 
-vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
+vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #endif
 }
 } else {
@@ -108,12 +108,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
 vfloat32m4_t vx = __riscv_vlse32_v_f32m4(x, stride_x, vl);
 vfloat32m4_t vy = __riscv_vlse32_v_f32m4(y, stride_y, vl);
 
-vr = __riscv_vfwmacc_vv_f64m8(vr, vx, vy, vl);
+vr = __riscv_vfwmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #else
 vfloat64m8_t vx = __riscv_vlse64_v_f64m8(x, stride_x, vl);
 vfloat64m8_t vy = __riscv_vlse64_v_f64m8(y, stride_y, vl);
 
-vr = __riscv_vfmacc_vv_f64m8(vr, vx, vy, vl);
+vr = __riscv_vfmacc_vv_f64m8_tu(vr, vx, vy, vl);
 #endif
 }
 }

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -79,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
 
 va = VLEV_FLOAT(a_ptr, vl);
 vx = VLEV_FLOAT(x_ptr, vl);
-vr = VFMACCVV_FLOAT(vr, va, vx, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl);
 }
 
 v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
@@ -103,7 +103,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
 
 va = VLEV_FLOAT(a_ptr, vl);
 vx = VLSEV_FLOAT(x_ptr, stride_x, vl);
-vr = VFMACCVV_FLOAT(vr, va, vx, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl);
 }
 
 v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);

@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -68,12 +68,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -106,11 +106,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx, vl);
-v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
 //update v_max
-v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
 }
 
 } else {
@@ -125,11 +125,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx, vl);
-v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
 //update v_max
-v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
 }
 
 }

@@ -43,12 +43,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -69,12 +69,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -107,11 +107,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
 //update v_min and start_index j
-v_min = VFMINVV_FLOAT(v_min, vx, vl);
+v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
 }
 
 } else {
@@ -126,11 +126,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
 //update v_min and start_index j
-v_min = VFMINVV_FLOAT(v_min, vx, vl);
+v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
 }
 
 }

@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFGEVF_FLOAT __riscv_vmfge_vf_f64m8_b8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -67,12 +67,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFGEVF_FLOAT __riscv_vmfge_vf_f32m8_b4
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -104,11 +104,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx, vl);
-v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
 //update v_max and start_index j
-v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
 }
 
 } else {
@@ -122,11 +122,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx, vl);
-v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
 //update v_max and start_index j
-v_max = VFMAXVV_FLOAT(v_max, vx, vl);
+v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx, vl);
 }
 
 }

@@ -42,12 +42,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFLEVF_FLOAT __riscv_vmfle_vf_f64m8_b8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint64m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m8_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m8_tumu
 #define VIDV_UINT __riscv_vid_v_u64m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m8_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m8
 #define VMVVX_UINT __riscv_vmv_v_x_u64m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -67,12 +67,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VMFLEVF_FLOAT __riscv_vmfle_vf_f32m8_b4
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFIRSTM __riscv_vfirst_m_b4
 #define UINT_V_T vuint32m8_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m8_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m8_tumu
 #define VIDV_UINT __riscv_vid_v_u32m8
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m8_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m8_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m8
 #define VMVVX_UINT __riscv_vmv_v_x_u32m8
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -104,11 +104,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
 //update v_min and start_index j
-v_min = VFMINVV_FLOAT(v_min, vx, vl);
+v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
 }
 
 } else {
@@ -122,11 +122,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
 //update v_min and start_index j
-v_min = VFMINVV_FLOAT(v_min, vx, vl);
+v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx, vl);
 }
 
 }

@@ -44,13 +44,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m4
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFIRSTM __riscv_vfirst_m_b16
 #define UINT_V_T vuint64m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m4_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m4_tumu
 #define VIDV_UINT __riscv_vid_v_u64m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m4
 #define VMVVX_UINT __riscv_vmv_v_x_u64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -73,13 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m4
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint32m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m4_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m4_tumu
 #define VIDV_UINT __riscv_vid_v_u32m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m4
 #define VMVVX_UINT __riscv_vmv_v_x_u32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -116,11 +116,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx0, vl);
-v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
 //update v_max and start_index j
-v_max = VFMAXVV_FLOAT(v_max, vx0, vl);
+v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx0, vl);
 }
 }
 else {
@@ -138,11 +138,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 //index where element greater than v_max
 mask = VMFLTVV_FLOAT(v_max, vx0, vl);
-v_max_index = VIDV_MASK_UINT(mask, v_max_index, vl);
-v_max_index = VADDVX_MASK_UINT(mask, v_max_index, v_max_index, j, vl);
+v_max_index = VIDV_MASK_UINT_TU(mask, v_max_index, vl);
+v_max_index = VADDVX_MASK_UINT_TU(mask, v_max_index, v_max_index, j, vl);
 
 //update v_max and start_index j
-v_max = VFMAXVV_FLOAT(v_max, vx0, vl);
+v_max = VFMAXVV_FLOAT_TU(v_max, v_max, vx0, vl);
 }
 
 }

@@ -43,13 +43,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m4
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFIRSTM __riscv_vfirst_m_b16
 #define UINT_V_T vuint64m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u64m4_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u64m4_tumu
 #define VIDV_UINT __riscv_vid_v_u64m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u64m4_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u64m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u64m4
 #define VMVVX_UINT __riscv_vmv_v_x_u64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -70,13 +70,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m4
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFIRSTM __riscv_vfirst_m_b8
 #define UINT_V_T vuint32m4_t
-#define VIDV_MASK_UINT __riscv_vid_v_u32m4_mu
+#define VIDV_MASK_UINT_TU __riscv_vid_v_u32m4_tumu
 #define VIDV_UINT __riscv_vid_v_u32m4
-#define VADDVX_MASK_UINT __riscv_vadd_vx_u32m4_mu
+#define VADDVX_MASK_UINT_TU __riscv_vadd_vx_u32m4_tumu
 #define VADDVX_UINT __riscv_vadd_vx_u32m4
 #define VMVVX_UINT __riscv_vmv_v_x_u32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -113,11 +113,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx0, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
 //update v_min and start_index j
-v_min = VFMINVV_FLOAT(v_min, vx0, vl);
+v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx0, vl);
 }
 
 } else {
@@ -136,11 +136,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 // index where element less than v_min
 mask = VMFLTVV_FLOAT(vx0, v_min, vl);
-v_min_index = VIDV_MASK_UINT(mask, v_min_index, vl);
-v_min_index = VADDVX_MASK_UINT(mask, v_min_index, v_min_index, j, vl);
+v_min_index = VIDV_MASK_UINT_TU(mask, v_min_index, vl);
+v_min_index = VADDVX_MASK_UINT_TU(mask, v_min_index, v_min_index, j, vl);
 
 //update v_min and start_index j
-v_min = VFMINVV_FLOAT(v_min, vx0, vl);
+v_min = VFMINVV_FLOAT_TU(v_min, v_min, vx0, vl);
 }
 
 }

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m8_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m8_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
 #define VSETVL(n) __riscv_vsetvl_e64m8(n)
@@ -52,7 +52,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m8_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m8
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m8_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif
 
@@ -75,7 +75,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);
 
 vx = VLEV_FLOAT(x, vl);
-vmax = VFMAXVV_FLOAT(vmax, vx, vl);
+vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
 }
 
 } else {
@@ -86,7 +86,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);
 
 vx = VLSEV_FLOAT(x, stride_x, vl);
-vmax = VFMAXVV_FLOAT(vmax, vx, vl);
+vmax = VFMAXVV_FLOAT_TU(vmax, vmax, vx, vl);
 }
 
 }

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m8_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m8_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
 #define VSETVL(n) __riscv_vsetvl_e64m8(n)
@@ -52,7 +52,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m8_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m8
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m8_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif
 
@@ -75,7 +75,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);
 
 vx = VLEV_FLOAT(x, vl);
-vmin = VFMINVV_FLOAT(vmin, vx, vl);
+vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
 }
 
 } else {
@@ -86,7 +86,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);
 
 vx = VLSEV_FLOAT(x, stride_x, vl);
-vmin = VFMINVV_FLOAT(vmin, vx, vl);
+vmin = VFMINVV_FLOAT_TU(vmin, vmin, vx, vl);
 }
 
 }

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -79,7 +79,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 v0 = VLEV_FLOAT(x, vl);
 
-vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl);
 }
 
 } else {
@@ -91,7 +91,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 v0 = VLSEV_FLOAT(x, stride_x, vl);
 
-vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl);
 }
 }
 

@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu
 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLEV_FLOAT __riscv_vle64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu
 #define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -73,7 +73,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);
 
 vx = VLEV_FLOAT(x, vl);
-vsum = VFADDVV_FLOAT(vsum, vx, vl);
+vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
 }
 
 } else {
@@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 vl = VSETVL(n);
 
 vx = VLSEV_FLOAT(x, stride_x, vl);
-vsum = VFADDVV_FLOAT(vsum, vx, vl);
+vsum = VFADDVV_FLOAT_TU(vsum, vsum, vx, vl);
 }
 
 }

@@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VSEV_FLOAT __riscv_vse32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VSSEV_FLOAT __riscv_vsse32_v_f32m8
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8
 #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
@@ -56,7 +56,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VSEV_FLOAT __riscv_vse64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VSSEV_FLOAT __riscv_vsse64_v_f64m8
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8
 #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
@@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSEV_FLOAT(&y[i], vy, vl);
 
 vx = VLEV_FLOAT(&x[i], vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
 }
 v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
@@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
 
 vx = VLEV_FLOAT(&x[i], vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
 iy += inc_yv;
 }
@@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSEV_FLOAT(&y[i], vy, vl);
 
 vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
 ix += inc_xv;
 }
@@ -201,7 +201,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
 
 vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
 ix += inc_xv;
 iy += inc_yv;

@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VSEV_FLOAT __riscv_vse32_v_f32m8
 #define VLSEV_FLOAT __riscv_vlse32_v_f32m8
 #define VSSEV_FLOAT __riscv_vsse32_v_f32m8
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8
 #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
@@ -57,7 +57,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VSEV_FLOAT __riscv_vse64_v_f64m8
 #define VLSEV_FLOAT __riscv_vlse64_v_f64m8
 #define VSSEV_FLOAT __riscv_vsse64_v_f64m8
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
 #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
 #define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8
 #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
@@ -101,7 +101,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSEV_FLOAT(&y[i], vy, vl);
 
 vx = VLEV_FLOAT(&x[i], vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 }
 v_res = VFREDSUM_FLOAT(vr, v_z0, vl_max);
 
@@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
 
 vx = VLEV_FLOAT(&x[i], vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
 iy += inc_yv;
 }
@@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSEV_FLOAT(&y[i], vy, vl);
 
 vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 
 ix += inc_xv;
 }
@@ -200,7 +200,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
 VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
 
 vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
-vr = VFMACCVV_FLOAT(vr, vx, va, vl);
+vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
 ix += inc_xv;
 iy += inc_yv;
 }

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m4_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f32m4
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f32m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -54,7 +54,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m4_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMAXVV_FLOAT __riscv_vfmax_vv_f64m4
+#define VFMAXVV_FLOAT_TU __riscv_vfmax_vv_f64m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v1 = VFABSV_FLOAT(v1, vl);
 
 v0 = VFADDVV_FLOAT(v0, v1, vl);
-vmax = VFMAXVV_FLOAT(vmax, v0, vl);
+vmax = VFMAXVV_FLOAT_TU(vmax, vmax, v0, vl);
 
 }
 
@@ -101,7 +101,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v1 = VFABSV_FLOAT(v1, vl);
 
 v0 = VFADDVV_FLOAT(v0, v1, vl);
-vmax = VFMAXVV_FLOAT(vmax, v0, vl);
+vmax = VFMAXVV_FLOAT_TU(vmax, vmax, v0, vl);
 }
 
 }

@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f32m4_f32m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f32m4
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f32m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
@@ -54,7 +54,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFREDMINVS_FLOAT __riscv_vfredmin_vs_f64m4_f64m1
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFMINVV_FLOAT __riscv_vfmin_vv_f64m4
+#define VFMINVV_FLOAT_TU __riscv_vfmin_vv_f64m4_tu
 #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
@@ -84,7 +84,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v1 = VFABSV_FLOAT(v1, vl);
 
 v0 = VFADDVV_FLOAT(v0, v1, vl);
-vmin = VFMINVV_FLOAT(vmin, v0, vl);
+vmin = VFMINVV_FLOAT_TU(vmin, vmin, v0, vl);
 }
 
 } else {
@@ -100,7 +100,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v1 = VFABSV_FLOAT(v1, vl);
 
 v0 = VFADDVV_FLOAT(v0, v1, vl);
-vmin = VFMINVV_FLOAT(vmin, v0, vl);
+vmin = VFMINVV_FLOAT_TU(vmin, vmin, v0, vl);
 }
 
 }

@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m8
 #else
 #define VSETVL(n) __riscv_vsetvl_e64m8(n)
@@ -51,7 +51,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m8
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m8_tu
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m8
 #endif

@@ -75,8 +75,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v0 = VFABSV_FLOAT(v0, vl);
 v1 = VFABSV_FLOAT(v1, vl);

-v_sum = VFADDVV_FLOAT(v_sum, v0, vl);
-v_sum = VFADDVV_FLOAT(v_sum, v1, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl);
 }

 }
@@ -93,8 +93,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v0 = VFABSV_FLOAT(v0, vl);
 v1 = VFABSV_FLOAT(v1, vl);

-v_sum = VFADDVV_FLOAT(v_sum, v0, vl);
-v_sum = VFADDVV_FLOAT(v_sum, v1, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl);
 }

 }
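The asum kernels above unroll by two, so each trip updates the same accumulator twice; both updates must be tail-undisturbed, since either of them may be the one that runs with the final, shorter vl. Like vfmax, vfadd has no implicit destination, so the `_tu` form grows from three arguments to four. A small sketch of one unrolled step, assuming the two loads were already done (the helper name is illustrative):

#include <riscv_vector.h>

static vfloat32m8_t abs_sum_step(vfloat32m8_t v_sum, vfloat32m8_t v0,
                                 vfloat32m8_t v1, size_t vl)
{
    v0 = __riscv_vfabs_v_f32m8(v0, vl);
    v1 = __riscv_vfabs_v_f32m8(v1, vl);
    /* v_sum is both the preserved destination and the first addend */
    v_sum = __riscv_vfadd_vv_f32m8_tu(v_sum, v_sum, v0, vl);
    v_sum = __riscv_vfadd_vv_f32m8_tu(v_sum, v_sum, v1, vl);
    return v_sum;
}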
@@ -36,12 +36,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
 #define VFMSACVV_FLOAT __riscv_vfmsac_vv_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #else
 #define VSETVL(n) __riscv_vsetvl_e64m4(n)
@@ -52,12 +52,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
 #define VFMSACVV_FLOAT __riscv_vfmsac_vv_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #endif

@@ -86,14 +86,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
 VLSEG_FLOAT(&vx0, &vx1, x, vl);
 VLSEG_FLOAT(&vy0, &vy1, y, vl);

-vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl);
 #if !defined(CONJ)
-vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #else
-vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #endif
 }

@@ -107,14 +107,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
 VLSEG_FLOAT(&vx0, &vx1, x, vl);
 VLSSEG_FLOAT(&vy0, &vy1, y, stride_y, vl);

-vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl);
 #if !defined(CONJ)
-vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #else
-vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #endif
 }
 } else if (inc_y == 1){
@@ -127,14 +127,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
 VLSSEG_FLOAT(&vx0, &vx1, x, stride_x, vl);
 VLSEG_FLOAT(&vy0, &vy1, y, vl);

-vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl);
 #if !defined(CONJ)
-vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #else
-vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #endif
 }
 }else {
@@ -148,14 +148,14 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
 VLSSEG_FLOAT(&vx0, &vx1, x, stride_x, vl);
 VLSSEG_FLOAT(&vy0, &vy1, y, stride_y, vl);

-vr0 = VFMACCVV_FLOAT(vr0, vx0, vy0, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx0, vy1, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx0, vy0, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx0, vy1, vl);
 #if !defined(CONJ)
-vr0 = VFNMSACVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFMACCVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFNMSACVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFMACCVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #else
-vr0 = VFMACCVV_FLOAT(vr0, vx1, vy1, vl);
-vr1 = VFNMSACVV_FLOAT(vr1, vx1, vy0, vl);
+vr0 = VFMACCVV_FLOAT_TU(vr0, vx1, vy1, vl);
+vr1 = VFNMSACVV_FLOAT_TU(vr1, vx1, vy0, vl);
 #endif
 }
 }
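In the complex dot kernels, two accumulators are live at once: vr0 collects the real part and vr1 the imaginary part, each built from a pair of fused multiply-adds. vfmacc and vfnmsac both already carry the accumulator as their first operand, so only the `_tu` suffix changes. A hedged sketch of the non-conjugated update (the helper name and pointer-passing style are illustrative, not from the patch):

#include <riscv_vector.h>

static void cdot_step(vfloat32m4_t *vr0, vfloat32m4_t *vr1,
                      vfloat32m4_t vx0, vfloat32m4_t vx1,
                      vfloat32m4_t vy0, vfloat32m4_t vy1, size_t vl)
{
    /* real: += re(x)*re(y) - im(x)*im(y); imag: += re(x)*im(y) + im(x)*re(y) */
    *vr0 = __riscv_vfmacc_vv_f32m4_tu(*vr0, vx0, vy0, vl);
    *vr1 = __riscv_vfmacc_vv_f32m4_tu(*vr1, vx0, vy1, vl);
    *vr0 = __riscv_vfnmsac_vv_f32m4_tu(*vr0, vx1, vy1, vl);
    *vr1 = __riscv_vfmacc_vv_f32m4_tu(*vr1, vx1, vy0, vl);
}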
@@ -35,8 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
@@ -49,8 +49,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
-#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu
+#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
@@ -90,15 +90,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
 VLSEG_FLOAT(&vx0, &vx1, &x[ix], vl);

 #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
-vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va1, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl);
 #else
-vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-vr = VFMACCVV_FLOAT(vr, va1, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl);
 #endif
 j += vl * 2;
 ix += vl * inc_x * 2;
@@ -134,15 +134,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
 VLSSEG_FLOAT(&vx0, &vx1, &x[ix], stride_x, vl);

 #if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
-vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va1, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl);
 #else
-vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
-vr = VFMACCVV_FLOAT(vr, va1, vx1, vl);
-vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
-vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
+vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl);
+vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
+vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl);
 #endif
 j += vl * 2;
 ix += vl * inc_x * 2;
@@ -36,10 +36,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f32m4_f32m1
+#define VFREDMAXVS_FLOAT_TU __riscv_vfredmax_vs_f32m4_f32m1_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
 #define VFABSV_FLOAT __riscv_vfabs_v_f32m4
 #else
@@ -51,10 +51,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
 #define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
 #define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
-#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
+#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
-#define VFREDMAXVS_FLOAT __riscv_vfredmax_vs_f64m4_f64m1
+#define VFREDMAXVS_FLOAT_TU __riscv_vfredmax_vs_f64m4_f64m1_tu
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
 #define VFABSV_FLOAT __riscv_vfabs_v_f64m4
 #endif
@@ -85,11 +85,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v0 = VFABSV_FLOAT(v0, vl);
 v1 = VFABSV_FLOAT(v1, vl);

-v_max = VFREDMAXVS_FLOAT(v0, v_max, vl);
-vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
+v_max = VFREDMAXVS_FLOAT_TU(v_max, v0, v_max, vl);
+vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl);

-v_max = VFREDMAXVS_FLOAT(v1, v_max, vl);
-vr = VFMACCVV_FLOAT(vr, v1, v1, vl);
+v_max = VFREDMAXVS_FLOAT_TU(v_max, v1, v_max, vl);
+vr = VFMACCVV_FLOAT_TU(vr, v1, v1, vl);
 }

 } else {
@@ -103,11 +103,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 v0 = VFABSV_FLOAT(v0, vl);
 v1 = VFABSV_FLOAT(v1, vl);

-v_max = VFREDMAXVS_FLOAT(v0, v_max, vl);
-vr = VFMACCVV_FLOAT(vr, v0, v0, vl);
+v_max = VFREDMAXVS_FLOAT_TU(v_max, v0, v_max, vl);
+vr = VFMACCVV_FLOAT_TU(vr, v0, v0, vl);

-v_max = VFREDMAXVS_FLOAT(v1, v_max, vl);
-vr = VFMACCVV_FLOAT(vr, v1, v1, vl);
+v_max = VFREDMAXVS_FLOAT_TU(v_max, v1, v_max, vl);
+vr = VFMACCVV_FLOAT_TU(vr, v1, v1, vl);
 }

 }
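The nrm2 kernels above are the one place in this commit where a reduction itself goes tail-undisturbed: vfredmax writes its result to element 0 of the m1 destination, and the `_tu` variant additionally preserves the destination's remaining elements, taking the destination as a new first argument. That is why `VFREDMAXVS_FLOAT(v0, v_max, vl)` becomes `VFREDMAXVS_FLOAT_TU(v_max, v0, v_max, vl)`, with the running maximum carried through as both destination and scalar operand. A minimal sketch, assuming an f32 input (the helper name is illustrative):

#include <riscv_vector.h>

static float abs_max(const float *x, size_t n)
{
    vfloat32m1_t v_max = __riscv_vfmv_v_f_f32m1(0.0f, 1);
    for (size_t i = 0; i < n; ) {
        size_t vl = __riscv_vsetvl_e32m4(n - i);
        vfloat32m4_t v0 = __riscv_vle32_v_f32m4(&x[i], vl);
        v0 = __riscv_vfabs_v_f32m4(v0, vl);
        /* element 0 of v_max becomes max(v_max[0], max over v0[0..vl)) */
        v_max = __riscv_vfredmax_vs_f32m4_f32m1_tu(v_max, v0, v_max, vl);
        i += vl;
    }
    return __riscv_vfmv_f_s_f32m1_f32(v_max);
}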
@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f32m4_tu
 #else
 #define VSETVL(n) __riscv_vsetvl_e64m4(n)
 #define VSETVL_MAX __riscv_vsetvlmax_e64m4()
@@ -50,7 +50,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
 #define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
 #define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
-#define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4
+#define VFADDVV_FLOAT_TU __riscv_vfadd_vv_f64m4_tu
 #endif

 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
@@ -69,8 +69,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 VLSEG_FLOAT(&v0, &v1, x, vl);

-v_sum = VFADDVV_FLOAT(v_sum, v0, vl);
-v_sum = VFADDVV_FLOAT(v_sum, v1, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl);
 }

 } else {
@@ -82,8 +82,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)

 VLSSEG_FLOAT(&v0, &v1, x, stride_x, vl);

-v_sum = VFADDVV_FLOAT(v_sum, v0, vl);
-v_sum = VFADDVV_FLOAT(v_sum, v1, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v0, vl);
+v_sum = VFADDVV_FLOAT_TU(v_sum, v_sum, v1, vl);
 }

 }