Adds tail undisturbed for RVV Level 2 operations
During the last iteration of some RVV operations, accumulators can get overwritten when VL < VLMAX and tail policy is agnostic. Commit changes intrinsics tail policy to undistrubed.
This commit is contained in:
parent
8df0289db6
commit
826a9d5fa4
|
@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define VLEV_FLOAT __riscv_vle32_v_f32m8
|
#define VLEV_FLOAT __riscv_vle32_v_f32m8
|
||||||
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
|
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
|
||||||
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
|
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
|
||||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
|
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
|
||||||
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
|
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
|
||||||
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
|
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
|
||||||
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
|
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f32m1_f32
|
||||||
|
@ -49,7 +49,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define VLEV_FLOAT __riscv_vle64_v_f64m8
|
#define VLEV_FLOAT __riscv_vle64_v_f64m8
|
||||||
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
|
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
|
||||||
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
|
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
|
||||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
|
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
|
||||||
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
|
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
|
||||||
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
|
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
|
||||||
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
|
#define VFMVFS_FLOAT_M1 __riscv_vfmv_f_s_f64m1_f64
|
||||||
|
@ -79,7 +79,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
||||||
|
|
||||||
va = VLEV_FLOAT(a_ptr, vl);
|
va = VLEV_FLOAT(a_ptr, vl);
|
||||||
vx = VLEV_FLOAT(x_ptr, vl);
|
vx = VLEV_FLOAT(x_ptr, vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, va, vx, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl);
|
||||||
}
|
}
|
||||||
|
|
||||||
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
|
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
|
||||||
|
@ -103,7 +103,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
|
||||||
|
|
||||||
va = VLEV_FLOAT(a_ptr, vl);
|
va = VLEV_FLOAT(a_ptr, vl);
|
||||||
vx = VLSEV_FLOAT(x_ptr, stride_x, vl);
|
vx = VLSEV_FLOAT(x_ptr, stride_x, vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, va, vx, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, va, vx, vl);
|
||||||
}
|
}
|
||||||
|
|
||||||
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
|
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
|
||||||
|
|
|
@ -37,7 +37,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define VSEV_FLOAT __riscv_vse32_v_f32m8
|
#define VSEV_FLOAT __riscv_vse32_v_f32m8
|
||||||
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
|
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
|
||||||
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8
|
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8
|
||||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
|
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
|
||||||
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
|
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
|
||||||
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8
|
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8
|
||||||
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
|
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
|
||||||
|
@ -56,7 +56,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define VSEV_FLOAT __riscv_vse64_v_f64m8
|
#define VSEV_FLOAT __riscv_vse64_v_f64m8
|
||||||
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
|
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
|
||||||
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8
|
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8
|
||||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
|
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
|
||||||
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
|
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
|
||||||
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8
|
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8
|
||||||
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
|
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
|
||||||
|
@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
|
||||||
VSEV_FLOAT(&y[i], vy, vl);
|
VSEV_FLOAT(&y[i], vy, vl);
|
||||||
|
|
||||||
vx = VLEV_FLOAT(&x[i], vl);
|
vx = VLEV_FLOAT(&x[i], vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
|
||||||
|
|
||||||
}
|
}
|
||||||
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
|
v_res = VFREDSUM_FLOAT(vr, v_z0, vlmax);
|
||||||
|
@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
|
||||||
VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
|
VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
|
||||||
|
|
||||||
vx = VLEV_FLOAT(&x[i], vl);
|
vx = VLEV_FLOAT(&x[i], vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
|
||||||
|
|
||||||
iy += inc_yv;
|
iy += inc_yv;
|
||||||
}
|
}
|
||||||
|
@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
|
||||||
VSEV_FLOAT(&y[i], vy, vl);
|
VSEV_FLOAT(&y[i], vy, vl);
|
||||||
|
|
||||||
vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
|
vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
|
||||||
|
|
||||||
ix += inc_xv;
|
ix += inc_xv;
|
||||||
}
|
}
|
||||||
|
@ -201,7 +201,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
|
||||||
VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
|
VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
|
||||||
|
|
||||||
vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
|
vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
|
||||||
|
|
||||||
ix += inc_xv;
|
ix += inc_xv;
|
||||||
iy += inc_yv;
|
iy += inc_yv;
|
||||||
|
|
|
@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define VSEV_FLOAT __riscv_vse32_v_f32m8
|
#define VSEV_FLOAT __riscv_vse32_v_f32m8
|
||||||
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
|
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
|
||||||
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8
|
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8
|
||||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m8
|
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m8_tu
|
||||||
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
|
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
|
||||||
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8
|
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f32m8
|
||||||
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
|
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
|
||||||
|
@ -57,7 +57,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define VSEV_FLOAT __riscv_vse64_v_f64m8
|
#define VSEV_FLOAT __riscv_vse64_v_f64m8
|
||||||
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
|
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
|
||||||
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8
|
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8
|
||||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m8
|
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m8_tu
|
||||||
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
|
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
|
||||||
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8
|
#define VFNMSACVF_FLOAT __riscv_vfnmsac_vf_f64m8
|
||||||
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
|
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
|
||||||
|
@ -101,7 +101,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
|
||||||
VSEV_FLOAT(&y[i], vy, vl);
|
VSEV_FLOAT(&y[i], vy, vl);
|
||||||
|
|
||||||
vx = VLEV_FLOAT(&x[i], vl);
|
vx = VLEV_FLOAT(&x[i], vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
|
||||||
}
|
}
|
||||||
v_res = VFREDSUM_FLOAT(vr, v_z0, vl_max);
|
v_res = VFREDSUM_FLOAT(vr, v_z0, vl_max);
|
||||||
|
|
||||||
|
@ -130,7 +130,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
|
||||||
VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
|
VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
|
||||||
|
|
||||||
vx = VLEV_FLOAT(&x[i], vl);
|
vx = VLEV_FLOAT(&x[i], vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
|
||||||
|
|
||||||
iy += inc_yv;
|
iy += inc_yv;
|
||||||
}
|
}
|
||||||
|
@ -163,7 +163,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
|
||||||
VSEV_FLOAT(&y[i], vy, vl);
|
VSEV_FLOAT(&y[i], vy, vl);
|
||||||
|
|
||||||
vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
|
vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
|
||||||
|
|
||||||
ix += inc_xv;
|
ix += inc_xv;
|
||||||
}
|
}
|
||||||
|
@ -200,7 +200,7 @@ int CNAME(BLASLONG m, BLASLONG offset, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOA
|
||||||
VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
|
VSSEV_FLOAT(&y[iy], stride_y, vy, vl);
|
||||||
|
|
||||||
vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
|
vx = VLSEV_FLOAT(&x[ix], stride_x, vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, vx, va, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, vx, va, vl);
|
||||||
ix += inc_xv;
|
ix += inc_xv;
|
||||||
iy += inc_yv;
|
iy += inc_yv;
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,8 +35,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
|
#define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4
|
||||||
#define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
|
#define VLSSEG_FLOAT __riscv_vlsseg2e32_v_f32m4
|
||||||
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
|
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f32m4_f32m1
|
||||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f32m4
|
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f32m4_tu
|
||||||
#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f32m4
|
#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f32m4_tu
|
||||||
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
|
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4
|
||||||
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
|
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
|
||||||
#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
|
#define VFMULVV_FLOAT __riscv_vfmul_vv_f32m4
|
||||||
|
@ -49,8 +49,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
|
#define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4
|
||||||
#define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
|
#define VLSSEG_FLOAT __riscv_vlsseg2e64_v_f64m4
|
||||||
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
|
#define VFREDSUM_FLOAT __riscv_vfredusum_vs_f64m4_f64m1
|
||||||
#define VFMACCVV_FLOAT __riscv_vfmacc_vv_f64m4
|
#define VFMACCVV_FLOAT_TU __riscv_vfmacc_vv_f64m4_tu
|
||||||
#define VFNMSACVV_FLOAT __riscv_vfnmsac_vv_f64m4
|
#define VFNMSACVV_FLOAT_TU __riscv_vfnmsac_vv_f64m4_tu
|
||||||
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
|
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4
|
||||||
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
|
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
|
||||||
#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
|
#define VFMULVV_FLOAT __riscv_vfmul_vv_f64m4
|
||||||
|
@ -90,15 +90,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
VLSEG_FLOAT(&vx0, &vx1, &x[ix], vl);
|
VLSEG_FLOAT(&vx0, &vx1, &x[ix], vl);
|
||||||
|
|
||||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||||
vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
|
||||||
vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl);
|
vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl);
|
||||||
vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
|
vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
|
||||||
vi = VFMACCVV_FLOAT(vi, va1, vx0, vl);
|
vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl);
|
||||||
#else
|
#else
|
||||||
vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, va1, vx1, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl);
|
||||||
vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
|
vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
|
||||||
vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl);
|
vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl);
|
||||||
#endif
|
#endif
|
||||||
j += vl * 2;
|
j += vl * 2;
|
||||||
ix += vl * inc_x * 2;
|
ix += vl * inc_x * 2;
|
||||||
|
@ -134,15 +134,15 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
|
||||||
VLSSEG_FLOAT(&vx0, &vx1, &x[ix], stride_x, vl);
|
VLSSEG_FLOAT(&vx0, &vx1, &x[ix], stride_x, vl);
|
||||||
|
|
||||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||||
vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
|
||||||
vr = VFNMSACVV_FLOAT(vr, va1, vx1, vl);
|
vr = VFNMSACVV_FLOAT_TU(vr, va1, vx1, vl);
|
||||||
vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
|
vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
|
||||||
vi = VFMACCVV_FLOAT(vi, va1, vx0, vl);
|
vi = VFMACCVV_FLOAT_TU(vi, va1, vx0, vl);
|
||||||
#else
|
#else
|
||||||
vr = VFMACCVV_FLOAT(vr, va0, vx0, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, va0, vx0, vl);
|
||||||
vr = VFMACCVV_FLOAT(vr, va1, vx1, vl);
|
vr = VFMACCVV_FLOAT_TU(vr, va1, vx1, vl);
|
||||||
vi = VFMACCVV_FLOAT(vi, va0, vx1, vl);
|
vi = VFMACCVV_FLOAT_TU(vi, va0, vx1, vl);
|
||||||
vi = VFNMSACVV_FLOAT(vi, va1, vx0, vl);
|
vi = VFNMSACVV_FLOAT_TU(vi, va1, vx0, vl);
|
||||||
#endif
|
#endif
|
||||||
j += vl * 2;
|
j += vl * 2;
|
||||||
ix += vl * inc_x * 2;
|
ix += vl * inc_x * 2;
|
||||||
|
|
Loading…
Reference in New Issue