Fix BLAS and LAPACK tests for C910V and RISCV64_ZVL256B targets
* Fixed bugs in dgemm, [a]min/[a]max, and asum kernels
* Added zero checks for BLAS kernels
* Added dsdot implementation for RVV 0.7.1
* Fixed bugs in _vector files for C910V and RISCV64_ZVL256B targets
* Added additional definitions for RISCV64_ZVL256B target
commit e1afb23811
parent 88e994116c
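Most of the kernel hunks below add the same early-exit guard, so the pattern is worth sketching once. The following is an illustrative sketch only (the body and names stand in for the real kernels): return the zero result before any vector setup when the element count or stride is non-positive.

#include "common.h"   /* OpenBLAS kernel preamble: FLOAT, BLASLONG, CNAME */

FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
    FLOAT result = 0.0;
    /* guard added by this commit: skip vsetvl/loads for empty input
       or a non-positive stride */
    if (n <= 0 || inc_x <= 0) return(result);
    /* ... vectorized body ... */
    return(result);
}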
@@ -59,6 +59,10 @@ ifeq ($(TARGET), x280)
 TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
 endif
 
+ifeq ($(TARGET), RISCV64_ZVL256B)
+TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
+endif
+
 ifeq ($(TARGET), RISCV64_GENERIC)
 TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
 endif
@@ -6,6 +6,10 @@ ifeq ($(CORE), x280)
 CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
 FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
 endif
+ifeq ($(CORE), RISCV64_ZVL256B)
+CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl256b -mabi=lp64d
+FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
+endif
 ifeq ($(CORE), RISCV64_GENERIC)
 CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
 FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static
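With these compiler options in place, the new target is selected at build time in the usual OpenBLAS way, e.g. make TARGET=RISCV64_ZVL256B (plus suitable CC, FC and HOSTCC settings when cross-compiling, as with the existing RISC-V targets).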
@@ -121,6 +121,7 @@ Z14
 RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
 C910V
 x280
+RISCV64_ZVL256B
 
 11.LOONGARCH64:
 LOONGSONGENERIC
getarch.c
@@ -1692,6 +1692,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #else
 #endif
 
+#ifdef FORCE_RISCV64_ZVL256B
+#define FORCE
+#define ARCHITECTURE "RISCV64"
+#define SUBARCHITECTURE "RISCV64_ZVL256B"
+#define SUBDIRNAME "riscv64"
+#define ARCHCONFIG "-DRISCV64_ZVL256B " \
+"-DL1_DATA_SIZE=64536 -DL1_DATA_LINESIZE=32 " \
+"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
+"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
+#define LIBNAME "riscv64_zvl256b"
+#define CORENAME "RISCV64_ZVL256B"
+#else
+#endif
+
 
 #if defined(FORCE_E2K) || defined(__e2k__)
 #define FORCE
@@ -59,6 +59,7 @@ SDOTKERNEL = dot_vector.c
 DDOTKERNEL = dot_vector.c
 CDOTKERNEL = zdot_vector.c
 ZDOTKERNEL = zdot_vector.c
+DSDOTKERNEL = dsdot_vector.c
 
 SNRM2KERNEL = nrm2_vector.c
 DNRM2KERNEL = nrm2_vector.c
@@ -31,15 +31,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 # define LMUL m2
 # if defined(DOUBLE)
 # define ELEN 64
+# define ABS fabs
 # else
 # define ELEN 32
+# define ABS fabsf
 # endif
 #else
 # define LMUL m8
 # if defined(DOUBLE)
 # define ELEN 64
+# define ABS fabs
 # else
 # define ELEN 32
+# define ABS fabsf
 # endif
 #endif
 
@@ -69,7 +73,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 FLOAT minf=0.0;
 if (n <= 0 || inc_x <= 0) return(minf);
 
-minf = *x;
+minf = ABS(*x);
 x += inc_x;
 --n;
 if (n == 0) return(minf);
@@ -67,7 +67,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
 BLASLONG i=0, j=0;
-BLASLONG ix=0;
 FLOAT asumf=0.0;
 if (n <= 0 || inc_x <= 0) return(asumf);
 unsigned int gvl = 0;
@@ -103,17 +102,15 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 unsigned int stride_x = inc_x * sizeof(FLOAT);
 if(gvl <= n/2){
 v_sum = VFMVVF_FLOAT(0, gvl);
-BLASLONG inc_xv = inc_x * gvl;
 for(i=0,j=0; i<n/(gvl*2); i++){
-v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
+v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
 v0 = VFABS_FLOAT(v0, gvl);
 v_sum = VFADDVV_FLOAT(v_sum, v0, gvl);
 
-v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
+v1 = VLSEV_FLOAT(&x[(j+gvl)*inc_x], stride_x, gvl);
 v1 = VFABS_FLOAT(v1, gvl);
 v_sum = VFADDVV_FLOAT(v_sum, v1, gvl);
 j += gvl * 2;
-inc_xv += inc_xv * 2;
 }
 v_res = VFREDSUMVS_FLOAT(v_sum, v_res, gvl);
 }
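For context, the strided-asum fix above replaces a running offset that the old code updated multiplicatively (inc_xv += inc_xv * 2, so it drifted away from the true position after the first iteration) with offsets derived directly from the loop counter j. A hypothetical scalar model of the corrected indexing, not the kernel itself:

#include <math.h>

static double strided_abs_sum(const double *x, long n, long inc_x, long gvl)
{
    double sum = 0.0;
    for (long i = 0, j = 0; i < n / (gvl * 2); i++) {
        for (long k = 0; k < gvl; k++)
            sum += fabs(x[(j + k) * inc_x]);         /* first gvl elements  */
        for (long k = 0; k < gvl; k++)
            sum += fabs(x[(j + gvl + k) * inc_x]);   /* second gvl elements */
        j += gvl * 2;   /* j alone tracks the position */
    }
    return sum;   /* the remaining n % (2*gvl) elements are the kernel's tail */
}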
@@ -60,7 +60,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
 {
-if (n < 0) return(0);
+if (n <= 0) return(0);
 
 BLASLONG i=0, j=0;
 unsigned int gvl = 0;
@@ -196,7 +196,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
 
 asm volatile(
 "vsetvli zero, zero, e64,m1 \n\t"
-"fmv.w.x ft11, zero \n\t"
+"fmv.d.x ft11, zero \n\t"
 "mv t0, %[BK] \n\t"
 
 "vfmv.v.f v16, ft11 \n\t"
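The one-character dgemm fix above matters because the accumulators are e64 elements: on RV64, fmv.w.x writes a 32-bit value and NaN-boxes the upper 32 bits of the FP register, so ft11 read back as a double is a NaN bit pattern rather than 0.0, and vfmv.v.f would then splat that NaN into the accumulator. A sketch of the corrected initialization (an illustrative fragment, not the kernel's full asm; the v16 clobber cannot be named portably and is omitted):

asm volatile(
    "vsetvli zero, zero, e64,m1 \n\t"
    "fmv.d.x ft11, zero         \n\t"   /* full 64-bit zero; fmv.w.x would leave the NaN-box bits set */
    "vfmv.v.f v16, ft11         \n\t"   /* v16 = {0.0, ...} accumulator */
    ::: "ft11");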
dsdot_vector.c (new file)
@@ -0,0 +1,152 @@
+/***************************************************************************
+Copyright (c) 2023, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+#include "common.h"
+
+double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
+{
+BLASLONG i=0, j=0;
+double dot = 0.0 ;
+
+if ( n < 1 ) return(dot);
+vfloat64m4_t vr;
+vfloat32m2_t vx, vy;
+unsigned int gvl = 0;
+vfloat64m1_t v_res, v_z0;
+gvl = vsetvlmax_e64m1();
+v_res = vfmv_v_f_f64m1(0, gvl);
+v_z0 = vfmv_v_f_f64m1(0, gvl);
+
+if(inc_x == 1 && inc_y == 1){
+gvl = vsetvl_e64m4(n);
+vr = vfmv_v_f_f64m4(0, gvl);
+for(i=0,j=0; i<n/gvl; i++){
+vx = vle32_v_f32m2(&x[j], gvl);
+vy = vle32_v_f32m2(&y[j], gvl);
+vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+j += gvl;
+}
+if(j > 0){
+v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+dot += (double)vfmv_f_s_f64m1_f64(v_res);
+}
+//tail
+if(j < n){
+gvl = vsetvl_e64m4(n-j);
+vx = vle32_v_f32m2(&x[j], gvl);
+vy = vle32_v_f32m2(&y[j], gvl);
+vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+//vr = vfdot_vv_f32m2(vx, vy, gvl);
+vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+dot += (double)vfmv_f_s_f64m1_f64(v_res);
+}
+}else if(inc_y == 1){
+gvl = vsetvl_e64m4(n);
+vr = vfmv_v_f_f64m4(0, gvl);
+int stride_x = inc_x * sizeof(FLOAT);
+for(i=0,j=0; i<n/gvl; i++){
+vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+vy = vle32_v_f32m2(&y[j], gvl);
+vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+j += gvl;
+}
+if(j > 0){
+v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+}
+//tail
+if(j < n){
+gvl = vsetvl_e64m4(n-j);
+vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+vy = vle32_v_f32m2(&y[j], gvl);
+vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+//vr = vfdot_vv_f32m2(vx, vy, gvl);
+vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+}
+}else if(inc_x == 1){
+gvl = vsetvl_e64m4(n);
+vr = vfmv_v_f_f64m4(0, gvl);
+int stride_y = inc_y * sizeof(FLOAT);
+for(i=0,j=0; i<n/gvl; i++){
+vx = vle32_v_f32m2(&x[j], gvl);
+vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+j += gvl;
+}
+if(j > 0){
+v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+}
+//tail
+if(j < n){
+gvl = vsetvl_e64m4(n-j);
+vx = vle32_v_f32m2(&x[j], gvl);
+vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+//vr = vfdot_vv_f32m2(vx, vy, gvl);
+vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+}
+}else{
+gvl = vsetvl_e64m4(n);
+vr = vfmv_v_f_f64m4(0, gvl);
+int stride_x = inc_x * sizeof(FLOAT);
+int stride_y = inc_y * sizeof(FLOAT);
+for(i=0,j=0; i<n/gvl; i++){
+vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
+j += gvl;
+}
+if(j > 0){
+v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+}
+//tail
+if(j < n){
+gvl = vsetvl_e64m4(n-j);
+vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
+vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
+vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
+//vr = vfdot_vv_f32m2(vx, vy, gvl);
+vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
+v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
+dot += (double)vfmv_f_s_f64m1_f64(v_res);
+
+}
+}
+return(dot);
+}
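For comparison, the scalar semantics the new kernel implements: single-precision inputs with products and accumulation carried out in double precision, which is what the widening vfwmacc_vv_f64m4 multiply-accumulate above provides. This reference loop is illustrative only and, like the kernel, assumes non-negative strides:

double dsdot_ref(long n, const float *x, long inc_x,
                 const float *y, long inc_y)
{
    double dot = 0.0;
    for (long i = 0; i < n; i++)
        dot += (double)x[i * inc_x] * (double)y[i * inc_y];   /* widen before multiplying */
    return dot;
}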
@@ -139,7 +139,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
 FLOAT cur_minf = EXTRACT_FLOAT(v_res);
-if(cur_minf > minf){
+if(cur_minf < minf){
 //tail index
 v_min_index = VIDV_UINT(gvl);
 v_min_index = VADDVX_UINT(v_min_index, j, gvl);
@@ -185,7 +185,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
 FLOAT cur_minf = EXTRACT_FLOAT(v_res);
-if(cur_minf > minf){
+if(cur_minf < minf){
 //tail index
 v_min_index = VIDV_UINT(gvl);
 v_min_index = VADDVX_UINT(v_min_index, j, gvl);
@@ -156,7 +156,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 
 v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
 FLOAT cur_minf = EXTRACT_FLOAT(v_res);
-if(cur_minf > minf){
+if(cur_minf < minf){
 //tail index
 v_min_index = VIDV_UINT(gvl);
 v_min_index = VADDVX_UINT(v_min_index, j, gvl);
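All three hunks above are the same one-character fix in index-of-minimum kernels: after reducing a tail block, the running minimum may only be replaced when the block's minimum is strictly smaller (cur_minf < minf); the old cur_minf > minf test updated the index in exactly the wrong case, so the returned index could point at a non-minimal element.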
@@ -104,7 +104,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
 BLASLONG i=0;
 
-if(n <= 0) return(0.0);
+if (n <= 0 || inc_x <= 0) return(0.0);
 if(n == 1) return (ABS(x[0]));
 
 unsigned int gvl = 0;
@@ -61,7 +61,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 BLASLONG i=0, j=0;
 double len = 0.0 ;
 
-if ( n < 0 ) return(0.0);
+if ( n <= 0 ) return(0.0);
 if(n == 1) return (ABS(x[0]));
 
 FLOAT_V_T vr, v0, v1;
@@ -67,7 +67,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
 BLASLONG stride_x, stride_y;
 FLOAT_V_T vx0, vx1, vy0, vy1;
 
-if (n < 0) return(0);
+if (n <= 0) return(0);
 
 unsigned int gvl = VSETVL((inc_x != 0 && inc_y != 0) ? n : 1);
 if( inc_x == 0 && inc_y == 0 ) { n = n & 1; }
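A note on the pre-existing zero-stride special case in the context above (the vx0/vx1/vy0/vy1 temporaries suggest this is the swap kernel): when inc_x == 0 and inc_y == 0, every iteration acts on the same two scalars, so n repetitions collapse to the parity of n, hence n = n & 1.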
@@ -60,17 +60,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
 #ifdef RISCV_0p10_INTRINSICS
 #define VFREDMAXVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
-#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl)
 #else
 #define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m)
 #endif
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
-#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
 #define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
 #define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 #define VFMAXVV_FLOAT JOIN(RISCV_RVV(vfmax), _vv_f, ELEN, LMUL, _)
 #define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
+#define VFABSV_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
 
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
@@ -91,10 +89,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 for(; i<n/gvl; i++){
 v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
 v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
-mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
-v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
-mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
-v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
+
+v0 = VFABSV_FLOAT(v0, gvl);
+v1 = VFABSV_FLOAT(v1, gvl);
 
 v0 = VFADDVV_FLOAT(v0, v1, gvl);
 v_max = VFMAXVV_FLOAT(v_max, v0, gvl);
@@ -108,10 +105,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 gvl = VSETVL(n-j);
 v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
 v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
-mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
-v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
-mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
-v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
+v0 = VFABSV_FLOAT(v0, gvl);
+v1 = VFABSV_FLOAT(v1, gvl);
 v1 = VFADDVV_FLOAT(v0, v1, gvl);
 v_res = VFREDMAXVS_FLOAT(v1, v_res, gvl);
 }
@@ -62,17 +62,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
 #ifdef RISCV_0p10_INTRINSICS
 #define VFREDMINVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
-#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl)
 #else
 #define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
-#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m)
 #endif
 #define MASK_T JOIN(vbool, MLEN, _t, _, _)
-#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
 #define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
 #define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
 #define VFMINVV_FLOAT JOIN(RISCV_RVV(vfmin), _vv_f, ELEN, LMUL, _)
 #define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
+#define VFABSV_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
 
 FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
@@ -93,10 +91,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 for(; i<n/gvl; i++){
 v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
 v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
-mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
-v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
-mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
-v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
+
+v0 = VFABSV_FLOAT(v0, gvl);
+v1 = VFABSV_FLOAT(v1, gvl);
 
 v0 = VFADDVV_FLOAT(v0, v1, gvl);
 v_min = VFMINVV_FLOAT(v_min, v0, gvl);
@@ -110,10 +107,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 gvl = VSETVL(n-j);
 v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
 v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
-mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
-v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
-mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
-v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
+v0 = VFABSV_FLOAT(v0, gvl);
+v1 = VFABSV_FLOAT(v1, gvl);
 v1 = VFADDVV_FLOAT(v0, v1, gvl);
 v_res = VFREDMINVS_FLOAT(v1, v_res, gvl);
 }
@@ -96,7 +96,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
 {
 BLASLONG i=0;
 
-if(n < 0) return(0.0);
+if (n <= 0 || inc_x <= 0) return(0.0);
 
 FLOAT_V_T v_ssq, v_scale, v0, v1, v_zero;
 unsigned int gvl = 0;
@@ -69,7 +69,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
 unsigned int gvl = VSETVL((inc_x != 0 && inc_y != 0) ? n : 1);
 if( inc_x == 0 && inc_y == 0 ) { n = n & 1; }
 
-if (n < 0) return(0);
+if (n <= 0) return(0);
 if(inc_x == 1 && inc_y == 1){
 BLASLONG n2 = n * 2;
 if(gvl <= n2/2){