Fix BLAS and LAPACK tests for C910V and RISCV64_ZVL256B targets
* Fixed bugs in dgemm, [a]min/max, and asum kernels * Added zero checks for BLAS kernels * Added dsdot implementation for RVV 0.7.1 * Fixed bugs in _vector files for C910V and RISCV64_ZVL256B targets * Added additional definitions for the RISCV64_ZVL256B target
This commit is contained in:
parent
88e994116c
commit
e1afb23811
|
@ -59,6 +59,10 @@ ifeq ($(TARGET), x280)
|
||||||
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
|
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), RISCV64_ZVL256B)
|
||||||
|
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(TARGET), RISCV64_GENERIC)
|
ifeq ($(TARGET), RISCV64_GENERIC)
|
||||||
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
|
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -6,6 +6,10 @@ ifeq ($(CORE), x280)
|
||||||
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
|
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
|
||||||
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
|
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
|
||||||
endif
|
endif
|
||||||
|
ifeq ($(CORE), RISCV64_ZVL256B)
|
||||||
|
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl256b -mabi=lp64d
|
||||||
|
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
|
||||||
|
endif
|
||||||
ifeq ($(CORE), RISCV64_GENERIC)
|
ifeq ($(CORE), RISCV64_GENERIC)
|
||||||
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
|
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
|
||||||
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static
|
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static
|
||||||
|
|
|
@ -121,6 +121,7 @@ Z14
|
||||||
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
|
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
|
||||||
C910V
|
C910V
|
||||||
x280
|
x280
|
||||||
|
RISCV64_ZVL256B
|
||||||
|
|
||||||
11.LOONGARCH64:
|
11.LOONGARCH64:
|
||||||
LOONGSONGENERIC
|
LOONGSONGENERIC
|
||||||
|
|
14
getarch.c
14
getarch.c
|
@ -1692,6 +1692,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_RISCV64_ZVL256B
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "RISCV64"
|
||||||
|
#define SUBARCHITECTURE "RISCV64_ZVL256B"
|
||||||
|
#define SUBDIRNAME "riscv64"
|
||||||
|
#define ARCHCONFIG "-DRISCV64_ZVL256B " \
|
||||||
|
"-DL1_DATA_SIZE=64536 -DL1_DATA_LINESIZE=32 " \
|
||||||
|
"-DL2_SIZE=262144 -DL2_LINESIZE=32 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||||
|
#define LIBNAME "riscv64_zvl256b"
|
||||||
|
#define CORENAME "RISCV64_ZVL256B"
|
||||||
|
#else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(FORCE_E2K) || defined(__e2k__)
|
#if defined(FORCE_E2K) || defined(__e2k__)
|
||||||
#define FORCE
|
#define FORCE
|
||||||
|
|
|
@ -59,6 +59,7 @@ SDOTKERNEL = dot_vector.c
|
||||||
DDOTKERNEL = dot_vector.c
|
DDOTKERNEL = dot_vector.c
|
||||||
CDOTKERNEL = zdot_vector.c
|
CDOTKERNEL = zdot_vector.c
|
||||||
ZDOTKERNEL = zdot_vector.c
|
ZDOTKERNEL = zdot_vector.c
|
||||||
|
DSDOTKERNEL = dsdot_vector.c
|
||||||
|
|
||||||
SNRM2KERNEL = nrm2_vector.c
|
SNRM2KERNEL = nrm2_vector.c
|
||||||
DNRM2KERNEL = nrm2_vector.c
|
DNRM2KERNEL = nrm2_vector.c
|
||||||
|
|
|
@ -31,15 +31,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
# define LMUL m2
|
# define LMUL m2
|
||||||
# if defined(DOUBLE)
|
# if defined(DOUBLE)
|
||||||
# define ELEN 64
|
# define ELEN 64
|
||||||
|
# define ABS fabs
|
||||||
# else
|
# else
|
||||||
# define ELEN 32
|
# define ELEN 32
|
||||||
|
# define ABS fabsf
|
||||||
# endif
|
# endif
|
||||||
#else
|
#else
|
||||||
# define LMUL m8
|
# define LMUL m8
|
||||||
# if defined(DOUBLE)
|
# if defined(DOUBLE)
|
||||||
# define ELEN 64
|
# define ELEN 64
|
||||||
|
# define ABS fabs
|
||||||
# else
|
# else
|
||||||
# define ELEN 32
|
# define ELEN 32
|
||||||
|
# define ABS fabsf
|
||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -69,7 +73,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
FLOAT minf=0.0;
|
FLOAT minf=0.0;
|
||||||
if (n <= 0 || inc_x <= 0) return(minf);
|
if (n <= 0 || inc_x <= 0) return(minf);
|
||||||
|
|
||||||
minf = *x;
|
minf = ABS(*x);
|
||||||
x += inc_x;
|
x += inc_x;
|
||||||
--n;
|
--n;
|
||||||
if (n == 0) return(minf);
|
if (n == 0) return(minf);
|
||||||
|
|
|
@ -67,7 +67,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
{
|
{
|
||||||
BLASLONG i=0, j=0;
|
BLASLONG i=0, j=0;
|
||||||
BLASLONG ix=0;
|
|
||||||
FLOAT asumf=0.0;
|
FLOAT asumf=0.0;
|
||||||
if (n <= 0 || inc_x <= 0) return(asumf);
|
if (n <= 0 || inc_x <= 0) return(asumf);
|
||||||
unsigned int gvl = 0;
|
unsigned int gvl = 0;
|
||||||
|
@ -103,17 +102,15 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
unsigned int stride_x = inc_x * sizeof(FLOAT);
|
||||||
if(gvl <= n/2){
|
if(gvl <= n/2){
|
||||||
v_sum = VFMVVF_FLOAT(0, gvl);
|
v_sum = VFMVVF_FLOAT(0, gvl);
|
||||||
BLASLONG inc_xv = inc_x * gvl;
|
|
||||||
for(i=0,j=0; i<n/(gvl*2); i++){
|
for(i=0,j=0; i<n/(gvl*2); i++){
|
||||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||||
v0 = VFABS_FLOAT(v0, gvl);
|
v0 = VFABS_FLOAT(v0, gvl);
|
||||||
v_sum = VFADDVV_FLOAT(v_sum, v0, gvl);
|
v_sum = VFADDVV_FLOAT(v_sum, v0, gvl);
|
||||||
|
|
||||||
v1 = VLSEV_FLOAT(&x[ix+inc_xv], stride_x, gvl);
|
v1 = VLSEV_FLOAT(&x[(j+gvl)*inc_x], stride_x, gvl);
|
||||||
v1 = VFABS_FLOAT(v1, gvl);
|
v1 = VFABS_FLOAT(v1, gvl);
|
||||||
v_sum = VFADDVV_FLOAT(v_sum, v1, gvl);
|
v_sum = VFADDVV_FLOAT(v_sum, v1, gvl);
|
||||||
j += gvl * 2;
|
j += gvl * 2;
|
||||||
inc_xv += inc_xv * 2;
|
|
||||||
}
|
}
|
||||||
v_res = VFREDSUMVS_FLOAT(v_sum, v_res, gvl);
|
v_res = VFREDSUMVS_FLOAT(v_sum, v_res, gvl);
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,7 +60,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
|
int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
|
||||||
{
|
{
|
||||||
if (n < 0) return(0);
|
if (n <= 0) return(0);
|
||||||
|
|
||||||
BLASLONG i=0, j=0;
|
BLASLONG i=0, j=0;
|
||||||
unsigned int gvl = 0;
|
unsigned int gvl = 0;
|
||||||
|
|
|
@ -196,7 +196,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
|
||||||
|
|
||||||
asm volatile(
|
asm volatile(
|
||||||
"vsetvli zero, zero, e64,m1 \n\t"
|
"vsetvli zero, zero, e64,m1 \n\t"
|
||||||
"fmv.w.x ft11, zero \n\t"
|
"fmv.d.x ft11, zero \n\t"
|
||||||
"mv t0, %[BK] \n\t"
|
"mv t0, %[BK] \n\t"
|
||||||
|
|
||||||
"vfmv.v.f v16, ft11 \n\t"
|
"vfmv.v.f v16, ft11 \n\t"
|
||||||
|
|
|
@ -0,0 +1,152 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2023, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
/*
 * dsdot kernel (RVV): double-precision dot product of two single-precision
 * vectors, dot = sum_{k=0}^{n-1} x[k*inc_x] * y[k*inc_y].
 *
 * Single-precision elements are loaded as f32m2 groups and widened into an
 * f64m4 accumulator with vfwmacc; vfredusum then collapses the vector
 * accumulator into a scalar.  Four paths cover the unit-stride / strided
 * combinations of x and y.
 *
 * NOTE(review): only n < 1 is guarded here; non-positive increments fall
 * through to the strided path (byte stride may be <= 0) -- confirm this
 * matches the convention of the sibling dot kernels.
 */
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
{
        BLASLONG i = 0, j = 0;   /* i: vector-loop counter, j: element index */
        double dot = 0.0;

        if (n < 1) return (dot); /* empty input -> 0.0 */

        vfloat64m4_t vr;         /* widened f64 running accumulator */
        vfloat32m2_t vx, vy;     /* f32 input element groups */
        unsigned int gvl = 0;
        vfloat64m1_t v_res, v_z0; /* reduction destination / zero seed */

        gvl   = vsetvlmax_e64m1();
        v_res = vfmv_v_f_f64m1(0, gvl);
        v_z0  = vfmv_v_f_f64m1(0, gvl);

        if (inc_x == 1 && inc_y == 1) {
                /* Both operands contiguous: unit-stride loads. */
                gvl = vsetvl_e64m4(n);
                vr  = vfmv_v_f_f64m4(0, gvl);
                for (i = 0, j = 0; i < n / gvl; i++) {
                        vx = vle32_v_f32m2(&x[j], gvl);
                        vy = vle32_v_f32m2(&y[j], gvl);
                        vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
                        j += gvl;
                }
                if (j > 0) {
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot  += (double)vfmv_f_s_f64m1_f64(v_res);
                }
                if (j < n) {
                        /* Tail: fewer than gvl elements remain. */
                        gvl = vsetvl_e64m4(n - j);
                        vx  = vle32_v_f32m2(&x[j], gvl);
                        vy  = vle32_v_f32m2(&y[j], gvl);
                        vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
                        vr    = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot  += (double)vfmv_f_s_f64m1_f64(v_res);
                }
        } else if (inc_y == 1) {
                /* x strided, y contiguous. */
                gvl = vsetvl_e64m4(n);
                vr  = vfmv_v_f_f64m4(0, gvl);
                int stride_x = inc_x * sizeof(FLOAT); /* byte stride */
                for (i = 0, j = 0; i < n / gvl; i++) {
                        vx = vlse32_v_f32m2(&x[j * inc_x], stride_x, gvl);
                        vy = vle32_v_f32m2(&y[j], gvl);
                        vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
                        j += gvl;
                }
                if (j > 0) {
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot  += (double)vfmv_f_s_f64m1_f64(v_res);
                }
                if (j < n) {
                        gvl = vsetvl_e64m4(n - j);
                        vx  = vlse32_v_f32m2(&x[j * inc_x], stride_x, gvl);
                        vy  = vle32_v_f32m2(&y[j], gvl);
                        vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
                        vr    = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot  += (double)vfmv_f_s_f64m1_f64(v_res);
                }
        } else if (inc_x == 1) {
                /* x contiguous, y strided. */
                gvl = vsetvl_e64m4(n);
                vr  = vfmv_v_f_f64m4(0, gvl);
                int stride_y = inc_y * sizeof(FLOAT); /* byte stride */
                for (i = 0, j = 0; i < n / gvl; i++) {
                        vx = vle32_v_f32m2(&x[j], gvl);
                        vy = vlse32_v_f32m2(&y[j * inc_y], stride_y, gvl);
                        vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
                        j += gvl;
                }
                if (j > 0) {
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot  += (double)vfmv_f_s_f64m1_f64(v_res);
                }
                if (j < n) {
                        gvl = vsetvl_e64m4(n - j);
                        vx  = vle32_v_f32m2(&x[j], gvl);
                        vy  = vlse32_v_f32m2(&y[j * inc_y], stride_y, gvl);
                        vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
                        vr    = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot  += (double)vfmv_f_s_f64m1_f64(v_res);
                }
        } else {
                /* Both operands strided. */
                gvl = vsetvl_e64m4(n);
                vr  = vfmv_v_f_f64m4(0, gvl);
                int stride_x = inc_x * sizeof(FLOAT); /* byte strides */
                int stride_y = inc_y * sizeof(FLOAT);
                for (i = 0, j = 0; i < n / gvl; i++) {
                        vx = vlse32_v_f32m2(&x[j * inc_x], stride_x, gvl);
                        vy = vlse32_v_f32m2(&y[j * inc_y], stride_y, gvl);
                        vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
                        j += gvl;
                }
                if (j > 0) {
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot  += (double)vfmv_f_s_f64m1_f64(v_res);
                }
                if (j < n) {
                        gvl = vsetvl_e64m4(n - j);
                        vx  = vlse32_v_f32m2(&x[j * inc_x], stride_x, gvl);
                        vy  = vlse32_v_f32m2(&y[j * inc_y], stride_y, gvl);
                        vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
                        vr    = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
                        v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
                        dot  += (double)vfmv_f_s_f64m1_f64(v_res);
                }
        }
        return (dot);
}
|
|
@ -139,7 +139,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
|
|
||||||
v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
|
v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
|
||||||
FLOAT cur_minf = EXTRACT_FLOAT(v_res);
|
FLOAT cur_minf = EXTRACT_FLOAT(v_res);
|
||||||
if(cur_minf > minf){
|
if(cur_minf < minf){
|
||||||
//tail index
|
//tail index
|
||||||
v_min_index = VIDV_UINT(gvl);
|
v_min_index = VIDV_UINT(gvl);
|
||||||
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
||||||
|
@ -185,7 +185,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
|
|
||||||
v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
|
v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
|
||||||
FLOAT cur_minf = EXTRACT_FLOAT(v_res);
|
FLOAT cur_minf = EXTRACT_FLOAT(v_res);
|
||||||
if(cur_minf > minf){
|
if(cur_minf < minf){
|
||||||
//tail index
|
//tail index
|
||||||
v_min_index = VIDV_UINT(gvl);
|
v_min_index = VIDV_UINT(gvl);
|
||||||
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
||||||
|
|
|
@ -156,7 +156,7 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
|
|
||||||
v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
|
v_res = VFREDMINVS_FLOAT(v_min, v_res, gvl);
|
||||||
FLOAT cur_minf = EXTRACT_FLOAT(v_res);
|
FLOAT cur_minf = EXTRACT_FLOAT(v_res);
|
||||||
if(cur_minf > minf){
|
if(cur_minf < minf){
|
||||||
//tail index
|
//tail index
|
||||||
v_min_index = VIDV_UINT(gvl);
|
v_min_index = VIDV_UINT(gvl);
|
||||||
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
v_min_index = VADDVX_UINT(v_min_index, j, gvl);
|
||||||
|
|
|
@ -104,7 +104,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
{
|
{
|
||||||
BLASLONG i=0;
|
BLASLONG i=0;
|
||||||
|
|
||||||
if(n <= 0) return(0.0);
|
if (n <= 0 || inc_x <= 0) return(0.0);
|
||||||
if(n == 1) return (ABS(x[0]));
|
if(n == 1) return (ABS(x[0]));
|
||||||
|
|
||||||
unsigned int gvl = 0;
|
unsigned int gvl = 0;
|
||||||
|
|
|
@ -61,7 +61,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
BLASLONG i=0, j=0;
|
BLASLONG i=0, j=0;
|
||||||
double len = 0.0 ;
|
double len = 0.0 ;
|
||||||
|
|
||||||
if ( n < 0 ) return(0.0);
|
if ( n <= 0 ) return(0.0);
|
||||||
if(n == 1) return (ABS(x[0]));
|
if(n == 1) return (ABS(x[0]));
|
||||||
|
|
||||||
FLOAT_V_T vr, v0, v1;
|
FLOAT_V_T vr, v0, v1;
|
||||||
|
|
|
@ -67,7 +67,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
|
||||||
BLASLONG stride_x, stride_y;
|
BLASLONG stride_x, stride_y;
|
||||||
FLOAT_V_T vx0, vx1, vy0, vy1;
|
FLOAT_V_T vx0, vx1, vy0, vy1;
|
||||||
|
|
||||||
if (n < 0) return(0);
|
if (n <= 0) return(0);
|
||||||
|
|
||||||
unsigned int gvl = VSETVL((inc_x != 0 && inc_y != 0) ? n : 1);
|
unsigned int gvl = VSETVL((inc_x != 0 && inc_y != 0) ? n : 1);
|
||||||
if( inc_x == 0 && inc_y == 0 ) { n = n & 1; }
|
if( inc_x == 0 && inc_y == 0 ) { n = n & 1; }
|
||||||
|
|
|
@ -60,17 +60,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
|
#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
|
||||||
#ifdef RISCV_0p10_INTRINSICS
|
#ifdef RISCV_0p10_INTRINSICS
|
||||||
#define VFREDMAXVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
|
#define VFREDMAXVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
|
||||||
#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl)
|
|
||||||
#else
|
#else
|
||||||
#define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
|
#define VFREDMAXVS_FLOAT JOIN(RISCV_RVV(vfredmax_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
|
||||||
#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m)
|
|
||||||
#endif
|
#endif
|
||||||
#define MASK_T JOIN(vbool, MLEN, _t, _, _)
|
#define MASK_T JOIN(vbool, MLEN, _t, _, _)
|
||||||
#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
|
|
||||||
#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
|
#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
|
||||||
#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
|
#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
|
||||||
#define VFMAXVV_FLOAT JOIN(RISCV_RVV(vfmax), _vv_f, ELEN, LMUL, _)
|
#define VFMAXVV_FLOAT JOIN(RISCV_RVV(vfmax), _vv_f, ELEN, LMUL, _)
|
||||||
#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
|
#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
|
||||||
|
#define VFABSV_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
|
||||||
|
|
||||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
{
|
{
|
||||||
|
@ -91,10 +89,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
for(; i<n/gvl; i++){
|
for(; i<n/gvl; i++){
|
||||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||||
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
|
||||||
v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
|
v0 = VFABSV_FLOAT(v0, gvl);
|
||||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
v1 = VFABSV_FLOAT(v1, gvl);
|
||||||
v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
|
|
||||||
|
|
||||||
v0 = VFADDVV_FLOAT(v0, v1, gvl);
|
v0 = VFADDVV_FLOAT(v0, v1, gvl);
|
||||||
v_max = VFMAXVV_FLOAT(v_max, v0, gvl);
|
v_max = VFMAXVV_FLOAT(v_max, v0, gvl);
|
||||||
|
@ -108,10 +105,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
gvl = VSETVL(n-j);
|
gvl = VSETVL(n-j);
|
||||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||||
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
v0 = VFABSV_FLOAT(v0, gvl);
|
||||||
v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
|
v1 = VFABSV_FLOAT(v1, gvl);
|
||||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
|
||||||
v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
|
|
||||||
v1 = VFADDVV_FLOAT(v0, v1, gvl);
|
v1 = VFADDVV_FLOAT(v0, v1, gvl);
|
||||||
v_res = VFREDMAXVS_FLOAT(v1, v_res, gvl);
|
v_res = VFREDMAXVS_FLOAT(v1, v_res, gvl);
|
||||||
}
|
}
|
||||||
|
|
|
@ -62,17 +62,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
|
#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
|
||||||
#ifdef RISCV_0p10_INTRINSICS
|
#ifdef RISCV_0p10_INTRINSICS
|
||||||
#define VFREDMINVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
|
#define VFREDMINVS_FLOAT(va,vb,gvl) JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1)) (v_res, va, vb, gvl)
|
||||||
#define VFRSUBVF_MASK_FLOAT(va,vb,c,gvl) JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m) (va, vb, vb, c, gvl)
|
|
||||||
#else
|
#else
|
||||||
#define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
|
#define VFREDMINVS_FLOAT JOIN(RISCV_RVV(vfredmin_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
|
||||||
#define VFRSUBVF_MASK_FLOAT JOIN(RISCV_RVV(vfrsub),_vf_f, ELEN, LMUL, _m)
|
|
||||||
#endif
|
#endif
|
||||||
#define MASK_T JOIN(vbool, MLEN, _t, _, _)
|
#define MASK_T JOIN(vbool, MLEN, _t, _, _)
|
||||||
#define VMFLTVF_FLOAT JOIN(RISCV_RVV(vmflt_vf_f), ELEN, LMUL, _b, MLEN)
|
|
||||||
#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
|
#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
|
||||||
#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
|
#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
|
||||||
#define VFMINVV_FLOAT JOIN(RISCV_RVV(vfmin), _vv_f, ELEN, LMUL, _)
|
#define VFMINVV_FLOAT JOIN(RISCV_RVV(vfmin), _vv_f, ELEN, LMUL, _)
|
||||||
#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
|
#define VFADDVV_FLOAT JOIN(RISCV_RVV(vfadd), _vv_f, ELEN, LMUL, _)
|
||||||
|
#define VFABSV_FLOAT JOIN(RISCV_RVV(vfabs), _v_f, ELEN, LMUL, _)
|
||||||
|
|
||||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
{
|
{
|
||||||
|
@ -93,10 +91,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
for(; i<n/gvl; i++){
|
for(; i<n/gvl; i++){
|
||||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||||
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
|
||||||
v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
|
v0 = VFABSV_FLOAT(v0, gvl);
|
||||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
v1 = VFABSV_FLOAT(v1, gvl);
|
||||||
v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
|
|
||||||
|
|
||||||
v0 = VFADDVV_FLOAT(v0, v1, gvl);
|
v0 = VFADDVV_FLOAT(v0, v1, gvl);
|
||||||
v_min = VFMINVV_FLOAT(v_min, v0, gvl);
|
v_min = VFMINVV_FLOAT(v_min, v0, gvl);
|
||||||
|
@ -110,10 +107,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
gvl = VSETVL(n-j);
|
gvl = VSETVL(n-j);
|
||||||
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
v0 = VLSEV_FLOAT(&x[ix], stride_x, gvl);
|
||||||
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
v1 = VLSEV_FLOAT(&x[ix+1], stride_x, gvl);
|
||||||
mask0 = VMFLTVF_FLOAT(v0, 0, gvl);
|
v0 = VFABSV_FLOAT(v0, gvl);
|
||||||
v0 = VFRSUBVF_MASK_FLOAT(mask0, v0, 0, gvl);
|
v1 = VFABSV_FLOAT(v1, gvl);
|
||||||
mask1 = VMFLTVF_FLOAT(v1, 0, gvl);
|
|
||||||
v1 = VFRSUBVF_MASK_FLOAT(mask1, v1, 0, gvl);
|
|
||||||
v1 = VFADDVV_FLOAT(v0, v1, gvl);
|
v1 = VFADDVV_FLOAT(v0, v1, gvl);
|
||||||
v_res = VFREDMINVS_FLOAT(v1, v_res, gvl);
|
v_res = VFREDMINVS_FLOAT(v1, v_res, gvl);
|
||||||
}
|
}
|
||||||
|
|
|
@ -96,7 +96,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||||
{
|
{
|
||||||
BLASLONG i=0;
|
BLASLONG i=0;
|
||||||
|
|
||||||
if(n < 0) return(0.0);
|
if (n <= 0 || inc_x <= 0) return(0.0);
|
||||||
|
|
||||||
FLOAT_V_T v_ssq, v_scale, v0, v1, v_zero;
|
FLOAT_V_T v_ssq, v_scale, v0, v1, v_zero;
|
||||||
unsigned int gvl = 0;
|
unsigned int gvl = 0;
|
||||||
|
|
|
@ -69,7 +69,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
|
||||||
unsigned int gvl = VSETVL((inc_x != 0 && inc_y != 0) ? n : 1);
|
unsigned int gvl = VSETVL((inc_x != 0 && inc_y != 0) ? n : 1);
|
||||||
if( inc_x == 0 && inc_y == 0 ) { n = n & 1; }
|
if( inc_x == 0 && inc_y == 0 ) { n = n & 1; }
|
||||||
|
|
||||||
if (n < 0) return(0);
|
if (n <= 0) return(0);
|
||||||
if(inc_x == 1 && inc_y == 1){
|
if(inc_x == 1 && inc_y == 1){
|
||||||
BLASLONG n2 = n * 2;
|
BLASLONG n2 = n * 2;
|
||||||
if(gvl <= n2/2){
|
if(gvl <= n2/2){
|
||||||
|
|
Loading…
Reference in New Issue