* Update intrinsics to match the latest spec at https://github.com/riscv-non-isa/rvv-intrinsic-doc (in particular, the `__riscv_` prefix on RVV intrinsics).
* Fix multiple numerical-stability and corner-case issues.
* Add a script to generate arbitrary GEMM kernel shapes.
* Add a generic zvl256b target to demonstrate large GEMM kernel unrolls.
This commit is contained in:
@@ -28,30 +28,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include "common.h"
|
||||
#include <math.h>
|
||||
#include <float.h>
|
||||
#if !defined(DOUBLE)
|
||||
#define VSETVL(n) vsetvl_e32m8(n)
|
||||
#define VSETVL_MAX vsetvlmax_e32m1()
|
||||
#define FLOAT_V_T vfloat32m8_t
|
||||
#define FLOAT_V_T_M1 vfloat32m1_t
|
||||
#define VLEV_FLOAT vle32_v_f32m8
|
||||
#define VLSEV_FLOAT vlse32_v_f32m8
|
||||
#define VFREDMAXVS_FLOAT vfredmax_vs_f32m8_f32m1
|
||||
#define VFMVVF_FLOAT vfmv_v_f_f32m8
|
||||
#define VFMVVF_FLOAT_M1 vfmv_v_f_f32m1
|
||||
#define VFMAXVV_FLOAT vfmax_vv_f32m8
|
||||
|
||||
#ifdef RISCV64_ZVL256B
|
||||
# define LMUL m2
|
||||
# if defined(DOUBLE)
|
||||
# define ELEN 64
|
||||
# define MLEN 32
|
||||
# else
|
||||
# define ELEN 32
|
||||
# define MLEN 16
|
||||
# endif
|
||||
#else
|
||||
#define VSETVL(n) vsetvl_e64m8(n)
|
||||
#define VSETVL_MAX vsetvlmax_e64m1()
|
||||
#define FLOAT_V_T vfloat64m8_t
|
||||
#define FLOAT_V_T_M1 vfloat64m1_t
|
||||
#define VLEV_FLOAT vle64_v_f64m8
|
||||
#define VLSEV_FLOAT vlse64_v_f64m8
|
||||
#define VFREDMAXVS_FLOAT vfredmax_vs_f64m8_f64m1
|
||||
#define VFMVVF_FLOAT vfmv_v_f_f64m8
|
||||
#define VFMVVF_FLOAT_M1 vfmv_v_f_f64m1
|
||||
#define VFMAXVV_FLOAT vfmax_vv_f64m8
|
||||
# define LMUL m8
|
||||
# if defined(DOUBLE)
|
||||
# define ELEN 64
|
||||
# define MLEN 8
|
||||
# else
|
||||
# define ELEN 32
|
||||
# define MLEN 4
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/*
 * Token-pasting helpers used to build RVV type and intrinsic names from the
 * ELEN (element width), LMUL (register group) and MLEN (mask ratio) macros
 * selected above, instead of spelling out one name per precision.
 */
/* `_` expands to nothing; it is passed to JOIN as a filler for unused slots. */
#define _
|
||||
/* Raw paste of two tokens; arguments are NOT macro-expanded at this level. */
#define JOIN2_X(x, y) x ## y
|
||||
/* Extra level of indirection so x and y are macro-expanded (e.g. ELEN -> 32)
 * before JOIN2_X pastes them together. */
#define JOIN2(x, y) JOIN2_X(x, y)
|
||||
/* Paste five pieces left to right: ((((v w) x) y) z). */
#define JOIN(v, w, x, y, z) JOIN2( JOIN2( JOIN2( JOIN2( v, w ), x), y), z)
|
||||
|
||||
/* e.g. with ELEN=32, LMUL=m8 this names __riscv_vsetvl_e32m8 */
#define VSETVL JOIN(__riscv_vsetvl, _e, ELEN, LMUL, _)
|
||||
/* e.g. vfloat32m8_t: vector type at the selected LMUL */
#define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
|
||||
/* e.g. vfloat32m1_t: same element width, LMUL fixed to m1 */
#define FLOAT_V_T_M1 JOIN(vfloat, ELEN, m1, _t, _)
|
||||
/* e.g. __riscv_vle32_v_f32m8 */
#define VLEV_FLOAT JOIN(__riscv_vle, ELEN, _v_f, ELEN, LMUL)
|
||||
/* e.g. __riscv_vlse32_v_f32m8 */
#define VLSEV_FLOAT JOIN(__riscv_vlse, ELEN, _v_f, ELEN, LMUL)
|
||||
/* e.g. __riscv_vfredmax_vs_f32m8_f32m1; the inner JOIN2 builds the
 * "<ELEN>m1" suffix so that it fits into JOIN's fifth slot. */
#define VFREDMAXVS_FLOAT JOIN(__riscv_vfredmax_vs_f, ELEN, LMUL, _f, JOIN2( ELEN, m1))
|
||||
/* e.g. vbool4_t: mask type named by MLEN */
#define MASK_T JOIN(vbool, MLEN, _t, _, _)
|
||||
/* e.g. __riscv_vmflt_vf_f32m8_b4 */
#define VMFLTVF_FLOAT JOIN(__riscv_vmflt_vf_f, ELEN, LMUL, _b, MLEN)
|
||||
/* e.g. __riscv_vfmv_v_f_f32m8 */
#define VFMVVF_FLOAT JOIN(__riscv_vfmv, _v_f_f, ELEN, LMUL, _)
|
||||
/* e.g. __riscv_vfmv_v_f_f32m1 */
#define VFMVVF_FLOAT_M1 JOIN(__riscv_vfmv, _v_f_f, ELEN, m1, _)
|
||||
/* e.g. __riscv_vfmax_vv_f32m8 */
#define VFMAXVV_FLOAT JOIN(__riscv_vfmax, _vv_f, ELEN, LMUL, _)
|
||||
|
||||
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
{
|
||||
BLASLONG i=0, j=0;
|
||||
@@ -59,10 +73,8 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
FLOAT maxf=-FLT_MAX;
|
||||
unsigned int gvl = 0;
|
||||
FLOAT_V_T v0, v1, v_max;
|
||||
FLOAT_V_T_M1 v_res, v_min;
|
||||
gvl = VSETVL_MAX;
|
||||
v_res = VFMVVF_FLOAT_M1(0, gvl);
|
||||
v_min = VFMVVF_FLOAT_M1(-FLT_MAX, gvl);
|
||||
FLOAT_V_T_M1 v_res;
|
||||
v_res = VFMVVF_FLOAT_M1(-FLT_MAX, 1);
|
||||
|
||||
if(inc_x == 1){
|
||||
gvl = VSETVL(n);
|
||||
@@ -76,15 +88,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
v_max = VFMAXVV_FLOAT(v_max, v1, gvl);
|
||||
j += gvl * 2;
|
||||
}
|
||||
v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_min, gvl);
|
||||
maxf = *((FLOAT*)&v_res);
|
||||
v_res = VFREDMAXVS_FLOAT(v_max, v_res, gvl);
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = VSETVL(n-j);
|
||||
v0 = VLEV_FLOAT(&x[j], gvl);
|
||||
v_res = VFREDMAXVS_FLOAT(v_res, v0, v_min, gvl);
|
||||
if(*((FLOAT*)&v_res) > maxf)
|
||||
maxf = *((FLOAT*)&v_res);
|
||||
v_res = VFREDMAXVS_FLOAT(v0, v_res, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else{
|
||||
@@ -102,18 +111,16 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
||||
j += gvl * 2;
|
||||
idx += inc_xv * 2;
|
||||
}
|
||||
v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_min, gvl);
|
||||
maxf = *((FLOAT*)&v_res);
|
||||
v_res = VFREDMAXVS_FLOAT(v_max, v_res, gvl);
|
||||
}
|
||||
for(;j<n;){
|
||||
gvl = VSETVL(n-j);
|
||||
v0 = VLSEV_FLOAT(&x[j*inc_x], stride_x, gvl);
|
||||
v_res = VFREDMAXVS_FLOAT(v_res, v0, v_min, gvl);
|
||||
if(*((FLOAT*)&v_res) > maxf)
|
||||
maxf = *((FLOAT*)&v_res);
|
||||
v_res = VFREDMAXVS_FLOAT(v0, v_res, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}
|
||||
maxf = EXTRACT_FLOAT(v_res);
|
||||
return(maxf);
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user