add riscv-v for nuclei UX900fd

This commit is contained in:
shuzhuo 2023-03-13 11:41:01 +08:00
parent c19dff0a31
commit b83579d6ac
9 changed files with 503 additions and 2 deletions

View File

@ -59,6 +59,10 @@ ifeq ($(TARGET), x280)
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
endif
# UX900FD: ARCH_EXT is appended verbatim to the base rv64imafdc ISA string,
# so e.g. ARCH_EXT=v yields -march=rv64imafdcv. With ARCH_EXT unset the
# plain rv64imafdc string is used.
ifeq ($(TARGET), UX900FD)
TARGET_FLAGS = -march=rv64imafdc$(ARCH_EXT) -mabi=lp64d
endif
# RISCV64_GENERIC: fixed rv64imafdc ISA string with the lp64d ABI.
ifeq ($(TARGET), RISCV64_GENERIC)
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
endif

View File

@ -6,6 +6,12 @@ ifeq ($(CORE), x280)
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -mllvm --riscv-v-vector-bits-min=512 -ffast-math
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
endif
# UX900FD: C and Fortran flags share the same ISA string; ARCH_EXT is
# appended verbatim to rv64imafdc (ARCH_EXT=v gives rv64imafdcv).
# NOTE(review): unlike the other cores visible in this file, -static is also
# added to CCOMMON_OPT here, not only to FCOMMON_OPT -- confirm this is
# intended for builds that are not fully static.
ifeq ($(CORE), UX900FD)
CCOMMON_OPT += -march=rv64imafdc$(ARCH_EXT) -mabi=lp64d -mtune=nuclei-900-series -static
FCOMMON_OPT += -march=rv64imafdc$(ARCH_EXT) -mabi=lp64d -mtune=nuclei-900-series -static
endif
ifeq ($(CORE), RISCV64_GENERIC)
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static

View File

@ -191,6 +191,13 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran
```
- **UX900FD**: Optimized Level-3 BLAS (real) and Level-1,2 BLAS using the RISC-V Vector extension 1.0 (optional, enabled with `ARCH_EXT=v`).
```sh
# Enable RVV for Nuclei UX900FD by ARCH_EXT=v
make HOSTCC=gcc TARGET=UX900FD ARCH_EXT=v CC=riscv-nuclei-linux-gnu-gcc NOFORTRAN=1 NO_SHARED=1 USE_THREAD=0 CFLAGS=-static BINARY=64
```
### Support for multiple targets in a single library
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.

View File

@ -121,6 +121,7 @@ Z14
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
C910V
x280
UX900FD
11.LOONGARCH64:
LOONGSONGENERIC

View File

@ -99,4 +99,9 @@ static inline int blas_quickdivide(blasint x, blasint y){
#include <riscv_vector.h>
#endif
/* UX900FD: include the vector intrinsics header only when the build both
 * targets this core and has __riscv_vector defined. */
#if defined(UX900FD) && defined(__riscv_vector)
#include <riscv_vector.h>
#endif
#endif

View File

@ -73,11 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Core identifiers. Each value is the index of the matching entry in
 * cpuname[] below, so the two lists must be kept in the same order. */
#define CPU_GENERIC 0
#define CPU_C910V 1
#define CPU_x280 2
#define CPU_UX900FD 3

/* Printable core names, indexed by the CPU_* constants above.
 * Every entry must be followed by a comma: adjacent string literals with no
 * comma between them are concatenated into a single array element, which
 * silently corrupts the name stored at that index. */
static char *cpuname[] = {
  "RISCV64_GENERIC", /* CPU_GENERIC */
  "C910V",           /* CPU_C910V   */
  "x280",            /* CPU_x280    */
  "UX900FD"          /* CPU_UX900FD */
};
int detect(void){

View File

@ -1692,6 +1692,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#endif
/* Forced-target description for the Nuclei UX900FD core, active when
 * FORCE_UX900FD is defined. It sets FORCE and the architecture / sub-
 * architecture / directory / library / core name strings for this target.
 * ARCHCONFIG carries the -D options for the target: -DUX900FD plus the
 * cache and TLB parameters (L1 data 65536 with 64 line size, L2 524288
 * with 64 line size and associativity 4, 128 default DTB entries,
 * DTB size 4096).
 * NOTE(review): sizes are presumably in bytes -- confirm against the core's
 * datasheet. */
#ifdef FORCE_UX900FD
#define FORCE
#define ARCHITECTURE "RISCV64"
#define SUBARCHITECTURE "UX900FD"
#define SUBDIRNAME "riscv64"
#define ARCHCONFIG "-DUX900FD " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME "UX900FD"
#define CORENAME "UX900FD"
#else
#endif
#if defined(FORCE_E2K) || defined(__e2k__)
#define FORCE

415
kernel/riscv64/KERNEL.UX900FD Executable file
View File

@ -0,0 +1,415 @@
# **********************************************************************************
# Copyright (c) 2022, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# **********************************************************************************
# Kernel selection for UX900FD.
# The choice is driven by ARCH_EXT: when it does not contain the letter "v"
# the first branch below picks the kernels under ../riscv64, ../generic and
# ../arm; otherwise the "else" branch picks the *_rvv.c kernels from this
# directory (falling back to ../generic for complex GEMM/TRSM/SYMV).
ifeq ($(findstring v,$(ARCH_EXT)),)
# ---- ARCH_EXT without "v" ----
# Level-1: max/min families
SAMAXKERNEL = ../riscv64/amax.c
DAMAXKERNEL = ../riscv64/amax.c
CAMAXKERNEL = ../riscv64/zamax.c
ZAMAXKERNEL = ../riscv64/zamax.c
SAMINKERNEL = ../riscv64/amin.c
DAMINKERNEL = ../riscv64/amin.c
CAMINKERNEL = ../riscv64/zamin.c
ZAMINKERNEL = ../riscv64/zamin.c
SMAXKERNEL = ../riscv64/max.c
DMAXKERNEL = ../riscv64/max.c
SMINKERNEL = ../riscv64/min.c
DMINKERNEL = ../riscv64/min.c
ISAMAXKERNEL = ../riscv64/iamax.c
IDAMAXKERNEL = ../riscv64/iamax.c
ICAMAXKERNEL = ../riscv64/izamax.c
IZAMAXKERNEL = ../riscv64/izamax.c
ISAMINKERNEL = ../riscv64/iamin.c
IDAMINKERNEL = ../riscv64/iamin.c
ICAMINKERNEL = ../riscv64/izamin.c
IZAMINKERNEL = ../riscv64/izamin.c
ISMAXKERNEL = ../riscv64/imax.c
IDMAXKERNEL = ../riscv64/imax.c
ISMINKERNEL = ../riscv64/imin.c
IDMINKERNEL = ../riscv64/imin.c
# Level-1: sums, axpy, copy, dot, nrm2, rot, scal, swap
SASUMKERNEL = ../riscv64/asum.c
DASUMKERNEL = ../riscv64/asum.c
CASUMKERNEL = ../riscv64/zasum.c
ZASUMKERNEL = ../riscv64/zasum.c
# The plain sum kernels are taken from ../arm in this branch.
SSUMKERNEL = ../arm/sum.c
DSUMKERNEL = ../arm/sum.c
CSUMKERNEL = ../arm/zsum.c
ZSUMKERNEL = ../arm/zsum.c
SAXPYKERNEL = ../riscv64/axpy.c
DAXPYKERNEL = ../riscv64/axpy.c
CAXPYKERNEL = ../riscv64/zaxpy.c
ZAXPYKERNEL = ../riscv64/zaxpy.c
SCOPYKERNEL = ../riscv64/copy.c
DCOPYKERNEL = ../riscv64/copy.c
CCOPYKERNEL = ../riscv64/zcopy.c
ZCOPYKERNEL = ../riscv64/zcopy.c
SDOTKERNEL = ../riscv64/dot.c
DDOTKERNEL = ../riscv64/dot.c
CDOTKERNEL = ../riscv64/zdot.c
ZDOTKERNEL = ../riscv64/zdot.c
DSDOTKERNEL = ../generic/dot.c
SNRM2KERNEL = ../riscv64/nrm2.c
DNRM2KERNEL = ../riscv64/nrm2.c
CNRM2KERNEL = ../riscv64/znrm2.c
ZNRM2KERNEL = ../riscv64/znrm2.c
SROTKERNEL = ../riscv64/rot.c
DROTKERNEL = ../riscv64/rot.c
CROTKERNEL = ../riscv64/zrot.c
ZROTKERNEL = ../riscv64/zrot.c
SSCALKERNEL = ../riscv64/scal.c
DSCALKERNEL = ../riscv64/scal.c
CSCALKERNEL = ../riscv64/zscal.c
ZSCALKERNEL = ../riscv64/zscal.c
SSWAPKERNEL = ../riscv64/swap.c
DSWAPKERNEL = ../riscv64/swap.c
CSWAPKERNEL = ../riscv64/zswap.c
ZSWAPKERNEL = ../riscv64/zswap.c
# Level-2: GEMV
SGEMVNKERNEL = ../riscv64/gemv_n.c
DGEMVNKERNEL = ../riscv64/gemv_n.c
CGEMVNKERNEL = ../riscv64/zgemv_n.c
ZGEMVNKERNEL = ../riscv64/zgemv_n.c
SGEMVTKERNEL = ../riscv64/gemv_t.c
DGEMVTKERNEL = ../riscv64/gemv_t.c
CGEMVTKERNEL = ../riscv64/zgemv_t.c
ZGEMVTKERNEL = ../riscv64/zgemv_t.c
# Level-3: generic 2x2 TRMM/GEMM kernels and their copy routines
STRMMKERNEL = ../generic/trmmkernel_2x2.c
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
# Level-3: generic TRSM kernels
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
# SYMV and miscellaneous helpers
SSYMV_U_KERNEL = ../generic/symv_k.c
SSYMV_L_KERNEL = ../generic/symv_k.c
DSYMV_U_KERNEL = ../generic/symv_k.c
DSYMV_L_KERNEL = ../generic/symv_k.c
CSYMV_U_KERNEL = ../generic/zsymv_k.c
CSYMV_L_KERNEL = ../generic/zsymv_k.c
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
LSAME_KERNEL = ../generic/lsame.c
SCABS_KERNEL = ../generic/cabs.c
DCABS_KERNEL = ../generic/cabs.c
QCABS_KERNEL = ../generic/cabs.c
# GEMM beta kernels: only set here when not already defined earlier.
ifndef SGEMM_BETA
SGEMM_BETA = ../generic/gemm_beta.c
endif
ifndef DGEMM_BETA
DGEMM_BETA = ../generic/gemm_beta.c
endif
ifndef CGEMM_BETA
CGEMM_BETA = ../generic/zgemm_beta.c
endif
ifndef ZGEMM_BETA
ZGEMM_BETA = ../generic/zgemm_beta.c
endif
# Inner (A-side) copy routines for real GEMM, also generic 2-wide.
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMINCOPY = ../generic/gemm_ncopy_2.c
SGEMMITCOPY = ../generic/gemm_tcopy_2.c
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
else
# ---- ARCH_EXT contains "v": *_rvv.c kernels ----
# Level-1: max/min families
SAMAXKERNEL = amax_rvv.c
DAMAXKERNEL = amax_rvv.c
CAMAXKERNEL = zamax_rvv.c
ZAMAXKERNEL = zamax_rvv.c
SAMINKERNEL = amin_rvv.c
DAMINKERNEL = amin_rvv.c
CAMINKERNEL = zamin_rvv.c
ZAMINKERNEL = zamin_rvv.c
SMAXKERNEL = max_rvv.c
DMAXKERNEL = max_rvv.c
SMINKERNEL = min_rvv.c
DMINKERNEL = min_rvv.c
ISAMAXKERNEL = iamax_rvv.c
IDAMAXKERNEL = iamax_rvv.c
ICAMAXKERNEL = izamax_rvv.c
IZAMAXKERNEL = izamax_rvv.c
ISAMINKERNEL = iamin_rvv.c
IDAMINKERNEL = iamin_rvv.c
ICAMINKERNEL = izamin_rvv.c
IZAMINKERNEL = izamin_rvv.c
ISMAXKERNEL = imax_rvv.c
IDMAXKERNEL = imax_rvv.c
ISMINKERNEL = imin_rvv.c
IDMINKERNEL = imin_rvv.c
# Level-1: sums, axpy/axpby, copy, dot, nrm2, rot, scal, swap
SASUMKERNEL = asum_rvv.c
DASUMKERNEL = asum_rvv.c
CASUMKERNEL = zasum_rvv.c
ZASUMKERNEL = zasum_rvv.c
SSUMKERNEL = sum_rvv.c
DSUMKERNEL = sum_rvv.c
CSUMKERNEL = zsum_rvv.c
ZSUMKERNEL = zsum_rvv.c
SAXPYKERNEL = axpy_rvv.c
DAXPYKERNEL = axpy_rvv.c
CAXPYKERNEL = zaxpy_rvv.c
ZAXPYKERNEL = zaxpy_rvv.c
SAXPBYKERNEL = axpby_rvv.c
DAXPBYKERNEL = axpby_rvv.c
CAXPBYKERNEL = zaxpby_rvv.c
ZAXPBYKERNEL = zaxpby_rvv.c
SCOPYKERNEL = copy_rvv.c
DCOPYKERNEL = copy_rvv.c
CCOPYKERNEL = zcopy_rvv.c
ZCOPYKERNEL = zcopy_rvv.c
SDOTKERNEL = dot_rvv.c
DDOTKERNEL = dot_rvv.c
CDOTKERNEL = zdot_rvv.c
ZDOTKERNEL = zdot_rvv.c
DSDOTKERNEL = dot_rvv.c
SNRM2KERNEL = nrm2_rvv.c
DNRM2KERNEL = nrm2_rvv.c
CNRM2KERNEL = znrm2_rvv.c
ZNRM2KERNEL = znrm2_rvv.c
SROTKERNEL = rot_rvv.c
DROTKERNEL = rot_rvv.c
CROTKERNEL = zrot_rvv.c
ZROTKERNEL = zrot_rvv.c
SSCALKERNEL = scal_rvv.c
DSCALKERNEL = scal_rvv.c
CSCALKERNEL = zscal_rvv.c
ZSCALKERNEL = zscal_rvv.c
SSWAPKERNEL = swap_rvv.c
DSWAPKERNEL = swap_rvv.c
CSWAPKERNEL = zswap_rvv.c
ZSWAPKERNEL = zswap_rvv.c
# Level-2: GEMV
SGEMVNKERNEL = gemv_n_rvv.c
DGEMVNKERNEL = gemv_n_rvv.c
CGEMVNKERNEL = zgemv_n_rvv.c
ZGEMVNKERNEL = zgemv_n_rvv.c
SGEMVTKERNEL = gemv_t_rvv.c
DGEMVTKERNEL = gemv_t_rvv.c
CGEMVTKERNEL = zgemv_t_rvv.c
ZGEMVTKERNEL = zgemv_t_rvv.c
# Level-3: complex TRMM uses the 2x2 RVV kernel.
CTRMMKERNEL = ztrmmkernel_2x2_rvv.c
ZTRMMKERNEL = ztrmmkernel_2x2_rvv.c
# Single-precision real GEMM/TRMM/SYMM: only assigned when SGEMM_UNROLL_N is 8.
# NOTE(review): this branch sets no fallback for other SGEMM_UNROLL_N values.
# SGEMM_UNROLL_N set in param.h
ifeq ($(SGEMM_UNROLL_N), 8)
# UNROLL_M is VLMAX
SGEMMKERNEL = gemmkernel_rvv_v1x8.c
SGEMMINCOPY = gemm_ncopy_rvv_v1.c
SGEMMITCOPY = gemm_tcopy_rvv_v1.c
SGEMMONCOPY = gemm_ncopy_$(SGEMM_UNROLL_N)_rvv.c
SGEMMOTCOPY = gemm_tcopy_$(SGEMM_UNROLL_N)_rvv.c
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRMMKERNEL = trmmkernel_rvv_v1x8.c
STRMMUNCOPY_M = trmm_uncopy_rvv_v1.c
STRMMLNCOPY_M = trmm_lncopy_rvv_v1.c
STRMMUTCOPY_M = trmm_utcopy_rvv_v1.c
STRMMLTCOPY_M = trmm_ltcopy_rvv_v1.c
SSYMMUCOPY_M = symm_ucopy_rvv_v1.c
SSYMMLCOPY_M = symm_lcopy_rvv_v1.c
endif
# Double-precision real GEMM/TRMM/SYMM: only assigned when DGEMM_UNROLL_N is 8.
# NOTE(review): this branch sets no fallback for other DGEMM_UNROLL_N values.
# DGEMM_UNROLL_N set in param.h
ifeq ($(DGEMM_UNROLL_N), 8)
# UNROLL_M is VLMAX
DGEMMKERNEL = gemmkernel_rvv_v1x8.c
DGEMMINCOPY = gemm_ncopy_rvv_v1.c
DGEMMITCOPY = gemm_tcopy_rvv_v1.c
DGEMMONCOPY = gemm_ncopy_$(DGEMM_UNROLL_N)_rvv.c
DGEMMOTCOPY = gemm_tcopy_$(DGEMM_UNROLL_N)_rvv.c
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
DTRMMKERNEL = trmmkernel_rvv_v1x8.c
DTRMMUNCOPY_M = trmm_uncopy_rvv_v1.c
DTRMMLNCOPY_M = trmm_lncopy_rvv_v1.c
DTRMMUTCOPY_M = trmm_utcopy_rvv_v1.c
DTRMMLTCOPY_M = trmm_ltcopy_rvv_v1.c
DSYMMUCOPY_M = symm_ucopy_rvv_v1.c
DSYMMLCOPY_M = symm_lcopy_rvv_v1.c
endif
# Complex GEMM stays on the generic 2x2 kernels in this branch.
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
# TRSM: RVV kernels for real types, generic kernels for complex types.
STRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
STRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
STRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
STRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
DTRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
DTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
DTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
DTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
TRSMCOPYLN_M = trsm_lncopy_rvv_v1.c
TRSMCOPYLT_M = trsm_ltcopy_rvv_v1.c
TRSMCOPYUN_M = trsm_uncopy_rvv_v1.c
TRSMCOPYUT_M = trsm_utcopy_rvv_v1.c
# SYMV: RVV kernels for real types, generic kernels for complex types.
SSYMV_U_KERNEL = symv_U_rvv.c
SSYMV_L_KERNEL = symv_L_rvv.c
DSYMV_U_KERNEL = symv_U_rvv.c
DSYMV_L_KERNEL = symv_L_rvv.c
CSYMV_U_KERNEL = ../generic/zsymv_k.c
CSYMV_L_KERNEL = ../generic/zsymv_k.c
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
LSAME_KERNEL = ../generic/lsame.c
SCABS_KERNEL = ../generic/cabs.c
DCABS_KERNEL = ../generic/cabs.c
QCABS_KERNEL = ../generic/cabs.c
# GEMM beta kernels: only set here when not already defined earlier.
ifndef SGEMM_BETA
SGEMM_BETA = gemm_beta_rvv.c
endif
ifndef DGEMM_BETA
DGEMM_BETA = gemm_beta_rvv.c
endif
ifndef CGEMM_BETA
CGEMM_BETA = zgemm_beta_rvv.c
endif
ifndef ZGEMM_BETA
ZGEMM_BETA = zgemm_beta_rvv.c
endif
endif

47
param.h
View File

@ -3082,6 +3082,53 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 0
#endif
#if defined(UX900FD)
/* GEMM blocking and unroll parameters for the UX900FD target. */

#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL

/* NOTE(review): the real (S/D) unroll factors below are set unconditionally,
 * while KERNEL.UX900FD selects the RVV v1x8 kernels only when ARCH_EXT
 * contains "v" and the generic 2x2 kernels otherwise. Confirm that 16x8 is
 * also intended for the build without the vector extension. */
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 8

/* SGEMM_UNROLL_MN is normally derived as max(SGEMM_UNROLL_M, SGEMM_UNROLL_N).
 * The M unroll of the RVV kernels is really VLMAX rather than the nominal
 * value defined above, so the macro is set by hand here.
 * If VLMAX size is ever more than 1024, this should be increased as well. */
#define SGEMM_DEFAULT_UNROLL_MN 32

#define DGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_N 8
/* Set by hand for the same reason as SGEMM_DEFAULT_UNROLL_MN. */
#define DGEMM_DEFAULT_UNROLL_MN 32

#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2

#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2

/* P/Q/R blocking sizes per precision. */
#define SGEMM_DEFAULT_P 160
#define DGEMM_DEFAULT_P 160
#define CGEMM_DEFAULT_P 96
#define ZGEMM_DEFAULT_P 64

#define SGEMM_DEFAULT_Q 240
#define DGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 120
#define ZGEMM_DEFAULT_Q 120

#define SGEMM_DEFAULT_R 12288
#define DGEMM_DEFAULT_R 8192
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096

#define SYMV_P 16

/* GEMM_DEFAULT_OFFSET_A/B are defined once at the top of this block; the
 * second, identical pair that used to follow SYMV_P was redundant. */
#endif
#ifdef C910V
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0