add riscv-v for nuclei UX900fd
This commit is contained in:
parent
c19dff0a31
commit
b83579d6ac
|
@ -59,6 +59,10 @@ ifeq ($(TARGET), x280)
|
||||||
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
|
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), UX900FD)
|
||||||
|
TARGET_FLAGS = -march=rv64imafdc$(ARCH_EXT) -mabi=lp64d
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(TARGET), RISCV64_GENERIC)
|
ifeq ($(TARGET), RISCV64_GENERIC)
|
||||||
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
|
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -6,6 +6,12 @@ ifeq ($(CORE), x280)
|
||||||
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -mllvm --riscv-v-vector-bits-min=512 -ffast-math
|
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -mllvm --riscv-v-vector-bits-min=512 -ffast-math
|
||||||
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
|
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), UX900FD)
|
||||||
|
CCOMMON_OPT += -march=rv64imafdc$(ARCH_EXT) -mabi=lp64d -mtune=nuclei-900-series -static
|
||||||
|
FCOMMON_OPT += -march=rv64imafdc$(ARCH_EXT) -mabi=lp64d -mtune=nuclei-900-series -static
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), RISCV64_GENERIC)
|
ifeq ($(CORE), RISCV64_GENERIC)
|
||||||
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
|
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
|
||||||
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static
|
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static
|
||||||
|
|
|
@ -191,6 +191,13 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
|
||||||
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran
|
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran
|
||||||
```
|
```
|
||||||
|
|
||||||
|
- **UX900FD**: Level-3 BLAS (real) and Level-1,2 are optimized by the RISC-V Vector extension 1.0 (optional).
|
||||||
|
|
||||||
|
```sh
|
||||||
|
# Enable RVV for Nuclei UX900FD by setting ARCH_EXT=v
|
||||||
|
make HOSTCC=gcc TARGET=UX900FD ARCH_EXT=v CC=riscv-nuclei-linux-gnu-gcc NOFORTRAN=1 NO_SHARED=1 USE_THREAD=0 CFLAGS=-static BINARY=64
|
||||||
|
```
|
||||||
|
|
||||||
### Support for multiple targets in a single library
|
### Support for multiple targets in a single library
|
||||||
|
|
||||||
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
|
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
|
||||||
|
|
|
@ -121,6 +121,7 @@ Z14
|
||||||
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
|
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
|
||||||
C910V
|
C910V
|
||||||
x280
|
x280
|
||||||
|
UX900FD
|
||||||
|
|
||||||
11.LOONGARCH64:
|
11.LOONGARCH64:
|
||||||
LOONGSONGENERIC
|
LOONGSONGENERIC
|
||||||
|
|
|
@ -99,4 +99,9 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
#include <riscv_vector.h>
|
#include <riscv_vector.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(UX900FD)
|
||||||
|
#if (defined(__riscv_vector))
|
||||||
|
#include <riscv_vector.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -73,11 +73,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CPU_GENERIC 0
|
#define CPU_GENERIC 0
|
||||||
#define CPU_C910V 1
|
#define CPU_C910V 1
|
||||||
#define CPU_x280 2
|
#define CPU_x280 2
|
||||||
|
#define CPU_UX900FD 3
|
||||||
|
|
||||||
static char *cpuname[] = {
|
static char *cpuname[] = {
|
||||||
"RISCV64_GENERIC",
|
"RISCV64_GENERIC",
|
||||||
"C910V"
|
"C910V",
|
||||||
"x280"
|
"x280",
|
||||||
|
"UX900FD"
|
||||||
};
|
};
|
||||||
|
|
||||||
int detect(void){
|
int detect(void){
|
||||||
|
|
14
getarch.c
14
getarch.c
|
@ -1692,6 +1692,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_UX900FD
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "RISCV64"
|
||||||
|
#define SUBARCHITECTURE "UX900FD"
|
||||||
|
#define SUBDIRNAME "riscv64"
|
||||||
|
#define ARCHCONFIG "-DUX900FD " \
|
||||||
|
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
|
||||||
|
"-DL2_SIZE=524288 -DL2_LINESIZE=64 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
|
||||||
|
#define LIBNAME "UX900FD"
|
||||||
|
#define CORENAME "UX900FD"
|
||||||
|
#else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#if defined(FORCE_E2K) || defined(__e2k__)
|
#if defined(FORCE_E2K) || defined(__e2k__)
|
||||||
#define FORCE
|
#define FORCE
|
||||||
|
|
|
@ -0,0 +1,415 @@
|
||||||
|
# **********************************************************************************
|
||||||
|
# Copyright (c) 2022, The OpenBLAS Project
|
||||||
|
# All rights reserved.
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are
|
||||||
|
# met:
|
||||||
|
# 1. Redistributions of source code must retain the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer.
|
||||||
|
# 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
# notice, this list of conditions and the following disclaimer in
|
||||||
|
# the documentation and/or other materials provided with the
|
||||||
|
# distribution.
|
||||||
|
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
# its contributors may be used to endorse or promote products
|
||||||
|
# derived from this software without specific prior written permission.
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
# **********************************************************************************
|
||||||
|
|
||||||
|
ifeq ($(findstring v,$(ARCH_EXT)),)
|
||||||
|
|
||||||
|
SAMAXKERNEL = ../riscv64/amax.c
|
||||||
|
DAMAXKERNEL = ../riscv64/amax.c
|
||||||
|
CAMAXKERNEL = ../riscv64/zamax.c
|
||||||
|
ZAMAXKERNEL = ../riscv64/zamax.c
|
||||||
|
|
||||||
|
SAMINKERNEL = ../riscv64/amin.c
|
||||||
|
DAMINKERNEL = ../riscv64/amin.c
|
||||||
|
CAMINKERNEL = ../riscv64/zamin.c
|
||||||
|
ZAMINKERNEL = ../riscv64/zamin.c
|
||||||
|
|
||||||
|
SMAXKERNEL = ../riscv64/max.c
|
||||||
|
DMAXKERNEL = ../riscv64/max.c
|
||||||
|
|
||||||
|
SMINKERNEL = ../riscv64/min.c
|
||||||
|
DMINKERNEL = ../riscv64/min.c
|
||||||
|
|
||||||
|
ISAMAXKERNEL = ../riscv64/iamax.c
|
||||||
|
IDAMAXKERNEL = ../riscv64/iamax.c
|
||||||
|
ICAMAXKERNEL = ../riscv64/izamax.c
|
||||||
|
IZAMAXKERNEL = ../riscv64/izamax.c
|
||||||
|
|
||||||
|
ISAMINKERNEL = ../riscv64/iamin.c
|
||||||
|
IDAMINKERNEL = ../riscv64/iamin.c
|
||||||
|
ICAMINKERNEL = ../riscv64/izamin.c
|
||||||
|
IZAMINKERNEL = ../riscv64/izamin.c
|
||||||
|
|
||||||
|
ISMAXKERNEL = ../riscv64/imax.c
|
||||||
|
IDMAXKERNEL = ../riscv64/imax.c
|
||||||
|
|
||||||
|
ISMINKERNEL = ../riscv64/imin.c
|
||||||
|
IDMINKERNEL = ../riscv64/imin.c
|
||||||
|
|
||||||
|
SASUMKERNEL = ../riscv64/asum.c
|
||||||
|
DASUMKERNEL = ../riscv64/asum.c
|
||||||
|
CASUMKERNEL = ../riscv64/zasum.c
|
||||||
|
ZASUMKERNEL = ../riscv64/zasum.c
|
||||||
|
|
||||||
|
SSUMKERNEL = ../arm/sum.c
|
||||||
|
DSUMKERNEL = ../arm/sum.c
|
||||||
|
CSUMKERNEL = ../arm/zsum.c
|
||||||
|
ZSUMKERNEL = ../arm/zsum.c
|
||||||
|
|
||||||
|
SAXPYKERNEL = ../riscv64/axpy.c
|
||||||
|
DAXPYKERNEL = ../riscv64/axpy.c
|
||||||
|
CAXPYKERNEL = ../riscv64/zaxpy.c
|
||||||
|
ZAXPYKERNEL = ../riscv64/zaxpy.c
|
||||||
|
|
||||||
|
SCOPYKERNEL = ../riscv64/copy.c
|
||||||
|
DCOPYKERNEL = ../riscv64/copy.c
|
||||||
|
CCOPYKERNEL = ../riscv64/zcopy.c
|
||||||
|
ZCOPYKERNEL = ../riscv64/zcopy.c
|
||||||
|
|
||||||
|
SDOTKERNEL = ../riscv64/dot.c
|
||||||
|
DDOTKERNEL = ../riscv64/dot.c
|
||||||
|
CDOTKERNEL = ../riscv64/zdot.c
|
||||||
|
ZDOTKERNEL = ../riscv64/zdot.c
|
||||||
|
DSDOTKERNEL = ../generic/dot.c
|
||||||
|
|
||||||
|
SNRM2KERNEL = ../riscv64/nrm2.c
|
||||||
|
DNRM2KERNEL = ../riscv64/nrm2.c
|
||||||
|
CNRM2KERNEL = ../riscv64/znrm2.c
|
||||||
|
ZNRM2KERNEL = ../riscv64/znrm2.c
|
||||||
|
|
||||||
|
SROTKERNEL = ../riscv64/rot.c
|
||||||
|
DROTKERNEL = ../riscv64/rot.c
|
||||||
|
CROTKERNEL = ../riscv64/zrot.c
|
||||||
|
ZROTKERNEL = ../riscv64/zrot.c
|
||||||
|
|
||||||
|
SSCALKERNEL = ../riscv64/scal.c
|
||||||
|
DSCALKERNEL = ../riscv64/scal.c
|
||||||
|
CSCALKERNEL = ../riscv64/zscal.c
|
||||||
|
ZSCALKERNEL = ../riscv64/zscal.c
|
||||||
|
|
||||||
|
SSWAPKERNEL = ../riscv64/swap.c
|
||||||
|
DSWAPKERNEL = ../riscv64/swap.c
|
||||||
|
CSWAPKERNEL = ../riscv64/zswap.c
|
||||||
|
ZSWAPKERNEL = ../riscv64/zswap.c
|
||||||
|
|
||||||
|
SGEMVNKERNEL = ../riscv64/gemv_n.c
|
||||||
|
DGEMVNKERNEL = ../riscv64/gemv_n.c
|
||||||
|
CGEMVNKERNEL = ../riscv64/zgemv_n.c
|
||||||
|
ZGEMVNKERNEL = ../riscv64/zgemv_n.c
|
||||||
|
|
||||||
|
SGEMVTKERNEL = ../riscv64/gemv_t.c
|
||||||
|
DGEMVTKERNEL = ../riscv64/gemv_t.c
|
||||||
|
CGEMVTKERNEL = ../riscv64/zgemv_t.c
|
||||||
|
ZGEMVTKERNEL = ../riscv64/zgemv_t.c
|
||||||
|
|
||||||
|
STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||||
|
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||||
|
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
|
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
|
|
||||||
|
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
|
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
|
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
|
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
|
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||||
|
|
||||||
|
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||||
|
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||||
|
|
||||||
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
SSYMV_U_KERNEL = ../generic/symv_k.c
|
||||||
|
SSYMV_L_KERNEL = ../generic/symv_k.c
|
||||||
|
DSYMV_U_KERNEL = ../generic/symv_k.c
|
||||||
|
DSYMV_L_KERNEL = ../generic/symv_k.c
|
||||||
|
CSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||||
|
CSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||||
|
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||||
|
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||||
|
|
||||||
|
|
||||||
|
LSAME_KERNEL = ../generic/lsame.c
|
||||||
|
|
||||||
|
SCABS_KERNEL = ../generic/cabs.c
|
||||||
|
DCABS_KERNEL = ../generic/cabs.c
|
||||||
|
QCABS_KERNEL = ../generic/cabs.c
|
||||||
|
|
||||||
|
ifndef SGEMM_BETA
|
||||||
|
SGEMM_BETA = ../generic/gemm_beta.c
|
||||||
|
endif
|
||||||
|
ifndef DGEMM_BETA
|
||||||
|
DGEMM_BETA = ../generic/gemm_beta.c
|
||||||
|
endif
|
||||||
|
ifndef CGEMM_BETA
|
||||||
|
CGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
|
endif
|
||||||
|
ifndef ZGEMM_BETA
|
||||||
|
ZGEMM_BETA = ../generic/zgemm_beta.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMINCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
SGEMMITCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
else
|
||||||
|
|
||||||
|
SAMAXKERNEL = amax_rvv.c
|
||||||
|
DAMAXKERNEL = amax_rvv.c
|
||||||
|
CAMAXKERNEL = zamax_rvv.c
|
||||||
|
ZAMAXKERNEL = zamax_rvv.c
|
||||||
|
|
||||||
|
SAMINKERNEL = amin_rvv.c
|
||||||
|
DAMINKERNEL = amin_rvv.c
|
||||||
|
CAMINKERNEL = zamin_rvv.c
|
||||||
|
ZAMINKERNEL = zamin_rvv.c
|
||||||
|
|
||||||
|
SMAXKERNEL = max_rvv.c
|
||||||
|
DMAXKERNEL = max_rvv.c
|
||||||
|
|
||||||
|
SMINKERNEL = min_rvv.c
|
||||||
|
DMINKERNEL = min_rvv.c
|
||||||
|
|
||||||
|
ISAMAXKERNEL = iamax_rvv.c
|
||||||
|
IDAMAXKERNEL = iamax_rvv.c
|
||||||
|
ICAMAXKERNEL = izamax_rvv.c
|
||||||
|
IZAMAXKERNEL = izamax_rvv.c
|
||||||
|
|
||||||
|
ISAMINKERNEL = iamin_rvv.c
|
||||||
|
IDAMINKERNEL = iamin_rvv.c
|
||||||
|
ICAMINKERNEL = izamin_rvv.c
|
||||||
|
IZAMINKERNEL = izamin_rvv.c
|
||||||
|
|
||||||
|
ISMAXKERNEL = imax_rvv.c
|
||||||
|
IDMAXKERNEL = imax_rvv.c
|
||||||
|
|
||||||
|
ISMINKERNEL = imin_rvv.c
|
||||||
|
IDMINKERNEL = imin_rvv.c
|
||||||
|
|
||||||
|
SASUMKERNEL = asum_rvv.c
|
||||||
|
DASUMKERNEL = asum_rvv.c
|
||||||
|
CASUMKERNEL = zasum_rvv.c
|
||||||
|
ZASUMKERNEL = zasum_rvv.c
|
||||||
|
|
||||||
|
SSUMKERNEL = sum_rvv.c
|
||||||
|
DSUMKERNEL = sum_rvv.c
|
||||||
|
CSUMKERNEL = zsum_rvv.c
|
||||||
|
ZSUMKERNEL = zsum_rvv.c
|
||||||
|
|
||||||
|
SAXPYKERNEL = axpy_rvv.c
|
||||||
|
DAXPYKERNEL = axpy_rvv.c
|
||||||
|
CAXPYKERNEL = zaxpy_rvv.c
|
||||||
|
ZAXPYKERNEL = zaxpy_rvv.c
|
||||||
|
|
||||||
|
SAXPBYKERNEL = axpby_rvv.c
|
||||||
|
DAXPBYKERNEL = axpby_rvv.c
|
||||||
|
CAXPBYKERNEL = zaxpby_rvv.c
|
||||||
|
ZAXPBYKERNEL = zaxpby_rvv.c
|
||||||
|
|
||||||
|
SCOPYKERNEL = copy_rvv.c
|
||||||
|
DCOPYKERNEL = copy_rvv.c
|
||||||
|
CCOPYKERNEL = zcopy_rvv.c
|
||||||
|
ZCOPYKERNEL = zcopy_rvv.c
|
||||||
|
|
||||||
|
SDOTKERNEL = dot_rvv.c
|
||||||
|
DDOTKERNEL = dot_rvv.c
|
||||||
|
CDOTKERNEL = zdot_rvv.c
|
||||||
|
ZDOTKERNEL = zdot_rvv.c
|
||||||
|
DSDOTKERNEL = dot_rvv.c
|
||||||
|
|
||||||
|
SNRM2KERNEL = nrm2_rvv.c
|
||||||
|
DNRM2KERNEL = nrm2_rvv.c
|
||||||
|
CNRM2KERNEL = znrm2_rvv.c
|
||||||
|
ZNRM2KERNEL = znrm2_rvv.c
|
||||||
|
|
||||||
|
SROTKERNEL = rot_rvv.c
|
||||||
|
DROTKERNEL = rot_rvv.c
|
||||||
|
CROTKERNEL = zrot_rvv.c
|
||||||
|
ZROTKERNEL = zrot_rvv.c
|
||||||
|
|
||||||
|
SSCALKERNEL = scal_rvv.c
|
||||||
|
DSCALKERNEL = scal_rvv.c
|
||||||
|
CSCALKERNEL = zscal_rvv.c
|
||||||
|
ZSCALKERNEL = zscal_rvv.c
|
||||||
|
|
||||||
|
SSWAPKERNEL = swap_rvv.c
|
||||||
|
DSWAPKERNEL = swap_rvv.c
|
||||||
|
CSWAPKERNEL = zswap_rvv.c
|
||||||
|
ZSWAPKERNEL = zswap_rvv.c
|
||||||
|
|
||||||
|
SGEMVNKERNEL = gemv_n_rvv.c
|
||||||
|
DGEMVNKERNEL = gemv_n_rvv.c
|
||||||
|
CGEMVNKERNEL = zgemv_n_rvv.c
|
||||||
|
ZGEMVNKERNEL = zgemv_n_rvv.c
|
||||||
|
|
||||||
|
SGEMVTKERNEL = gemv_t_rvv.c
|
||||||
|
DGEMVTKERNEL = gemv_t_rvv.c
|
||||||
|
CGEMVTKERNEL = zgemv_t_rvv.c
|
||||||
|
ZGEMVTKERNEL = zgemv_t_rvv.c
|
||||||
|
|
||||||
|
CTRMMKERNEL = ztrmmkernel_2x2_rvv.c
|
||||||
|
ZTRMMKERNEL = ztrmmkernel_2x2_rvv.c
|
||||||
|
|
||||||
|
# SGEMM_UNROLL_N is set in param.h
|
||||||
|
ifeq ($(SGEMM_UNROLL_N), 8)
|
||||||
|
# UNROLL_M is VLMAX
|
||||||
|
SGEMMKERNEL = gemmkernel_rvv_v1x8.c
|
||||||
|
SGEMMINCOPY = gemm_ncopy_rvv_v1.c
|
||||||
|
SGEMMITCOPY = gemm_tcopy_rvv_v1.c
|
||||||
|
SGEMMONCOPY = gemm_ncopy_$(SGEMM_UNROLL_N)_rvv.c
|
||||||
|
SGEMMOTCOPY = gemm_tcopy_$(SGEMM_UNROLL_N)_rvv.c
|
||||||
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
STRMMKERNEL = trmmkernel_rvv_v1x8.c
|
||||||
|
|
||||||
|
STRMMUNCOPY_M = trmm_uncopy_rvv_v1.c
|
||||||
|
STRMMLNCOPY_M = trmm_lncopy_rvv_v1.c
|
||||||
|
STRMMUTCOPY_M = trmm_utcopy_rvv_v1.c
|
||||||
|
STRMMLTCOPY_M = trmm_ltcopy_rvv_v1.c
|
||||||
|
|
||||||
|
SSYMMUCOPY_M = symm_ucopy_rvv_v1.c
|
||||||
|
SSYMMLCOPY_M = symm_lcopy_rvv_v1.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
# DGEMM_UNROLL_N is set in param.h
|
||||||
|
ifeq ($(DGEMM_UNROLL_N), 8)
|
||||||
|
# UNROLL_M is VLMAX
|
||||||
|
DGEMMKERNEL = gemmkernel_rvv_v1x8.c
|
||||||
|
DGEMMINCOPY = gemm_ncopy_rvv_v1.c
|
||||||
|
DGEMMITCOPY = gemm_tcopy_rvv_v1.c
|
||||||
|
DGEMMONCOPY = gemm_ncopy_$(DGEMM_UNROLL_N)_rvv.c
|
||||||
|
DGEMMOTCOPY = gemm_tcopy_$(DGEMM_UNROLL_N)_rvv.c
|
||||||
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
DTRMMKERNEL = trmmkernel_rvv_v1x8.c
|
||||||
|
DTRMMUNCOPY_M = trmm_uncopy_rvv_v1.c
|
||||||
|
DTRMMLNCOPY_M = trmm_lncopy_rvv_v1.c
|
||||||
|
DTRMMUTCOPY_M = trmm_utcopy_rvv_v1.c
|
||||||
|
DTRMMLTCOPY_M = trmm_ltcopy_rvv_v1.c
|
||||||
|
|
||||||
|
DSYMMUCOPY_M = symm_ucopy_rvv_v1.c
|
||||||
|
DSYMMLCOPY_M = symm_lcopy_rvv_v1.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||||
|
|
||||||
|
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||||
|
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||||
|
|
||||||
|
STRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
|
||||||
|
STRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
|
||||||
|
STRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
|
||||||
|
STRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
|
||||||
|
|
||||||
|
DTRSMKERNEL_LN = trsm_kernel_LN_rvv_v1.c
|
||||||
|
DTRSMKERNEL_LT = trsm_kernel_LT_rvv_v1.c
|
||||||
|
DTRSMKERNEL_RN = trsm_kernel_RN_rvv_v1.c
|
||||||
|
DTRSMKERNEL_RT = trsm_kernel_RT_rvv_v1.c
|
||||||
|
|
||||||
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
TRSMCOPYLN_M = trsm_lncopy_rvv_v1.c
|
||||||
|
TRSMCOPYLT_M = trsm_ltcopy_rvv_v1.c
|
||||||
|
TRSMCOPYUN_M = trsm_uncopy_rvv_v1.c
|
||||||
|
TRSMCOPYUT_M = trsm_utcopy_rvv_v1.c
|
||||||
|
|
||||||
|
SSYMV_U_KERNEL = symv_U_rvv.c
|
||||||
|
SSYMV_L_KERNEL = symv_L_rvv.c
|
||||||
|
DSYMV_U_KERNEL = symv_U_rvv.c
|
||||||
|
DSYMV_L_KERNEL = symv_L_rvv.c
|
||||||
|
CSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||||
|
CSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||||
|
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
|
||||||
|
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
|
||||||
|
|
||||||
|
|
||||||
|
LSAME_KERNEL = ../generic/lsame.c
|
||||||
|
|
||||||
|
SCABS_KERNEL = ../generic/cabs.c
|
||||||
|
DCABS_KERNEL = ../generic/cabs.c
|
||||||
|
QCABS_KERNEL = ../generic/cabs.c
|
||||||
|
|
||||||
|
ifndef SGEMM_BETA
|
||||||
|
SGEMM_BETA = gemm_beta_rvv.c
|
||||||
|
endif
|
||||||
|
ifndef DGEMM_BETA
|
||||||
|
DGEMM_BETA = gemm_beta_rvv.c
|
||||||
|
endif
|
||||||
|
ifndef CGEMM_BETA
|
||||||
|
CGEMM_BETA = zgemm_beta_rvv.c
|
||||||
|
endif
|
||||||
|
ifndef ZGEMM_BETA
|
||||||
|
ZGEMM_BETA = zgemm_beta_rvv.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
endif
|
47
param.h
47
param.h
|
@ -3082,6 +3082,53 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define GEMM_DEFAULT_OFFSET_B 0
|
#define GEMM_DEFAULT_OFFSET_B 0
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(UX900FD)
|
||||||
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
|
#define GEMM_DEFAULT_OFFSET_B 0
|
||||||
|
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_M 16 // 4 // 16 // 2
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_N 8 // 4 // 4 // 2
|
||||||
|
/* SGEMM_UNROLL_MN is calculated as max(SGEMM_UNROLL_M, SGEMM_UNROLL_N)
|
||||||
|
* Since we don't define SGEMM_UNROLL_M correctly we have to manually set this macro.
|
||||||
|
* If VLMAX size is ever more than 1024, this should be increased also. */
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_MN 32
|
||||||
|
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_M 16 //2 // 8
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_N 8 //2 // 4
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_MN 32
|
||||||
|
|
||||||
|
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_P 160
|
||||||
|
#define DGEMM_DEFAULT_P 160
|
||||||
|
#define CGEMM_DEFAULT_P 96
|
||||||
|
#define ZGEMM_DEFAULT_P 64
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_Q 240
|
||||||
|
#define DGEMM_DEFAULT_Q 128
|
||||||
|
#define CGEMM_DEFAULT_Q 120
|
||||||
|
#define ZGEMM_DEFAULT_Q 120
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_R 12288
|
||||||
|
#define DGEMM_DEFAULT_R 8192
|
||||||
|
#define CGEMM_DEFAULT_R 4096
|
||||||
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
|
#define SYMV_P 16
|
||||||
|
|
||||||
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
|
#define GEMM_DEFAULT_OFFSET_B 0
|
||||||
|
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef C910V
|
#ifdef C910V
|
||||||
#define GEMM_DEFAULT_OFFSET_A 0
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
#define GEMM_DEFAULT_OFFSET_B 0
|
#define GEMM_DEFAULT_OFFSET_B 0
|
||||||
|
|
Loading…
Reference in New Issue