Merge pull request #4504 from sergei-lewis/dev/slewis/ci
Add builds and unit tests for new RISCV platforms to CI
This commit is contained in:
commit
ebbf5b3ea0
|
@ -0,0 +1,253 @@
|
|||
# Workflow-level settings: display name, triggers, run de-duplication and
# the token permissions granted to every job.
name: riscv64 zvl256b qemu test

on:
  - push
  - pull_request

# Runs are grouped by workflow name plus the head ref (falling back to the
# run id when no head ref is set); a newer run in a group cancels the older.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read  # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
  TEST:
    # Only run in the upstream repository.
    if: github.repository == 'OpenMathLib/OpenBLAS'
    runs-on: ubuntu-latest
    # Job-wide environment: target triple and the location/version of the
    # prebuilt RISC-V toolchain that the install step downloads.
    env:
      triple: 'riscv64-unknown-linux-gnu'
      riscv_gnu_toolchain: 'https://github.com/riscv-collab/riscv-gnu-toolchain'
      riscv_gnu_toolchain_version: '13.2.0'
      riscv_gnu_toolchain_nightly_download_path: '/releases/download/2024.02.02/riscv64-glibc-ubuntu-22.04-llvm-nightly-2024.02.02-nightly.tar.gz'
    strategy:
      # Let the other matrix entry finish even if one of them fails.
      fail-fast: false
      matrix:
        include:
          # One entry per vector length; qemu_cpu is exported as QEMU_CPU
          # by the test steps.
          - target: RISCV64_ZVL128B
            opts: 'TARGET=RISCV64_ZVL128B BINARY=64 ARCH=riscv64'
            qemu_cpu: 'rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=128,elen=64'
          - target: RISCV64_ZVL256B
            opts: 'TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64'
            qemu_cpu: 'rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64'
|
||||
|
||||
steps:
|
||||
# Fetch the repository into the workspace.
# v4 replaces v3, which runs on the deprecated Node 16 action runtime.
- name: Checkout repository
  uses: actions/checkout@v4
|
||||
|
||||
# Install host build tools from apt and unpack the prebuilt RISC-V
# toolchain release under /opt.
- name: install build deps
  run: |
    sudo apt-get update
    # -y keeps the install non-interactive even where the runner image
    # does not pre-configure apt to assume "yes".
    sudo apt-get install -y autoconf automake autotools-dev ninja-build make \
      libgomp1-riscv64-cross ccache
    # The download path already begins with '/', so strip it before joining
    # to avoid a '//' in the URL.
    wget "${riscv_gnu_toolchain}/${riscv_gnu_toolchain_nightly_download_path#/}"
    tar -xvf "$(basename "${riscv_gnu_toolchain_nightly_download_path}")" -C /opt
|
||||
|
||||
# Persist the ccache directory between runs. The key is unique per commit,
# so every run saves a fresh cache; restore-keys fall back to the newest
# cache for the same ref, then for the same target.
# v4 replaces v3, which runs on the deprecated Node 16 action runtime.
- name: Compilation cache
  uses: actions/cache@v4
  with:
    path: ~/.ccache
    key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
    restore-keys: |
      ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
      ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
# Make sure the cache directory exists, rewrite its configuration from
# scratch and print the current cache statistics.
- name: Configure ccache
  run: |
    mkdir -p ~/.ccache
    printf '%s\n' "max_size = 300M" "compression = true" > ~/.ccache/ccache.conf
    ccache -s
|
||||
|
||||
# Cross-compile the library with clang from the downloaded toolchain.
- name: build OpenBLAS libs
  run: |
    export PATH="/opt/riscv/bin:$PATH"
    # The tool variables are single-quoted, so ${triple} and
    # ${riscv_gnu_toolchain_version} reach make unexpanded by the shell
    # (presumably make resolves them from the job environment).
    make \
      TARGET=${{ matrix.target }} \
      CFLAGS="-DTARGET=${{ matrix.target }}" \
      CC='ccache clang --rtlib=compiler-rt -target ${triple} --sysroot /opt/riscv/sysroot --gcc-toolchain=/opt/riscv/lib/gcc/riscv64-unknown-linux-gnu/${riscv_gnu_toolchain_version}/' \
      AR='ccache ${triple}-ar' \
      AS='ccache ${triple}-gcc' \
      LD='ccache ${triple}-gcc' \
      RANLIB='ccache ${triple}-ranlib' \
      FC='ccache ${triple}-gfortran' \
      ${{ matrix.opts }} \
      HOSTCC=gcc HOSTFC=gfortran \
      -j$(nproc)
|
||||
|
||||
# Build the `tests` make target; unlike the library build, CC here is the
# cross gcc rather than clang.
- name: build OpenBLAS tests
  run: |
    export PATH="/opt/riscv/bin:$PATH"
    # Single quotes keep ${triple} literal for the shell (presumably make
    # resolves it from the job environment).
    make \
      TARGET=${{ matrix.target }} \
      CFLAGS="-DTARGET=${{ matrix.target }}" \
      CC='${triple}-gcc' \
      AR='ccache ${triple}-ar' \
      AS='ccache ${triple}-gcc' \
      LD='ccache ${triple}-gcc' \
      RANLIB='ccache ${triple}-ranlib' \
      FC='ccache ${triple}-gfortran' \
      ${{ matrix.opts }} \
      HOSTCC=gcc HOSTFC=gfortran \
      -j$(nproc) \
      tests
|
||||
|
||||
# Build the LAPACK LIN/EIG test drivers from lapack-netlib/TESTING with the
# cross gcc toolchain.
- name: build lapack-netlib tests
  working-directory: ./lapack-netlib/TESTING
  run: |
    export PATH="/opt/riscv/bin:$PATH"
    # Single quotes keep ${triple} literal for the shell (presumably make
    # resolves it from the job environment).
    # The final target line carries no trailing backslash, so a line added
    # after the command is not silently absorbed into make's arguments.
    make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
      CC='${triple}-gcc' \
      AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
      RANLIB='ccache ${triple}-ranlib' \
      FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
      HOSTCC=gcc HOSTFC=gfortran -j$(nproc) \
      LIN/xlintsts LIN/xlintstc LIN/xlintstd LIN/xlintstz LIN/xlintstrfs \
      LIN/xlintstrfc LIN/xlintstrfd LIN/xlintstrfz LIN/xlintstds \
      LIN/xlintstzc EIG/xeigtsts EIG/xeigtstc EIG/xeigtstd EIG/xeigtstz
|
||||
|
||||
# Run the BLAS test programs from ./test and ./ctest under qemu-riscv64, all
# in parallel, then fail the step if any log contains the string FAIL.
- name: OpenBLAS tests
  shell: bash
  run: |
    export PATH="/opt/riscv/bin:$PATH"
    export QEMU_CPU=${{ matrix.qemu_cpu }}
    rm -rf ./test_out
    mkdir -p ./test_out

    # run_test DIR CMD [DATA]
    #   DIR   directory holding the test binary (also part of the log name)
    #   CMD   test executable inside DIR
    #   DATA  optional input file inside DIR, fed to the test on stdin
    # Writes a header plus the test's stdout/stderr to ./test_out/DIR.CMD,
    # appends a "*** FAIL" marker on a non-zero exit status, and finally
    # prints the log to the job output.
    run_test() {
      local DIR=$1 CMD=$2 DATA=$3
      local OUTPUT="./test_out/$DIR.$CMD"
      local RV=0
      echo "$(pwd)/$DIR/$CMD $DIR/$DATA" >> "$OUTPUT"
      # Append with a redirect instead of piping through tee: "$?" after a
      # pipeline is not reliably the emulator's status, and with
      # `bash -e -o pipefail` (used for `shell: bash` steps) a failing
      # pipeline would end this background subshell before the FAIL marker
      # is written. `|| RV=$?` records the status without tripping errexit.
      # Appending also keeps the header line, which `tee` used to truncate.
      if [[ -z $DATA ]]; then
        qemu-riscv64 "./$DIR/$CMD" >> "$OUTPUT" 2>&1 || RV=$?
      else
        qemu-riscv64 "./$DIR/$CMD" < "./$DIR/$DATA" >> "$OUTPUT" 2>&1 || RV=$?
      fi
      if [[ $RV != 0 ]]; then
        echo "*** FAIL: nonzero exit code $RV" >> "$OUTPUT"
      fi
      cat "$OUTPUT"
    }

    run_test test cblat1 &
    run_test test cblat2 cblat2.dat &
    run_test test cblat3 cblat3.dat &
    run_test test dblat1 &
    run_test test dblat2 dblat2.dat &
    run_test test dblat3 dblat3.dat &
    run_test test sblat1 &
    run_test test sblat2 sblat2.dat &
    run_test test sblat3 sblat3.dat &
    run_test test zblat1 &
    run_test test zblat2 zblat2.dat &
    run_test test zblat3 zblat3.dat &
    run_test ctest xccblat1 &
    run_test ctest xccblat2 cin2 &
    run_test ctest xccblat3 cin3 &
    run_test ctest xdcblat1 &
    run_test ctest xdcblat2 din2 &
    run_test ctest xdcblat3 din3 &
    run_test ctest xscblat1 &
    run_test ctest xscblat2 sin2 &
    run_test ctest xscblat3 sin3 &
    run_test ctest xzcblat1 &
    run_test ctest xzcblat2 zin2 &
    run_test ctest xzcblat3 zin3 &
    wait

    # Re-print every log that contains FAIL and remember that one did.
    while IFS= read -r -d $'\0' LOG; do cat "$LOG" ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
    if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi
|
||||
|
||||
- name: netlib tests
|
||||
shell: bash
|
||||
run: |
|
||||
: # these take a very long time
|
||||
echo "Skipping netlib tests in CI"
|
||||
exit 0
|
||||
: # comment out exit above to enable the tests
|
||||
: # probably we want to identify a subset to run in CI
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
export QEMU_CPU=${{ matrix.qemu_cpu }}
|
||||
rm -rf ./test_out
|
||||
mkdir -p ./test_out
|
||||
# run_test OUT IN EXE TITLE
#   OUT    log file name, created under ./test_out
#   IN     input file under ./lapack-netlib/TESTING, fed to the test on stdin
#   EXE    test executable under ./lapack-netlib/TESTING
#   TITLE  human-readable description written as the first log line
# Runs EXE under qemu-riscv64, collects stdout/stderr in the log, appends
# "*** FAIL" markers for a non-zero exit status or suspicious log content,
# and finally prints the log to the job output.
run_test() {
  local OUTPUT="./test_out/$1"
  local DATA="./lapack-netlib/TESTING/$2"
  local CMD="./lapack-netlib/TESTING/$3"
  local RV=0
  echo "$4" >> "$OUTPUT"
  echo "$CMD" >> "$OUTPUT"
  # Append with a redirect instead of piping through tee: "$?" after a
  # pipeline is not reliably the emulator's status, and under
  # `bash -e -o pipefail` a failing pipeline would end this background
  # subshell before any marker is written. Appending also keeps the two
  # header lines, which `tee` used to truncate.
  qemu-riscv64 "$CMD" < "$DATA" >> "$OUTPUT" 2>&1 || RV=$?
  if [[ $RV != 0 ]]; then
    echo "*** FAIL: nonzero exit code $RV" >> "$OUTPUT"
  fi
  if grep -q fail "$OUTPUT"; then
    echo "*** FAIL: log contains 'fail'" >> "$OUTPUT"
  fi
  # Flag lines mentioning "rror" unless they also contain "passed" or
  # "largest error". No -q inside the chain: an early-exiting grep would
  # print nothing for the next stage (the original bug) or kill its
  # upstream with SIGPIPE under pipefail.
  if grep rror "$OUTPUT" | grep -v "passed" | grep -v "largest error" > /dev/null; then
    echo "*** FAIL: log contains 'error'" >> "$OUTPUT"
  fi
  cat "$OUTPUT"
}
|
||||
run_test stest.out stest.in LIN/xlintsts "Testing REAL LAPACK linear equation routines" &
|
||||
run_test ctest.out ctest.in LIN/xlintstc "Testing COMPLEX LAPACK linear equation routines" &
|
||||
run_test dtest.out dtest.in LIN/xlintstd "Testing DOUBLE PRECISION LAPACK linear equation routines" &
|
||||
run_test ztest.out ztest.in LIN/xlintstz "Testing COMPLEX16 LAPACK linear equation routines" &
|
||||
run_test dstest.out dstest.in LIN/xlintstds "Testing SINGLE-DOUBLE PRECISION LAPACK prototype linear equation routines" &
|
||||
run_test zctest.out zctest.in LIN/xlintstzc "Testing COMPLEX-COMPLEX16 LAPACK prototype linear equation routines" &
|
||||
run_test stest_rfp.out stest_rfp.in LIN/xlintstrfs "Testing REAL LAPACK RFP prototype linear equation routines" &
|
||||
run_test dtest_rfp.out dtest_rfp.in LIN/xlintstrfd "Testing DOUBLE PRECISION LAPACK RFP prototype linear equation routines" &
|
||||
run_test ctest_rfp.out ctest_rfp.in LIN/xlintstrfc "Testing COMPLEX LAPACK RFP prototype linear equation routines" &
|
||||
run_test ztest_rfp.out ztest_rfp.in LIN/xlintstrfz "Testing COMPLEX16 LAPACK RFP prototype linear equation routines" &
|
||||
run_test snep.out nep.in EIG/xeigtsts "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test ssep.out sep.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test sse2.out se2.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test ssvd.out svd.in EIG/xeigtsts "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test sec.out sec.in EIG/xeigtsts "SEC - Testing REAL Eigen Condition Routines" &
|
||||
run_test sed.out sed.in EIG/xeigtsts "SEV - Testing REAL Nonsymmetric Eigenvalue Driver" &
|
||||
run_test sgg.out sgg.in EIG/xeigtsts "SGG - Testing REAL Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test sgd.out sgd.in EIG/xeigtsts "SGD - Testing REAL Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test ssb.out ssb.in EIG/xeigtsts "SSB - Testing REAL Symmetric Eigenvalue Problem routines" &
|
||||
run_test ssg.out ssg.in EIG/xeigtsts "SSG - Testing REAL Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test sbal.out sbal.in EIG/xeigtsts "SGEBAL - Testing the balancing of a REAL general matrix" &
|
||||
run_test sbak.out sbak.in EIG/xeigtsts "SGEBAK - Testing the back transformation of a REAL balanced matrix" &
|
||||
run_test sgbal.out sgbal.in EIG/xeigtsts "SGGBAL - Testing the balancing of a pair of REAL general matrices" &
|
||||
run_test sgbak.out sgbak.in EIG/xeigtsts "SGGBAK - Testing the back transformation of a pair of REAL balanced matrices" &
|
||||
run_test sbb.out sbb.in EIG/xeigtsts "SBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test sglm.out glm.in EIG/xeigtsts "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test sgqr.out gqr.in EIG/xeigtsts "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test sgsv.out gsv.in EIG/xeigtsts "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test scsd.out csd.in EIG/xeigtsts "CSD - Testing CS Decomposition routines" &
|
||||
run_test slse.out lse.in EIG/xeigtsts "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
run_test cnep.out nep.in EIG/xeigtstc "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test csep.out sep.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test cse2.out se2.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test csvd.out svd.in EIG/xeigtstc "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test cec.out cec.in EIG/xeigtstc "CEC - Testing COMPLEX Eigen Condition Routines" &
|
||||
run_test ced.out ced.in EIG/xeigtstc "CES - Testing COMPLEX Nonsymmetric Schur Form Driver" &
|
||||
run_test cgg.out cgg.in EIG/xeigtstc "CGG - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test cgd.out cgd.in EIG/xeigtstc "CGD - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test csb.out csb.in EIG/xeigtstc "CHB - Testing Hermitian Eigenvalue Problem routines" &
|
||||
run_test csg.out csg.in EIG/xeigtstc "CSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test cbal.out cbal.in EIG/xeigtstc "CGEBAL - Testing the balancing of a COMPLEX general matrix" &
|
||||
run_test cbak.out cbak.in EIG/xeigtstc "CGEBAK - Testing the back transformation of a COMPLEX balanced matrix" &
|
||||
run_test cgbal.out cgbal.in EIG/xeigtstc "CGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
|
||||
run_test cgbak.out cgbak.in EIG/xeigtstc "CGGBAK - Testing the back transformation of a pair of COMPLEX balanced matrices" &
|
||||
run_test cbb.out cbb.in EIG/xeigtstc "CBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test cglm.out glm.in EIG/xeigtstc "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test cgqr.out gqr.in EIG/xeigtstc "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test cgsv.out gsv.in EIG/xeigtstc "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test ccsd.out csd.in EIG/xeigtstc "CSD - Testing CS Decomposition routines" &
|
||||
run_test clse.out lse.in EIG/xeigtstc "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
run_test dnep.out nep.in EIG/xeigtstd "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test dsep.out sep.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test dse2.out se2.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test dsvd.out svd.in EIG/xeigtstd "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test dec.out dec.in EIG/xeigtstd "DEC - Testing DOUBLE PRECISION Eigen Condition Routines" &
|
||||
run_test ded.out ded.in EIG/xeigtstd "DEV - Testing DOUBLE PRECISION Nonsymmetric Eigenvalue Driver" &
|
||||
run_test dgg.out dgg.in EIG/xeigtstd "DGG - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test dgd.out dgd.in EIG/xeigtstd "DGD - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test dsb.out dsb.in EIG/xeigtstd "DSB - Testing DOUBLE PRECISION Symmetric Eigenvalue Problem routines" &
|
||||
run_test dsg.out dsg.in EIG/xeigtstd "DSG - Testing DOUBLE PRECISION Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test dbal.out dbal.in EIG/xeigtstd "DGEBAL - Testing the balancing of a DOUBLE PRECISION general matrix" &
|
||||
run_test dbak.out dbak.in EIG/xeigtstd "DGEBAK - Testing the back transformation of a DOUBLE PRECISION balanced matrix" &
|
||||
run_test dgbal.out dgbal.in EIG/xeigtstd "DGGBAL - Testing the balancing of a pair of DOUBLE PRECISION general matrices" &
|
||||
run_test dgbak.out dgbak.in EIG/xeigtstd "DGGBAK - Testing the back transformation of a pair of DOUBLE PRECISION balanced matrices" &
|
||||
run_test dbb.out dbb.in EIG/xeigtstd "DBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test dglm.out glm.in EIG/xeigtstd "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test dgqr.out gqr.in EIG/xeigtstd "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test dgsv.out gsv.in EIG/xeigtstd "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test dcsd.out csd.in EIG/xeigtstd "CSD - Testing CS Decomposition routines" &
|
||||
run_test dlse.out lse.in EIG/xeigtstd "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
run_test znep.out nep.in EIG/xeigtstz "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test zsep.out sep.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test zse2.out se2.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test zsvd.out svd.in EIG/xeigtstz "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test zec.out zec.in EIG/xeigtstz "ZEC - Testing COMPLEX16 Eigen Condition Routines" &
|
||||
run_test zed.out zed.in EIG/xeigtstz "ZES - Testing COMPLEX16 Nonsymmetric Schur Form Driver" &
|
||||
run_test zgg.out zgg.in EIG/xeigtstz "ZGG - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test zgd.out zgd.in EIG/xeigtstz "ZGD - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test zsb.out zsb.in EIG/xeigtstz "ZHB - Testing Hermitian Eigenvalue Problem routines" &
|
||||
run_test zsg.out zsg.in EIG/xeigtstz "ZSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test zbal.out zbal.in EIG/xeigtstz "ZGEBAL - Testing the balancing of a COMPLEX16 general matrix" &
|
||||
run_test zbak.out zbak.in EIG/xeigtstz "ZGEBAK - Testing the back transformation of a COMPLEX16 balanced matrix" &
|
||||
run_test zgbal.out zgbal.in EIG/xeigtstz "ZGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
|
||||
run_test zgbak.out zgbak.in EIG/xeigtstz "ZGGBAK - Testing the back transformation of a pair of COMPLEX16 balanced matrices" &
|
||||
run_test zbb.out zbb.in EIG/xeigtstz "ZBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test zglm.out glm.in EIG/xeigtstz "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test zgqr.out gqr.in EIG/xeigtstz "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test zgsv.out gsv.in EIG/xeigtstz "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test zcsd.out csd.in EIG/xeigtstz "CSD - Testing CS Decomposition routines" &
|
||||
run_test zlse.out lse.in EIG/xeigtstz "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
wait
|
||||
# Re-print every log that carries a FAIL marker and remember that one did.
while IFS= read -r -d $'\0' LOG; do cat "$LOG" ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
# Summarise all logs in ./test_out with the LAPACK testing script.
python ./lapack-netlib/lapack_testing.py -d ./test_out -e > netlib_summary
# Start from -1 so that a summary without an 'ALL PRECISIONS' line is
# treated as a failure by the check below. (The former unused
# TOTALS=$(grep ...) assignment was dropped: under `bash -e` it aborted the
# script on a missing line before the summary could be printed.)
NUMERICAL_ERRORS=-1
OTHER_ERRORS=-1
# Source the two assignments awk builds from fields 5 and 7 of the
# 'ALL PRECISIONS' line; the process substitution is now properly closed.
. <(awk '/ALL PRECISIONS/{printf "NUMERICAL_ERRORS=%s\nOTHER_ERRORS=%s\n", $5, $7}' netlib_summary)
if (( NUMERICAL_ERRORS != 0 )) || (( OTHER_ERRORS != 0 )) ; then cat netlib_summary ; FAILURES=1 ; fi
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi
|
10
README.md
10
README.md
|
@ -203,6 +203,16 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
|
|||
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran
|
||||
```
|
||||
|
||||
- **ZVL???B**: Level-3 BLAS and Level-1,2 including vectorised kernels targeting generic RISCV cores with vector support with registers of at least the corresponding width; ZVL128B and ZVL256B are available.
|
||||
e.g.:
|
||||
```sh
|
||||
make TARGET=RISCV64_ZVL256B CFLAGS="-DTARGET=RISCV64_ZVL256B" \
|
||||
BINARY=64 ARCH=riscv64 CC='clang -target riscv64-unknown-linux-gnu' \
|
||||
AR=riscv64-unknown-linux-gnu-ar AS=riscv64-unknown-linux-gnu-gcc \
|
||||
LD=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran \
|
||||
HOSTCC=gcc HOSTFC=gfortran -j
|
||||
```
|
||||
|
||||
### Support for multiple targets in a single library
|
||||
|
||||
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
|
||||
|
|
|
@ -91,7 +91,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
|
|||
#define BUFFER_SIZE ( 32 << 20)
|
||||
#define SEEK_ADDRESS
|
||||
|
||||
#if defined(C910V) || (defined(RISCV64_ZVL256B) && (defined(__clang__) || defined(RVV_COMPATIBLE_GCC))) || defined(RISCV64_ZVL128B) || defined(x280)
|
||||
#if defined(C910V) || defined(RISCV64_ZVL256B) || defined(RISCV64_ZVL128B) || defined(x280)
|
||||
# include <riscv_vector.h>
|
||||
#endif
|
||||
|
||||
|
|
|
@ -30,19 +30,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if !defined(DOUBLE)
|
||||
#define VSETVL(n) __riscv_vsetvl_e32m8(n)
|
||||
#define FLOAT_V_T vfloat32m8_t
|
||||
#define FLOAT_V_M1_T vfloat32m1_t
|
||||
#define VLEV_FLOAT __riscv_vle32_v_f32m8
|
||||
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
|
||||
#define VSEV_FLOAT __riscv_vse32_v_f32m8
|
||||
#define VSEV_FLOAT_M1 __riscv_vse32_v_f32m1
|
||||
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8
|
||||
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8
|
||||
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
|
||||
#define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f32m8_f32m1
|
||||
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f32m1
|
||||
#else
|
||||
#define VSETVL(n) __riscv_vsetvl_e64m8(n)
|
||||
#define FLOAT_V_T vfloat64m8_t
|
||||
#define FLOAT_V_M1_T vfloat64m1_t
|
||||
#define VLEV_FLOAT __riscv_vle64_v_f64m8
|
||||
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
|
||||
#define VSEV_FLOAT __riscv_vse64_v_f64m8
|
||||
#define VSEV_FLOAT_M1 __riscv_vse64_v_f64m1
|
||||
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8
|
||||
#define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8
|
||||
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
|
||||
#define VFREDSUMVS_FLOAT __riscv_vfredusum_vs_f64m8_f64m1
|
||||
#define VFMVVF_FLOAT_M1 __riscv_vfmv_v_f_f64m1
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
|
@ -76,7 +86,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
|
|||
VSEV_FLOAT(y, vy, vl);
|
||||
}
|
||||
|
||||
} else if (1 == inc_x) {
|
||||
} else if (1 == inc_x && 0 != inc_y) {
|
||||
|
||||
BLASLONG stride_y = inc_y * sizeof(FLOAT);
|
||||
|
||||
|
@ -89,8 +99,20 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
|
|||
VSSEV_FLOAT(y, stride_y, vy, vl);
|
||||
}
|
||||
|
||||
} else {
|
||||
} else if( 0 == inc_y ) {
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
size_t in_vl = VSETVL(n);
|
||||
vy = VFMVVF_FLOAT( y[0], in_vl );
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, x += vl*inc_x) {
|
||||
vl = VSETVL(n);
|
||||
vx = VLSEV_FLOAT(x, stride_x, vl);
|
||||
vy = VFMACCVF_FLOAT(vy, da, vx, vl);
|
||||
}
|
||||
FLOAT_V_M1_T vres = VFMVVF_FLOAT_M1( 0.0f, 1 );
|
||||
vres = VFREDSUMVS_FLOAT( vy, vres, in_vl );
|
||||
VSEV_FLOAT_M1(y, vres, 1);
|
||||
} else {
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
BLASLONG stride_y = inc_y * sizeof(FLOAT);
|
||||
|
||||
|
|
|
@ -51,11 +51,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define VSETVL JOIN(RISCV_RVV(vsetvl), _e, ELEN, LMUL, _)
|
||||
#define FLOAT_V_T JOIN(vfloat, ELEN, LMUL, _t, _)
|
||||
#define FLOAT_V_M1_T JOIN(vfloat, ELEN, m1, _t, _)
|
||||
#define VLEV_FLOAT JOIN(RISCV_RVV(vle), ELEN, _v_f, ELEN, LMUL)
|
||||
#define VLSEV_FLOAT JOIN(RISCV_RVV(vlse), ELEN, _v_f, ELEN, LMUL)
|
||||
#define VSEV_FLOAT JOIN(RISCV_RVV(vse), ELEN, _v_f, ELEN, LMUL)
|
||||
#define VSSEV_FLOAT JOIN(RISCV_RVV(vsse), ELEN, _v_f, ELEN, LMUL)
|
||||
#define VFMACCVF_FLOAT JOIN(RISCV_RVV(vfmacc), _vf_f, ELEN, LMUL, _)
|
||||
#define VFMVVF_FLOAT JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, LMUL, _)
|
||||
#define VFMVVF_FLOAT_M1 JOIN(RISCV_RVV(vfmv), _v_f_f, ELEN, m1, _)
|
||||
|
||||
#ifdef RISCV_0p10_INTRINSICS
|
||||
#define VFREDSUMVS_FLOAT(va, vb, gvl) JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))(v_res, va, vb, gvl)
|
||||
#else
|
||||
#define VFREDSUMVS_FLOAT JOIN(RISCV_RVV(vfredusum_vs_f), ELEN, LMUL, _f, JOIN2( ELEN, m1))
|
||||
#endif
|
||||
|
||||
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
|
||||
{
|
||||
|
@ -123,7 +132,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
|
|||
VSEV_FLOAT(&y[j], vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
}else if(inc_x == 1){
|
||||
} else if (1 == inc_x && 0 != inc_y) {
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
gvl = VSETVL(n);
|
||||
if(gvl <= n/2){
|
||||
|
@ -151,6 +160,19 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
|
|||
VSSEV_FLOAT(&y[j*inc_y], stride_y, vy0, gvl);
|
||||
j += gvl;
|
||||
}
|
||||
} else if( 0 == inc_y ) {
|
||||
BLASLONG stride_x = inc_x * sizeof(FLOAT);
|
||||
size_t in_vl = VSETVL(n);
|
||||
vy0 = VFMVVF_FLOAT( y[0], in_vl );
|
||||
|
||||
for (size_t vl; n > 0; n -= vl, x += vl*inc_x) {
|
||||
vl = VSETVL(n);
|
||||
vx0 = VLSEV_FLOAT(x, stride_x, vl);
|
||||
vy0 = VFMACCVF_FLOAT(vy0, da, vx0, vl);
|
||||
}
|
||||
FLOAT_V_M1_T v_res = VFMVVF_FLOAT_M1( 0.0f, 1 );
|
||||
v_res = VFREDSUMVS_FLOAT( vy0, v_res, in_vl );
|
||||
y[0] = EXTRACT_FLOAT(v_res);
|
||||
}else{
|
||||
stride_x = inc_x * sizeof(FLOAT);
|
||||
stride_y = inc_y * sizeof(FLOAT);
|
||||
|
|
Loading…
Reference in New Issue