Merge pull request #4127 from XiWeiGu/LoongArch64-CI

LoongArch64 CI
This commit is contained in:
Martin Kroeker 2023-08-05 18:19:47 +02:00 committed by GitHub
commit ef4a7e3fca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 208 additions and 14 deletions

110
.github/workflows/loongarch64.yml vendored Normal file
View File

@ -0,0 +1,110 @@
# Cross-compiles OpenBLAS for LoongArch64 and runs the utest / ctest / test
# binaries through the qemu-loongarch64-static user-mode emulator.
name: loongarch64 qemu test

on: [push, pull_request]

# Least privilege: the job only needs to read the repository contents.
permissions:
  contents: read

jobs:
  TEST:
    runs-on: ubuntu-latest

    strategy:
      # Keep building the remaining targets when one of them fails.
      fail-fast: false
      matrix:
        include:
          - target: LOONGSONGENERIC
            triple: loongarch64-unknown-linux-gnu
            opts: NO_SHARED=1 TARGET=LOONGSONGENERIC
          - target: LOONGSON3R5
            triple: loongarch64-unknown-linux-gnu
            opts: NO_SHARED=1 TARGET=LOONGSON3R5
          - target: LOONGSON2K1000
            triple: loongarch64-unknown-linux-gnu
            opts: NO_SHARED=1 TARGET=LOONGSON2K1000

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      # `-y` keeps both commands non-interactive regardless of how apt is
      # configured on the runner image.
      # NOTE(review): the PPA is presumably needed for a qemu-user-static that
      # supports loongarch64 -- confirm.
      - name: Install APT deps
        run: |
          sudo add-apt-repository -y ppa:savoury1/virtualisation
          sudo apt-get update
          sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache \
            qemu-user-static

      # Unpacks the prebuilt cross toolchain under /opt (-> /opt/cross-tools,
      # the prefix the "Set env" step refers to).
      - name: Download and install loongarch64-toolchain
        run: |
          wget https://github.com/loongson/build-tools/releases/download/2022.09.06/loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz
          tar -xf loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -C /opt

      # Values appended to $GITHUB_ENV become environment variables for all
      # following steps of this job.
      - name: Set env
        run: |
          echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64:$LD_LIBRARY_PATH" >> "$GITHUB_ENV"
          echo "PATH=$GITHUB_WORKSPACE:/opt/cross-tools/bin:$PATH" >> "$GITHUB_ENV"

      # The key contains the commit SHA, so every commit stores a fresh cache;
      # restore-keys fall back to the newest cache of the same ref, then of the
      # same target.
      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.target }}

      - name: Configure ccache
        run: |
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      # Truncates utest/test_dsdot.c to an empty file so the dsdot utest is
      # not compiled in; the echo lines only document why in the job log.
      - name: Disable utest dsdot:dsdot_n_1
        run: |
          echo -n > utest/test_dsdot.c
          echo "Due to the qemu versions 7.2 causing utest cases to fail,"
          echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."

      # Cross-build with the matrix toolchain, linking statically (-static);
      # HOSTCC is the native gcc for build-time helper programs.
      - name: Build OpenBLAS
        run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

      # Runs every test binary under qemu user-mode emulation. The level 1-3
      # Fortran tests run once single-threaded and once with two threads; the
      # ?BLAT2/?BLAT3 summary files are removed before each pass.
      - name: Test
        run: |
          qemu-loongarch64-static ./utest/openblas_utest
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
          rm -f ./test/?BLAT2.SUMM
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
          rm -f ./test/?BLAT2.SUMM
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
          rm -f ./test/?BLAT3.SUMM
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
          OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
          rm -f ./test/?BLAT3.SUMM
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
          OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat

View File

@ -1770,6 +1770,8 @@ export TARGET_CORE
export NO_AVX512
export NO_AVX2
export BUILD_BFLOAT16
export NO_LSX
export NO_LASX
export SBGEMM_UNROLL_M
export SBGEMM_UNROLL_N

35
c_check
View File

@ -185,6 +185,37 @@ if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then
rm -rf "$tmpd"
fi
# Probe whether the compiler can build LoongArch SIMD code.
# no_lsx / no_lasx stay 0 unless the architecture is loongarch64 AND the
# corresponding probe program fails to build; they are then set to 1.
no_lsx=0
no_lasx=0
if [ "$architecture" = "loongarch64" ]; then
    tmpd="$(mktemp -d)"

    # LSX probe: include the intrinsics header and emit one vector instruction.
    tmplsx="$tmpd/lsx.c"
    codelsx='"vadd.b $vr0, $vr0, $vr0"'
    lsx_flags='-march=loongarch64 -mlsx'
    {
        printf "#include <lsxintrin.h>\n\n"
        printf "void main(void){ __asm__ volatile(%s);}\n" "$codelsx"
    } >> "$tmplsx"
    args="$lsx_flags -o $tmplsx.o $tmplsx"
    # $flags and $args are left unquoted on purpose so they split into words.
    if ! $compiler_name $flags $args >/dev/null 2>&1; then
        no_lsx=1
    fi

    # LASX probe: same scheme with the LASX header, flag and instruction.
    tmplasx="$tmpd/lasx.c"
    codelasx='"xvadd.b $xr0, $xr0, $xr0"'
    lasx_flags='-march=loongarch64 -mlasx'
    {
        printf "#include <lasxintrin.h>\n\n"
        printf "void main(void){ __asm__ volatile(%s);}\n" "$codelasx"
    } >> "$tmplasx"
    args="$lasx_flags -o $tmplasx.o $tmplasx"
    if ! $compiler_name $flags $args >/dev/null 2>&1; then
        no_lasx=1
    fi

    # Removes the probe sources and any objects that were produced.
    rm -rf "$tmpd"
fi
case "$data" in
*ARCH_X86_64*) architecture=x86_64 ;;
*ARCH_X86*) architecture=x86 ;;
@ -399,6 +430,8 @@ done
[ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n"
[ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
[ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n"
[ "$no_lsx" -eq 1 ] && printf "NO_LSX=1\n"
[ "$no_lasx" -eq 1 ] && printf "NO_LASX=1\n"
} >> "$makefile"
os=`echo "$os" | tr '[[:lower:]]' '[[:upper:]]'/ `
@ -414,6 +447,8 @@ compiler=`echo "$compiler" | tr '[[:lower:]]' '[[:upper:]]' `
[ -n "$need_fu" ] && printf "#define FUNDERSCORE\t%s\n" "$need_fu"
[ "$no_msa" -eq 1 ] && printf "#define NO_MSA\t1\n"
[ "$c11_atomics" -eq 1 ] && printf "#define HAVE_C11\t1\n"
[ "$no_lsx" -eq 1 ] && printf "#define NO_LSX\t1\n"
[ "$no_lasx" -eq 1 ] && printf "#define NO_LASX\t1\n"
} >> "$config"

View File

@ -232,6 +232,47 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) {
}
}
# Probe whether the compiler can build LoongArch SIMD code.
# $no_lsx / $no_lasx stay 0 unless the architecture is loongarch64 and the
# corresponding probe program fails to build; they are then set to 1.
# If File::Temp cannot be loaded the probes are skipped (with a warning) and
# both flags remain 0.
$no_lsx = 0;
$no_lasx = 0;
if ($architecture eq "loongarch64") {
    eval "use File::Temp qw(tempfile)";
    if ($@) {
        warn "could not load PERL module File::Temp, so could not check LSX and LASX capability";
    } else {
        # LSX probe: include the intrinsics header and emit one vector instruction.
        $tmplsx = File::Temp->new( SUFFIX => '.c', UNLINK => 1 );
        $codelsx = '"vadd.b $vr0, $vr0, $vr0"';
        $lsx_flags = "-march=loongarch64 -mlsx";
        print $tmplsx "#include <lsxintrin.h>\n\n";
        print $tmplsx "void main(void){ __asm__ volatile($codelsx); }\n";
        # Make sure the source is on disk before the compiler reads it.
        $tmplsx->flush;
        $args = "$lsx_flags -o $tmplsx.o $tmplsx";
        # system() returns the same status that is stored in $?.
        $no_lsx = (system("$compiler_name $flags $args >/dev/null 2>/dev/null") != 0) ? 1 : 0;
        unlink("$tmplsx.o");

        # LASX probe: same scheme with the LASX header, flag and instruction.
        $tmplasx = File::Temp->new( SUFFIX => '.c', UNLINK => 1 );
        $codelasx = '"xvadd.b $xr0, $xr0, $xr0"';
        $lasx_flags = "-march=loongarch64 -mlasx";
        print $tmplasx "#include <lasxintrin.h>\n\n";
        print $tmplasx "void main(void){ __asm__ volatile($codelasx); }\n";
        $tmplasx->flush;
        $args = "$lasx_flags -o $tmplasx.o $tmplasx";
        $no_lasx = (system("$compiler_name $flags $args >/dev/null 2>/dev/null") != 0) ? 1 : 0;
        unlink("$tmplasx.o");
    }
}
$architecture = x86 if ($data =~ /ARCH_X86/);
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
$architecture = e2k if ($data =~ /ARCH_E2K/);
@ -424,6 +465,8 @@ print MAKEFILE "NO_RV64GV=1\n" if $no_rv64gv eq 1;
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
print MAKEFILE "NO_AVX2=1\n" if $no_avx2 eq 1;
print MAKEFILE "OLDGCC=1\n" if $oldgcc eq 1;
print MAKEFILE "NO_LSX=1\n" if $no_lsx eq 1;
print MAKEFILE "NO_LASX=1\n" if $no_lasx eq 1;
$os =~ tr/[a-z]/[A-Z]/;
$architecture =~ tr/[a-z]/[A-Z]/;
@ -437,6 +480,8 @@ print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1;
print CONFFILE "#define NO_LSX\t1\n" if $no_lsx eq 1;
print CONFFILE "#define NO_LASX\t1\n" if $no_lasx eq 1;
if ($os eq "LINUX") {

View File

@ -32,6 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#include <stdint.h>
#include <sys/auxv.h>
/* If LASX extension instructions supported,
* using core LOONGSON3R5
@ -46,9 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CPU_LOONGSON3R5 1
#define CPU_LOONGSON2K1000 2
#define LOONGARCH_CFG2 0x02
#define LOONGARCH_LASX 1<<7
#define LOONGARCH_LSX 1<<6
#define LA_HWCAP_LSX (1<<4)
#define LA_HWCAP_LASX (1<<5)
static char *cpuname[] = {
"LOONGSONGENERIC",
@ -64,17 +64,11 @@ static char *cpuname_lower[] = {
int detect(void) {
#ifdef __linux
uint32_t reg = 0;
int flag = (int)getauxval(AT_HWCAP);
__asm__ volatile (
"cpucfg %0, %1 \n\t"
: "+&r"(reg)
: "r"(LOONGARCH_CFG2)
);
if (reg & LOONGARCH_LASX)
if (flag & LA_HWCAP_LASX)
return CPU_LOONGSON3R5;
else if (reg & LOONGARCH_LSX)
else if (flag & LA_HWCAP_LSX)
return CPU_LOONGSON2K1000;
else
return CPU_GENERIC;

View File

@ -1,3 +1,4 @@
ifndef NO_LASX
DGEMMKERNEL = dgemm_kernel_16x4.S
DGEMMINCOPY = dgemm_ncopy_16.S
DGEMMITCOPY = dgemm_tcopy_16.S
@ -7,6 +8,7 @@ DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
endif
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c

10
param.h
View File

@ -2845,15 +2845,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
#define SGEMM_DEFAULT_UNROLL_N 8
#if defined(NO_LASX)
#define DGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_M 2
#else
#define DGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_M 16
#endif
#define SGEMM_DEFAULT_UNROLL_N 8
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_N 4
#define XGEMM_DEFAULT_UNROLL_N 1
#define SGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_M 16
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 1
#define ZGEMM_DEFAULT_UNROLL_M 1