commit
						ef4a7e3fca
					
				|  | @ -0,0 +1,110 @@ | |||
| name: loongarch64 qemu test | ||||
| 
 | ||||
| on: [push, pull_request] | ||||
| 
 | ||||
| jobs: | ||||
|   TEST: | ||||
|     runs-on: ubuntu-latest | ||||
|     strategy: | ||||
|       fail-fast: false | ||||
|       matrix: | ||||
|         include: | ||||
|           - target: LOONGSONGENERIC | ||||
|             triple:  loongarch64-unknown-linux-gnu | ||||
|             opts: NO_SHARED=1 TARGET=LOONGSONGENERIC | ||||
|           - target: LOONGSON3R5 | ||||
|             triple: loongarch64-unknown-linux-gnu | ||||
|             opts: NO_SHARED=1 TARGET=LOONGSON3R5 | ||||
|           - target: LOONGSON2K1000 | ||||
|             triple: loongarch64-unknown-linux-gnu | ||||
|             opts: NO_SHARED=1 TARGET=LOONGSON2K1000 | ||||
| 
 | ||||
|     steps: | ||||
|       - name: Checkout repository | ||||
|         uses: actions/checkout@v3 | ||||
| 
 | ||||
|       # Install the host-side build tools and a user-mode QEMU. | ||||
|       # NOTE(review): the savoury1/virtualisation PPA is presumably added to get a | ||||
|       # newer qemu-user-static than the runner's default archive - confirm. | ||||
|       - name: Install APT deps | ||||
|         run: | | ||||
|           # -y keeps every command non-interactive, so the job cannot stall or | ||||
|           # abort on a confirmation prompt regardless of the runner's apt config. | ||||
|           sudo add-apt-repository -y ppa:savoury1/virtualisation | ||||
|           sudo apt-get update | ||||
|           sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache \ | ||||
|             qemu-user-static | ||||
| 
 | ||||
|       - name: Download and install loongarch64-toolchain | ||||
|         run: | | ||||
|           wget https://github.com/loongson/build-tools/releases/download/2022.09.06/loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz | ||||
|           tar -xf loongarch64-clfs-7.3-cross-tools-gcc-glibc.tar.xz -C /opt | ||||
| 
 | ||||
|       # Publish the cross-toolchain locations to all later steps via GITHUB_ENV. | ||||
|       - name: Set env | ||||
|         run: | | ||||
|           # Append the previous LD_LIBRARY_PATH only when it is non-empty: a | ||||
|           # trailing ':' would create an empty entry, which the dynamic loader | ||||
|           # interprets as the current working directory. | ||||
|           echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}" >> "$GITHUB_ENV" | ||||
|           # Put the workspace and the cross compiler binaries ahead of the default PATH. | ||||
|           echo "PATH=$GITHUB_WORKSPACE:/opt/cross-tools/bin:$PATH" >> "$GITHUB_ENV" | ||||
| 
 | ||||
|       - name: Compilation cache | ||||
|         uses: actions/cache@v3 | ||||
|         with: | ||||
|           path: ~/.ccache | ||||
|           key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} | ||||
|           restore-keys: | | ||||
|             ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} | ||||
|             ccache-${{ runner.os }}-${{ matrix.target }} | ||||
| 
 | ||||
|       - name: Configure ccache | ||||
|         run: | | ||||
|           test -d ~/.ccache || mkdir -p ~/.ccache | ||||
|           echo "max_size = 300M" > ~/.ccache/ccache.conf | ||||
|           echo "compression = true" >> ~/.ccache/ccache.conf | ||||
|           ccache -s | ||||
| 
 | ||||
|       - name: Disable utest dsdot:dsdot_n_1 | ||||
|         run: | | ||||
|           echo -n > utest/test_dsdot.c | ||||
|           echo "Due to the qemu versions 7.2 causing utest cases to fail," | ||||
|           echo "the utest dsdot:dsdot_n_1 have been temporarily disabled." | ||||
| 
 | ||||
|       - name: Build OpenBLAS | ||||
|         run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc) | ||||
| 
 | ||||
|       # Run the unit tests, the CBLAS tests (ctest/) and the BLAS tests (test/) | ||||
|       # under user-mode QEMU. The loops iterate over the precision prefixes | ||||
|       # s, d, c, z and the BLAS levels, in the same order as the unrolled list. | ||||
|       - name: Test | ||||
|         run: | | ||||
|           qemu-loongarch64-static ./utest/openblas_utest | ||||
|           # CBLAS level 1 takes no input file. | ||||
|           for p in s d c z; do | ||||
|             OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static "./ctest/x${p}cblat1" | ||||
|           done | ||||
|           # CBLAS levels 2 and 3 read their parameters from ./ctest/<p>in<level>. | ||||
|           for lvl in 2 3; do | ||||
|             for p in s d c z; do | ||||
|               OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static "./ctest/x${p}cblat${lvl}" < "./ctest/${p}in${lvl}" | ||||
|             done | ||||
|           done | ||||
|           # BLAS level 1: single-threaded pass first, then two threads. | ||||
|           for p in s d c z; do | ||||
|             OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static "./test/${p}blat1" | ||||
|           done | ||||
|           for p in s d c z; do | ||||
|             OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static "./test/${p}blat1" | ||||
|           done | ||||
|           # BLAS levels 2 and 3: remove stale summary files before each pass. | ||||
|           for lvl in 2 3; do | ||||
|             rm -f ./test/?BLAT${lvl}.SUMM | ||||
|             for p in s d c z; do | ||||
|               OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static "./test/${p}blat${lvl}" < "./test/${p}blat${lvl}.dat" | ||||
|             done | ||||
|             rm -f ./test/?BLAT${lvl}.SUMM | ||||
|             for p in s d c z; do | ||||
|               OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static "./test/${p}blat${lvl}" < "./test/${p}blat${lvl}.dat" | ||||
|             done | ||||
|           done | ||||
|  | @ -1770,6 +1770,8 @@ export TARGET_CORE | |||
| export NO_AVX512 | ||||
| export NO_AVX2 | ||||
| export BUILD_BFLOAT16 | ||||
| export NO_LSX | ||||
| export NO_LASX | ||||
| 
 | ||||
| export SBGEMM_UNROLL_M | ||||
| export SBGEMM_UNROLL_N | ||||
|  |  | |||
							
								
								
									
										35
									
								
								c_check
								
								
								
								
							
							
						
						
									
										35
									
								
								c_check
								
								
								
								
							|  | @ -185,6 +185,37 @@ if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then | |||
|     rm -rf "$tmpd" | ||||
| fi | ||||
| 
 | ||||
| # Probe whether the compiler can build LoongArch SIMD code. Each flag stays 0 | ||||
| # unless the matching one-instruction test program fails to build; the probes | ||||
| # run only when $architecture is loongarch64. | ||||
| no_lsx=0 | ||||
| no_lasx=0 | ||||
| if [ "$architecture" = "loongarch64" ]; then | ||||
|     tmpd="$(mktemp -d)" | ||||
| 
 | ||||
|     # LSX probe: a single vadd.b built with -mlsx. | ||||
|     tmplsx="$tmpd/lsx.c" | ||||
|     codelsx='"vadd.b $vr0, $vr0, $vr0"' | ||||
|     lsx_flags='-march=loongarch64 -mlsx' | ||||
|     printf "#include <lsxintrin.h>\n\nvoid main(void){ __asm__ volatile(%s);}\n" "$codelsx" >> "$tmplsx" | ||||
|     args="$lsx_flags -o $tmplsx.o $tmplsx" | ||||
|     if ! $compiler_name $flags $args >/dev/null 2>&1; then | ||||
|         no_lsx=1 | ||||
|     fi | ||||
| 
 | ||||
|     # LASX probe: a single xvadd.b built with -mlasx. | ||||
|     tmplasx="$tmpd/lasx.c" | ||||
|     codelasx='"xvadd.b $xr0, $xr0, $xr0"' | ||||
|     lasx_flags='-march=loongarch64 -mlasx' | ||||
|     printf "#include <lasxintrin.h>\n\nvoid main(void){ __asm__ volatile(%s);}\n" "$codelasx" >> "$tmplasx" | ||||
|     args="$lasx_flags -o $tmplasx.o $tmplasx" | ||||
|     if ! $compiler_name $flags $args >/dev/null 2>&1; then | ||||
|         no_lasx=1 | ||||
|     fi | ||||
| 
 | ||||
|     # Sources and build outputs all live in the scratch directory. | ||||
|     rm -rf "$tmpd" | ||||
| fi | ||||
| 
 | ||||
| case "$data" in | ||||
|     *ARCH_X86_64*) architecture=x86_64 ;; | ||||
|     *ARCH_X86*) architecture=x86 ;; | ||||
|  | @ -399,6 +430,8 @@ done | |||
|     [ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n" | ||||
|     [ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n" | ||||
|     [ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n" | ||||
|     [ "$no_lsx" -eq 1 ] && printf "NO_LSX=1\n" | ||||
|     [ "$no_lasx" -eq 1 ] && printf "NO_LASX=1\n" | ||||
| } >> "$makefile" | ||||
| 
 | ||||
| os=`echo "$os" | tr '[[:lower:]]' '[[:upper:]]'/ ` | ||||
|  | @ -414,6 +447,8 @@ compiler=`echo "$compiler" | tr '[[:lower:]]' '[[:upper:]]' ` | |||
|     [ -n "$need_fu" ] && printf "#define FUNDERSCORE\t%s\n" "$need_fu" | ||||
|     [ "$no_msa" -eq 1 ] && printf "#define NO_MSA\t1\n" | ||||
|     [ "$c11_atomics" -eq 1 ] && printf "#define HAVE_C11\t1\n" | ||||
|     [ "$no_lsx" -eq 1 ] && printf "#define NO_LSX\t1\n" | ||||
|     [ "$no_lasx" -eq 1 ] && printf "#define NO_LASX\t1\n" | ||||
| } >> "$config" | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										45
									
								
								c_check.pl
								
								
								
								
							
							
						
						
									
										45
									
								
								c_check.pl
								
								
								
								
							|  | @ -232,6 +232,47 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| # Probe the compiler for LoongArch SIMD support: $no_lsx / $no_lasx become 1 | ||||
| # when a one-instruction LSX / LASX test program fails to build. Both stay 0 | ||||
| # on other architectures and when File::Temp cannot be loaded. | ||||
| $no_lsx = 0; | ||||
| $no_lasx = 0; | ||||
| if ($architecture eq "loongarch64") { | ||||
|     eval "use File::Temp qw(tempfile)"; | ||||
|     if ($@){ | ||||
| 	warn "could not load PERL module File::Temp, so could not check LSX and LASX capability"; | ||||
|     } else { | ||||
| 	# LSX probe. The File::Temp object stringifies to its file name, so it | ||||
| 	# can be interpolated straight into the compiler command line. | ||||
| 	$tmplsx = File::Temp->new( SUFFIX => '.c' , UNLINK => 1 ); | ||||
| 	$codelsx = '"vadd.b $vr0, $vr0, $vr0"'; | ||||
| 	$lsx_flags = "-march=loongarch64 -mlsx"; | ||||
| 	print $tmplsx "#include <lsxintrin.h>\n\n"; | ||||
| 	print $tmplsx "void main(void){ __asm__ volatile($codelsx); }\n"; | ||||
| 
 | ||||
| 	$args = "$lsx_flags -o $tmplsx.o $tmplsx"; | ||||
| 	# system() returns the wait status; any non-zero value means the build failed. | ||||
| 	$no_lsx = 1 if system("$compiler_name $flags $args >/dev/null 2>/dev/null") != 0; | ||||
| 	unlink("$tmplsx.o"); | ||||
| 
 | ||||
| 	# LASX probe, same procedure with the 256-bit instruction and -mlasx. | ||||
| 	$tmplasx = File::Temp->new( SUFFIX => '.c' , UNLINK => 1 ); | ||||
| 	$codelasx = '"xvadd.b $xr0, $xr0, $xr0"'; | ||||
| 	$lasx_flags = "-march=loongarch64 -mlasx"; | ||||
| 	print $tmplasx "#include <lasxintrin.h>\n\n"; | ||||
| 	print $tmplasx "void main(void){ __asm__ volatile($codelasx); }\n"; | ||||
| 
 | ||||
| 	$args = "$lasx_flags -o $tmplasx.o $tmplasx"; | ||||
| 	$no_lasx = 1 if system("$compiler_name $flags $args >/dev/null 2>/dev/null") != 0; | ||||
| 	unlink("$tmplasx.o"); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| $architecture = x86          if ($data =~ /ARCH_X86/); | ||||
| $architecture = x86_64       if ($data =~ /ARCH_X86_64/); | ||||
| $architecture = e2k          if ($data =~ /ARCH_E2K/); | ||||
|  | @ -424,6 +465,8 @@ print MAKEFILE "NO_RV64GV=1\n" if $no_rv64gv eq 1; | |||
| print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1; | ||||
| print MAKEFILE "NO_AVX2=1\n" if $no_avx2 eq 1; | ||||
| print MAKEFILE "OLDGCC=1\n" if $oldgcc eq 1; | ||||
| print MAKEFILE "NO_LSX=1\n" if $no_lsx eq 1; | ||||
| print MAKEFILE "NO_LASX=1\n" if $no_lasx eq 1; | ||||
| 
 | ||||
| $os           =~ tr/[a-z]/[A-Z]/; | ||||
| $architecture =~ tr/[a-z]/[A-Z]/; | ||||
|  | @ -437,6 +480,8 @@ print CONFFILE "#define __64BIT__\t1\n"  if $binformat eq bin64; | |||
| print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne ""; | ||||
| print CONFFILE "#define HAVE_MSA\t1\n"  if $have_msa eq 1; | ||||
| print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1; | ||||
| print CONFFILE "#define NO_LSX\t1\n" if $no_lsx eq 1; | ||||
| print CONFFILE "#define NO_LASX\t1\n" if $no_lasx eq 1; | ||||
| 
 | ||||
| 
 | ||||
| if ($os eq "LINUX") { | ||||
|  |  | |||
|  | @ -32,6 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| **********************************************************************************/ | ||||
| 
 | ||||
| #include <stdint.h> | ||||
| #include <sys/auxv.h> | ||||
| 
 | ||||
| /*  If LASX extension instructions supported,
 | ||||
|  *  using core LOONGSON3R5 | ||||
|  | @ -46,9 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define CPU_LOONGSON3R5    1 | ||||
| #define CPU_LOONGSON2K1000 2 | ||||
| 
 | ||||
| #define LOONGARCH_CFG2  0x02 | ||||
| #define LOONGARCH_LASX  1<<7 | ||||
| #define LOONGARCH_LSX   1<<6 | ||||
| #define LA_HWCAP_LSX    (1<<4) | ||||
| #define LA_HWCAP_LASX   (1<<5) | ||||
| 
 | ||||
| static char *cpuname[] = { | ||||
|   "LOONGSONGENERIC", | ||||
|  | @ -64,17 +64,11 @@ static char *cpuname_lower[] = { | |||
| 
 | ||||
| int detect(void) { | ||||
| #ifdef __linux | ||||
|   uint32_t reg = 0; | ||||
|   int flag  = (int)getauxval(AT_HWCAP); | ||||
| 
 | ||||
|   __asm__ volatile ( | ||||
|     "cpucfg %0, %1 \n\t" | ||||
|     : "+&r"(reg) | ||||
|     : "r"(LOONGARCH_CFG2) | ||||
|   ); | ||||
| 
 | ||||
|   if (reg & LOONGARCH_LASX) | ||||
|   if (flag & LA_HWCAP_LASX) | ||||
|     return CPU_LOONGSON3R5; | ||||
|   else if (reg & LOONGARCH_LSX) | ||||
|   else if (flag & LA_HWCAP_LSX) | ||||
|     return CPU_LOONGSON2K1000; | ||||
|   else | ||||
|     return CPU_GENERIC; | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| ifndef NO_LASX | ||||
| DGEMMKERNEL    = dgemm_kernel_16x4.S | ||||
| DGEMMINCOPY    = dgemm_ncopy_16.S | ||||
| DGEMMITCOPY    = dgemm_tcopy_16.S | ||||
|  | @ -7,6 +8,7 @@ DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) | |||
| DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) | ||||
| DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||
| DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||
| endif | ||||
| 
 | ||||
| DTRSMKERNEL_LN  = ../generic/trsm_kernel_LN.c | ||||
| DTRSMKERNEL_LT  = ../generic/trsm_kernel_LT.c | ||||
|  |  | |||
							
								
								
									
										10
									
								
								param.h
								
								
								
								
							
							
						
						
									
										10
									
								
								param.h
								
								
								
								
							|  | @ -2845,15 +2845,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | ||||
| #define GEMM_DEFAULT_ALIGN 0x0ffffUL | ||||
| 
 | ||||
| #define SGEMM_DEFAULT_UNROLL_N 8 | ||||
| #if defined(NO_LASX) | ||||
| #define DGEMM_DEFAULT_UNROLL_N 8 | ||||
| #define DGEMM_DEFAULT_UNROLL_M 2 | ||||
| #else | ||||
| #define DGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define DGEMM_DEFAULT_UNROLL_M 16 | ||||
| #endif | ||||
| 
 | ||||
| #define SGEMM_DEFAULT_UNROLL_N 8 | ||||
| #define QGEMM_DEFAULT_UNROLL_N 2 | ||||
| #define CGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define ZGEMM_DEFAULT_UNROLL_N 4 | ||||
| #define XGEMM_DEFAULT_UNROLL_N 1 | ||||
| 
 | ||||
| #define SGEMM_DEFAULT_UNROLL_M 2 | ||||
| #define DGEMM_DEFAULT_UNROLL_M 16 | ||||
| #define QGEMM_DEFAULT_UNROLL_M 2 | ||||
| #define CGEMM_DEFAULT_UNROLL_M 1 | ||||
| #define ZGEMM_DEFAULT_UNROLL_M 1 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue