diff --git a/Makefile.system b/Makefile.system
index dc3e77e35..36e4ef05e 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -1711,6 +1711,8 @@ ifndef NO_MSA
 export HAVE_MSA
 export MSA_FLAGS
 endif
+export HAVE_LSX
+export HAVE_LASX
 export KERNELDIR
 export FUNCTION_PROFILE
 export TARGET_CORE
diff --git a/c_check b/c_check
index 01d4f4a7c..7f72b05b2 100755
--- a/c_check
+++ b/c_check
@@ -182,6 +182,41 @@ if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then
     rm -rf "$tmpd"
 fi
 
+# LoongArch64: detect toolchain support for the LSX and LASX extensions by
+# building a one-instruction inline-asm program with -mlsx / -mlasx.
+have_lsx=0
+have_lasx=0
+if [ "$architecture" = "loongarch64" ]; then
+    tmpd="$(mktemp -d)"
+    tmplsx="$tmpd/lsx.c"
+    codelsx='"vadd.b $vr0, $vr0, $vr0"'
+    lsx_flags='-march=loongarch64 -mlsx -mabi=lp64d'
+    printf "#include <lsxintrin.h>\n\n" >> "$tmplsx"
+    printf "int main(void){ __asm__ volatile(%s); return 0; }\n" "$codelsx" >> "$tmplsx"
+    args="$lsx_flags -o $tmplsx.o $tmplsx"
+    have_lsx=1
+    {
+        $compiler_name $flags $args >/dev/null 2>&1
+    } || {
+        have_lsx=0
+    }
+
+    tmplasx="$tmpd/lasx.c"
+    codelasx='"xvadd.b $xr0, $xr0, $xr0"'
+    lasx_flags='-march=loongarch64 -mlasx -mabi=lp64d'
+    printf "#include <lasxintrin.h>\n\n" >> "$tmplasx"
+    printf "int main(void){ __asm__ volatile(%s); return 0; }\n" "$codelasx" >> "$tmplasx"
+    args="$lasx_flags -o $tmplasx.o $tmplasx"
+    have_lasx=1
+    {
+        $compiler_name $flags $args >/dev/null 2>&1
+    } || {
+        have_lasx=0
+    }
+
+    rm -rf "$tmpd"
+fi
+
 case "$data" in
     *ARCH_X86_64*) architecture=x86_64 ;;
     *ARCH_X86*) architecture=x86 ;;
@@ -383,6 +418,8 @@ done
     [ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n"
     [ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
     [ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n"
+    [ "$have_lsx" -eq 1 ] && printf "HAVE_LSX=1\n"
+    [ "$have_lasx" -eq 1 ] && printf "HAVE_LASX=1\n"
 } >> "$makefile"
 
 os=`echo "$os" | tr '[[:lower:]]' '[[:upper:]]'/ `
@@ -397,6 +434,8 @@ compiler=`echo "$compiler" | tr '[[:lower:]]' '[[:upper:]]' `
     [ "$binformat" = "bin64" ] && printf "#define __64BIT__\t1\n"
     [ -n "$need_fu" ] && printf "#define FUNDERSCORE\t%s\n" "$need_fu"
     [ "$have_msa" -eq 1 ] && printf "#define HAVE_MSA\t1\n"
+    [ "$have_lsx" -eq 1 ] && printf "#define HAVE_LSX\t1\n"
+    [ "$have_lasx" -eq 1 ] && printf "#define HAVE_LASX\t1\n"
     [ "$c11_atomics" -eq 1 ] && printf "#define HAVE_C11\t1\n"
 } >> "$config"
 
diff --git a/c_check.pl b/c_check.pl
index 6ce28e11b..bd9b1e1c5 100644
--- a/c_check.pl
+++ b/c_check.pl
@@ -232,6 +232,42 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) {
     }
 }
 
+# LoongArch64: detect toolchain support for the LSX and LASX extensions by
+# building a one-instruction inline-asm program with -mlsx / -mlasx.
+$have_lsx = 0;
+$have_lasx = 0;
+if ($architecture eq "loongarch64") {
+    eval "use File::Temp qw(tempfile)";
+    if ($@){
+        warn "could not load PERL module File::Temp, so could not check LSX and LASX capability";
+    } else {
+        $tmplsx = File::Temp->new( SUFFIX => '.c' , UNLINK => 1 );
+        $codelsx = '"vadd.b $vr0, $vr0, $vr0"';
+        $lsx_flags = "-march=loongarch64 -mlsx -mabi=lp64d";
+        print $tmplsx "#include <lsxintrin.h>\n\n";
+        print $tmplsx "int main(void){ __asm__ volatile($codelsx); return 0; }\n";
+        # Make sure the probe source is on disk before the compiler reads it.
+        $tmplsx->flush;
+
+        $args = "$lsx_flags -o $tmplsx.o $tmplsx";
+        # system() returns the wait status: 0 means the probe built cleanly.
+        $have_lsx = (system("$compiler_name $flags $args >/dev/null 2>/dev/null") == 0) ? 1 : 0;
+        unlink("$tmplsx.o");
+
+        # Same probe for LASX.
+        $tmplasx = File::Temp->new( SUFFIX => '.c' , UNLINK => 1 );
+        $codelasx = '"xvadd.b $xr0, $xr0, $xr0"';
+        $lasx_flags = "-march=loongarch64 -mlasx -mabi=lp64d";
+        print $tmplasx "#include <lasxintrin.h>\n\n";
+        print $tmplasx "int main(void){ __asm__ volatile($codelasx); return 0; }\n";
+        $tmplasx->flush;
+
+        $args = "$lasx_flags -o $tmplasx.o $tmplasx";
+        $have_lasx = (system("$compiler_name $flags $args >/dev/null 2>/dev/null") == 0) ? 1 : 0;
+        unlink("$tmplasx.o");
+    }
+}
+
 $architecture = x86 if ($data =~ /ARCH_X86/);
 $architecture = x86_64 if ($data =~ /ARCH_X86_64/);
 $architecture = e2k if ($data =~ /ARCH_E2K/);
@@ -419,6 +455,8 @@ print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne
 print MAKEFILE "CROSS=1\n" if $cross != 0;
 print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
 print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
+print MAKEFILE "HAVE_LSX=1\n" if $have_lsx eq 1;
+print MAKEFILE "HAVE_LASX=1\n" if $have_lasx eq 1;
 print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
 print MAKEFILE "NO_RV64GV=1\n" if $no_rv64gv eq 1;
 print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
@@ -436,6 +474,8 @@ print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
 print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
 print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
 print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
+print CONFFILE "#define HAVE_LSX\t1\n" if $have_lsx eq 1;
+print CONFFILE "#define HAVE_LASX\t1\n" if $have_lasx eq 1;
 print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1;
 
 
diff --git a/kernel/loongarch64/KERNEL.LA464 b/kernel/loongarch64/KERNEL.LA464
index cda359040..98b9c92a8 100644
--- a/kernel/loongarch64/KERNEL.LA464
+++ b/kernel/loongarch64/KERNEL.LA464
@@ -1,3 +1,5 @@
+# Select the 16x4 DGEMM kernel only when HAVE_LASX is set (c_check writes it when the LASX probe builds).
+ifdef HAVE_LASX
 DGEMMKERNEL = dgemm_kernel_16x4.S
 DGEMMINCOPY = dgemm_ncopy_16.S
 DGEMMITCOPY = dgemm_tcopy_16.S
@@ -7,6 +9,7 @@ DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
 DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
 DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
 DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
+endif
 
 DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
 DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
diff --git a/param.h b/param.h
index 251c642a1..62f56c172 100644
--- a/param.h
+++ b/param.h
@@ -2843,15 +2843,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define GEMM_DEFAULT_OFFSET_B 0
 #define GEMM_DEFAULT_ALIGN 0x0ffffUL
 
-#define SGEMM_DEFAULT_UNROLL_N 8
+/* KERNEL.LA464 selects dgemm_kernel_16x4.S only when HAVE_LASX is set; keep M/N in step with it. */
+#if defined(HAVE_LASX)
 #define DGEMM_DEFAULT_UNROLL_N 4
+#define DGEMM_DEFAULT_UNROLL_M 16
+#else
+#define DGEMM_DEFAULT_UNROLL_N 8
+#define DGEMM_DEFAULT_UNROLL_M 2
+#endif
+
+#define SGEMM_DEFAULT_UNROLL_N 8
 #define QGEMM_DEFAULT_UNROLL_N 2
 #define CGEMM_DEFAULT_UNROLL_N 4
 #define ZGEMM_DEFAULT_UNROLL_N 4
 #define XGEMM_DEFAULT_UNROLL_N 1
 
 #define SGEMM_DEFAULT_UNROLL_M 2
-#define DGEMM_DEFAULT_UNROLL_M 16
 #define QGEMM_DEFAULT_UNROLL_M 2
 #define CGEMM_DEFAULT_UNROLL_M 1
 #define ZGEMM_DEFAULT_UNROLL_M 1