diff --git a/Makefile.prebuild b/Makefile.prebuild index ee0b67787..524f0a741 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -17,14 +17,26 @@ ifdef CPUIDEMU EXFLAGS = -DCPUIDEMU -DVENDOR=99 endif +ifeq ($(TARGET), P5600) +TARGET_FLAGS = -mips32r5 +endif + +ifeq ($(TARGET), I6400) +TARGET_FLAGS = -mips64r6 +endif + +ifeq ($(TARGET), P6600) +TARGET_FLAGS = -mips64r6 +endif + all: getarch_2nd ./getarch_2nd 0 >> $(TARGET_MAKE) ./getarch_2nd 1 >> $(TARGET_CONF) config.h : c_check f_check getarch - perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) + perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) ifneq ($(ONLY_CBLAS), 1) - perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) + perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS) else #When we only build CBLAS, we set NOFORTRAN=2 echo "NOFORTRAN=2" >> $(TARGET_MAKE) diff --git a/Makefile.system b/Makefile.system index e3abb5723..b05177b6c 100644 --- a/Makefile.system +++ b/Makefile.system @@ -159,7 +159,7 @@ ifndef GOTOBLAS_MAKEFILE export GOTOBLAS_MAKEFILE = 1 # Generating Makefile.conf and config.h -DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all) +DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all) ifndef TARGET_CORE include $(TOPDIR)/Makefile.conf @@ -502,13 +502,16 @@ endif ifdef NO_BINARY_MODE -ifeq ($(ARCH), $(filter $(ARCH),mips64 mips)) +ifeq ($(ARCH), $(filter $(ARCH),mips64)) ifdef BINARY64 CCOMMON_OPT += -mabi=64 else -CCOMMON_OPT += -mabi=32 +CCOMMON_OPT += -mabi=n32 endif BINARY_DEFINED = 1 +else ifeq ($(ARCH), $(filter $(ARCH),mips)) +CCOMMON_OPT += -mabi=32 +BINARY_DEFINED = 1 endif ifeq ($(CORE), 
LOONGSON3A) @@ -522,13 +525,18 @@ FCOMMON_OPT += -march=mips64 endif ifeq ($(CORE), P5600) -CCOMMON_OPT += -mmsa -FCOMMON_OPT += -mmsa +CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS) +FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS) endif -ifneq ($(filter $(CORE), I6400 P6600),) -CCOMMON_OPT += -mmsa -FCOMMON_OPT += -mmsa +ifeq ($(CORE), I6400) +CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS) +FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS) +endif + +ifeq ($(CORE), P6600) +CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS) +FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS) endif ifeq ($(OSNAME), AIX) @@ -599,12 +607,14 @@ ifneq ($(NO_LAPACK), 1) EXTRALIB += -lgfortran endif ifdef NO_BINARY_MODE -ifeq ($(ARCH), $(filter $(ARCH),mips64 mips)) +ifeq ($(ARCH), $(filter $(ARCH),mips64)) ifdef BINARY64 FCOMMON_OPT += -mabi=64 else -FCOMMON_OPT += -mabi=32 +FCOMMON_OPT += -mabi=n32 endif +else ifeq ($(ARCH), $(filter $(ARCH),mips)) +FCOMMON_OPT += -mabi=32 endif else ifdef BINARY64 @@ -687,20 +697,6 @@ FCOMMON_OPT += -i8 endif endif endif - -ifeq ($(filter $(ARCH),mips64 mips)) -ifndef BINARY64 -FCOMMON_OPT += -m32 -else -FCOMMON_OPT += -m64 -endif -else -ifdef BINARY64 -FCOMMON_OPT += -mabi=64 -else -FCOMMON_OPT += -mabi=32 -endif -endif ifeq ($(USE_OPENMP), 1) FCOMMON_OPT += -mp @@ -1136,6 +1132,8 @@ export HAVE_VFP export HAVE_VFPV3 export HAVE_VFPV4 export HAVE_NEON +export HAVE_MSA +export MSA_FLAGS export KERNELDIR export FUNCTION_PROFILE export TARGET_CORE diff --git a/c_check b/c_check index 4569d842a..2ec9fc484 100644 --- a/c_check +++ b/c_check @@ -1,6 +1,7 @@ #!/usr/bin/perl use File::Basename; +use File::Temp qw(tempfile); # Checking cross compile $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); @@ -10,6 +11,7 @@ $hostarch = "arm" if ($hostarch =~ /^arm.*/); $hostarch = "arm64" if ($hostarch eq "aarch64"); $hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); +$tmpf = new 
File::Temp( UNLINK => 1 ); $binary = $ENV{"BINARY"}; $makefile = shift(@ARGV); @@ -79,10 +81,15 @@ if ($os eq "AIX") { $defined = 1; } -if (($architecture eq "mips") || ($architecture eq "mips64")) { - $compiler_name .= " -mabi=32" if ($binary eq "32"); - $compiler_name .= " -mabi=64" if ($binary eq "64"); - $defined = 1; +if ($architecture eq "mips") { + $compiler_name .= " -mabi=32"; + $defined = 1; +} + +if ($architecture eq "mips64") { + $compiler_name .= " -mabi=n32" if ($binary eq "32"); + $compiler_name .= " -mabi=64" if ($binary eq "64"); + $defined = 1; } if (($architecture eq "arm") || ($architecture eq "arm64")) { @@ -152,6 +159,24 @@ if ($?) { die 1; } +$have_msa = 0; +if (($architecture eq "mips") || ($architecture eq "mips64")) { + $code = '"addvi.b $w0, $w1, 1"'; + $msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs"; + print $tmpf "#include <msa.h>\n\n"; + print $tmpf "void main(void){ __asm__ volatile($code); }\n"; + + $args = "$msa_flags -o $tmpf.o -x c $tmpf"; + my @cmd = ("$compiler_name $args"); + system(@cmd) == 0; + if ($? 
!= 0) { + $have_msa = 0; + } else { + $have_msa = 1; + } + unlink("$tmpf.o"); +} + $architecture = x86 if ($data =~ /ARCH_X86/); $architecture = x86_64 if ($data =~ /ARCH_X86_64/); $architecture = power if ($data =~ /ARCH_POWER/); @@ -246,6 +271,8 @@ print MAKEFILE "FU=$need_fu\n" if $need_fu ne ""; print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne ""; print MAKEFILE "CROSS=1\n" if $cross != 0; print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; +print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1; +print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1; $os =~ tr/[a-z]/[A-Z]/; $architecture =~ tr/[a-z]/[A-Z]/; @@ -257,6 +284,7 @@ print CONFFILE "#define C_$compiler\t1\n"; print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32; print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64; print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne ""; +print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1; if ($os eq "LINUX") { diff --git a/f_check b/f_check index 171c91f95..2f01f1c44 100644 --- a/f_check +++ b/f_check @@ -223,7 +223,12 @@ if (!$?) { } #For gfortran MIPS if ($?) 
{ - $link = `$compiler $openmp -mabi=32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; + $mips_data = `$compiler_bin -E -dM - < /dev/null`; + if ($mips_data =~ /_MIPS_ISA_MIPS64/) { + $link = `$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; + } else { + $link = `$compiler $openmp -mabi=32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; + } } $binary = "" if ($?); } diff --git a/kernel/mips/KERNEL.P5600 b/kernel/mips/KERNEL.P5600 index 7bf90c905..683579221 100644 --- a/kernel/mips/KERNEL.P5600 +++ b/kernel/mips/KERNEL.P5600 @@ -30,10 +30,17 @@ IDMAXKERNEL = ../mips/imax.c ISMINKERNEL = ../mips/imin.c IDMINKERNEL = ../mips/imin.c +ifdef HAVE_MSA SASUMKERNEL = ../mips/sasum_msa.c DASUMKERNEL = ../mips/dasum_msa.c CASUMKERNEL = ../mips/casum_msa.c ZASUMKERNEL = ../mips/zasum_msa.c +else +SASUMKERNEL = ../mips/asum.c +DASUMKERNEL = ../mips/asum.c +CASUMKERNEL = ../mips/asum.c +ZASUMKERNEL = ../mips/asum.c +endif SAXPYKERNEL = ../mips/axpy.c DAXPYKERNEL = ../mips/axpy.c @@ -45,10 +52,17 @@ DCOPYKERNEL = ../mips/copy.c CCOPYKERNEL = ../mips/zcopy.c ZCOPYKERNEL = ../mips/zcopy.c +ifdef HAVE_MSA SDOTKERNEL = ../mips/sdot_msa.c DDOTKERNEL = ../mips/ddot_msa.c CDOTKERNEL = ../mips/cdot_msa.c ZDOTKERNEL = ../mips/zdot_msa.c +else +SDOTKERNEL = ../mips/dot.c +DDOTKERNEL = ../mips/dot.c +CDOTKERNEL = ../mips/zdot.c +ZDOTKERNEL = ../mips/zdot.c +endif SNRM2KERNEL = ../mips/nrm2.c DNRM2KERNEL = ../mips/nrm2.c @@ -70,22 +84,45 @@ DSWAPKERNEL = ../mips/swap.c CSWAPKERNEL = ../mips/zswap.c ZSWAPKERNEL = ../mips/zswap.c +ifdef HAVE_MSA SGEMVNKERNEL = ../mips/sgemv_n_msa.c DGEMVNKERNEL = ../mips/dgemv_n_msa.c CGEMVNKERNEL = ../mips/cgemv_n_msa.c ZGEMVNKERNEL = ../mips/zgemv_n_msa.c +else +SGEMVNKERNEL = ../mips/gemv_n.c +DGEMVNKERNEL = ../mips/gemv_n.c +CGEMVNKERNEL = ../mips/zgemv_n.c +ZGEMVNKERNEL = ../mips/zgemv_n.c +endif +ifdef HAVE_MSA SGEMVTKERNEL = ../mips/sgemv_t_msa.c DGEMVTKERNEL = ../mips/dgemv_t_msa.c CGEMVTKERNEL = ../mips/cgemv_t_msa.c ZGEMVTKERNEL = 
../mips/zgemv_t_msa.c +else +SGEMVTKERNEL = ../mips/gemv_t.c +DGEMVTKERNEL = ../mips/gemv_t.c +CGEMVTKERNEL = ../mips/zgemv_t.c +ZGEMVTKERNEL = ../mips/zgemv_t.c +endif +ifdef HAVE_MSA SGEMMKERNEL = ../mips/sgemm_kernel_8x8_msa.c SGEMMONCOPY = ../mips/sgemm_ncopy_8_msa.c SGEMMOTCOPY = ../mips/sgemm_tcopy_8_msa.c SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMOTCOPYOBJ = sgemm_otcopy.o +else +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy.o +SGEMMOTCOPYOBJ = sgemm_otcopy.o +endif +ifdef HAVE_MSA DGEMMKERNEL = ../mips/dgemm_kernel_8x4_msa.c DGEMMINCOPY = ../mips/dgemm_ncopy_8_msa.c DGEMMITCOPY = ../mips/dgemm_tcopy_8_msa.c @@ -95,7 +132,15 @@ DGEMMINCOPYOBJ = dgemm_incopy.o DGEMMITCOPYOBJ = dgemm_itcopy.o DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMOTCOPYOBJ = dgemm_otcopy.o +else +DGEMMKERNEL = ../generic/gemmkernel_2x2.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMONCOPYOBJ = dgemm_oncopy.o +DGEMMOTCOPYOBJ = dgemm_otcopy.o +endif +ifdef HAVE_MSA CGEMMKERNEL = ../mips/cgemm_kernel_8x4_msa.c CGEMMINCOPY = ../mips/cgemm_ncopy_8_msa.c CGEMMITCOPY = ../mips/cgemm_tcopy_8_msa.c @@ -105,29 +150,72 @@ CGEMMINCOPYOBJ = cgemm_incopy.o CGEMMITCOPYOBJ = cgemm_itcopy.o CGEMMONCOPYOBJ = cgemm_oncopy.o CGEMMOTCOPYOBJ = cgemm_otcopy.o +else +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy.o +CGEMMOTCOPYOBJ = cgemm_otcopy.o +endif +ifdef HAVE_MSA ZGEMMKERNEL = ../mips/zgemm_kernel_4x4_msa.c ZGEMMONCOPY = ../mips/zgemm_ncopy_4_msa.c ZGEMMOTCOPY = ../mips/zgemm_tcopy_4_msa.c ZGEMMONCOPYOBJ = zgemm_oncopy.o ZGEMMOTCOPYOBJ = zgemm_otcopy.o +else +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy.o +ZGEMMOTCOPYOBJ = zgemm_otcopy.o +endif 
+ifdef HAVE_MSA STRSMKERNEL_LN = ../mips/strsm_kernel_LN_8x8_msa.c STRSMKERNEL_LT = ../mips/strsm_kernel_LT_8x8_msa.c STRSMKERNEL_RN = ../mips/strsm_kernel_RN_8x8_msa.c STRSMKERNEL_RT = ../mips/strsm_kernel_RT_8x8_msa.c +else +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif +ifdef HAVE_MSA DTRSMKERNEL_LN = ../mips/dtrsm_kernel_LN_8x4_msa.c DTRSMKERNEL_LT = ../mips/dtrsm_kernel_LT_8x4_msa.c DTRSMKERNEL_RN = ../mips/dtrsm_kernel_RN_8x4_msa.c DTRSMKERNEL_RT = ../mips/dtrsm_kernel_RT_8x4_msa.c +else +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif +ifdef HAVE_MSA CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +else +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif +ifdef HAVE_MSA ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +else +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif \ No newline at end of file diff --git a/param.h b/param.h index 555829d45..480518cd4 100644 --- a/param.h +++ b/param.h @@ -2174,7 +2174,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SYMV_P 16 #endif -#if defined(I6400) || defined(P6600) +#if defined(P5600) || defined(I6400) || defined(P6600) #define SNUMOPT 2 #define DNUMOPT 2 @@ -2182,6 +2182,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_ALIGN 0x03fffUL +#ifdef HAVE_MSA #define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_N 8 @@ -2193,46 +2194,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ZGEMM_DEFAULT_UNROLL_M 4 #define ZGEMM_DEFAULT_UNROLL_N 4 +#else +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 2 -#define SGEMM_DEFAULT_P 128 -#define DGEMM_DEFAULT_P 128 -#define CGEMM_DEFAULT_P 96 -#define ZGEMM_DEFAULT_P 64 +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 2 -#define SGEMM_DEFAULT_Q 240 -#define DGEMM_DEFAULT_Q 120 -#define CGEMM_DEFAULT_Q 120 -#define ZGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 -#define SGEMM_DEFAULT_R 12288 -#define DGEMM_DEFAULT_R 8192 -#define CGEMM_DEFAULT_R 4096 -#define ZGEMM_DEFAULT_R 4096 - - -#define SYMV_P 16 +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 #endif -#if defined(P5600) -#define SNUMOPT 2 -#define DNUMOPT 2 - -#define GEMM_DEFAULT_OFFSET_A 0 -#define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL - -#define SGEMM_DEFAULT_UNROLL_M 8 -#define SGEMM_DEFAULT_UNROLL_N 8 - -#define DGEMM_DEFAULT_UNROLL_M 8 -#define DGEMM_DEFAULT_UNROLL_N 4 - -#define CGEMM_DEFAULT_UNROLL_M 8 -#define CGEMM_DEFAULT_UNROLL_N 4 - -#define ZGEMM_DEFAULT_UNROLL_M 4 -#define ZGEMM_DEFAULT_UNROLL_N 4 - #define SGEMM_DEFAULT_P 128 #define DGEMM_DEFAULT_P 128 #define CGEMM_DEFAULT_P 96 @@ -2248,7 +2223,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096 - #define SYMV_P 16 #endif