diff --git a/Makefile b/Makefile index 2ae004798..693808127 100644 --- a/Makefile +++ b/Makefile @@ -228,8 +228,8 @@ ifndef NOFORTRAN -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc - -@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "ARFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc diff --git a/Makefile.prebuild b/Makefile.prebuild index ee0b67787..524f0a741 100644 --- a/Makefile.prebuild +++ b/Makefile.prebuild @@ -17,14 +17,26 @@ ifdef CPUIDEMU EXFLAGS = -DCPUIDEMU -DVENDOR=99 endif +ifeq ($(TARGET), P5600) +TARGET_FLAGS = -mips32r5 +endif + +ifeq ($(TARGET), I6400) +TARGET_FLAGS = -mips64r6 +endif + +ifeq ($(TARGET), P6600) +TARGET_FLAGS = -mips64r6 +endif + all: getarch_2nd ./getarch_2nd 0 >> $(TARGET_MAKE) ./getarch_2nd 1 >> $(TARGET_CONF) config.h : c_check f_check getarch - perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) + perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) ifneq ($(ONLY_CBLAS), 1) - perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) + perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS) else #When we only build CBLAS, we set NOFORTRAN=2 echo "NOFORTRAN=2" >> $(TARGET_MAKE) diff --git a/Makefile.system b/Makefile.system index e3abb5723..47b197f6f 100644 --- a/Makefile.system +++ b/Makefile.system @@ -159,7 +159,7 @@ ifndef GOTOBLAS_MAKEFILE export GOTOBLAS_MAKEFILE = 1 # Generating Makefile.conf and config.h -DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" 
BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all) +DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all) ifndef TARGET_CORE include $(TOPDIR)/Makefile.conf @@ -502,13 +502,16 @@ endif ifdef NO_BINARY_MODE -ifeq ($(ARCH), $(filter $(ARCH),mips64 mips)) +ifeq ($(ARCH), $(filter $(ARCH),mips64)) ifdef BINARY64 CCOMMON_OPT += -mabi=64 else -CCOMMON_OPT += -mabi=32 +CCOMMON_OPT += -mabi=n32 endif BINARY_DEFINED = 1 +else ifeq ($(ARCH), $(filter $(ARCH),mips)) +CCOMMON_OPT += -mabi=32 +BINARY_DEFINED = 1 endif ifeq ($(CORE), LOONGSON3A) @@ -522,13 +525,18 @@ FCOMMON_OPT += -march=mips64 endif ifeq ($(CORE), P5600) -CCOMMON_OPT += -mmsa -FCOMMON_OPT += -mmsa +CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MIPS_SIMD_FLAGS) +FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MIPS_SIMD_FLAGS) endif -ifneq ($(filter $(CORE), I6400 P6600),) -CCOMMON_OPT += -mmsa -FCOMMON_OPT += -mmsa +ifeq ($(CORE), I6400) +CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MIPS_SIMD_FLAGS) +FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MIPS_SIMD_FLAGS) +endif + +ifeq ($(CORE), P6600) +CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MIPS_SIMD_FLAGS) +FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MIPS_SIMD_FLAGS) endif ifeq ($(OSNAME), AIX) @@ -599,12 +607,14 @@ ifneq ($(NO_LAPACK), 1) EXTRALIB += -lgfortran endif ifdef NO_BINARY_MODE -ifeq ($(ARCH), $(filter $(ARCH),mips64 mips)) +ifeq ($(ARCH), $(filter $(ARCH),mips64)) ifdef BINARY64 FCOMMON_OPT += -mabi=64 else -FCOMMON_OPT += -mabi=32 +FCOMMON_OPT += -mabi=n32 endif +else ifeq ($(ARCH), $(filter $(ARCH),mips)) +FCOMMON_OPT += -mabi=32 endif else ifdef BINARY64 @@ -687,20 +697,6 @@ FCOMMON_OPT += -i8 endif endif endif - -ifeq ($(filter $(ARCH),mips64 mips)) -ifndef BINARY64 -FCOMMON_OPT += 
-m32 -else -FCOMMON_OPT += -m64 -endif -else -ifdef BINARY64 -FCOMMON_OPT += -mabi=64 -else -FCOMMON_OPT += -mabi=32 -endif -endif ifeq ($(USE_OPENMP), 1) FCOMMON_OPT += -mp diff --git a/c_check b/c_check index 4569d842a..a7c7aac2c 100644 --- a/c_check +++ b/c_check @@ -1,6 +1,7 @@ #!/usr/bin/perl use File::Basename; +use File::Temp qw(tempfile); # Checking cross compile $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); @@ -10,6 +11,7 @@ $hostarch = "arm" if ($hostarch =~ /^arm.*/); $hostarch = "arm64" if ($hostarch eq "aarch64"); $hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); +$tmpf = new File::Temp( UNLINK => 1 ); $binary = $ENV{"BINARY"}; $makefile = shift(@ARGV); @@ -79,10 +81,15 @@ if ($os eq "AIX") { $defined = 1; } -if (($architecture eq "mips") || ($architecture eq "mips64")) { - $compiler_name .= " -mabi=32" if ($binary eq "32"); - $compiler_name .= " -mabi=64" if ($binary eq "64"); - $defined = 1; +if ($architecture eq "mips") { + $compiler_name .= " -mabi=32"; + $defined = 1; +} + +if ($architecture eq "mips64") { + $compiler_name .= " -mabi=n32" if ($binary eq "32"); + $compiler_name .= " -mabi=64" if ($binary eq "64"); + $defined = 1; } if (($architecture eq "arm") || ($architecture eq "arm64")) { @@ -152,6 +159,26 @@ if ($?) { die 1; } +$mips_simd = 1; +if (($architecture eq "mips") || ($architecture eq "mips64")) { + $code = '"addvi.b $w0, $w1, 1"'; + $msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs"; + print $tmpf "#include <msa.h>\n\n"; + print $tmpf "void main(void){ __asm__ volatile($code); }\n"; + + $ret = 0; + $args = "$msa_flags -o $tmpf.o -x c $tmpf"; + my @cmd = ("$compiler_name $args"); + system(@cmd) == 0; + if ($? != 0) { + $ret = ($? 
>> 8); + } + unlink("$tmpf.o"); + if($ret != 0) { + $mips_simd = 0; + } +} + $architecture = x86 if ($data =~ /ARCH_X86/); $architecture = x86_64 if ($data =~ /ARCH_X86_64/); $architecture = power if ($data =~ /ARCH_POWER/); @@ -246,6 +273,8 @@ print MAKEFILE "FU=$need_fu\n" if $need_fu ne ""; print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne ""; print MAKEFILE "CROSS=1\n" if $cross != 0; print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; +print MAKEFILE "MIPS_SIMD=1\n" if $mips_simd eq 1; +print MAKEFILE "MIPS_SIMD_FLAGS=$msa_flags\n" if $mips_simd eq 1; $os =~ tr/[a-z]/[A-Z]/; $architecture =~ tr/[a-z]/[A-Z]/; @@ -257,6 +286,7 @@ print CONFFILE "#define C_$compiler\t1\n"; print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32; print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64; print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne ""; +print CONFFILE "#define MIPS_SIMD\t1\n" if $mips_simd eq 1; if ($os eq "LINUX") { diff --git a/f_check b/f_check index 171c91f95..2f01f1c44 100644 --- a/f_check +++ b/f_check @@ -223,7 +223,12 @@ if (!$?) { } #For gfortran MIPS if ($?) { - $link = `$compiler $openmp -mabi=32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; + $mips_data = `$compiler_bin -E -dM - < /dev/null`; + if ($mips_data =~ /_MIPS_ISA_MIPS64/) { + $link = `$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; + } else { + $link = `$compiler $openmp -mabi=32 -v ftest2.f 2>&1 && rm -f a.out a.exe`; + } } $binary = "" if ($?); } diff --git a/kernel/Makefile b/kernel/Makefile index a0a8fcd21..8237549f3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -4,6 +4,7 @@ endif TOPDIR = .. 
include $(TOPDIR)/Makefile.system +include $(TOPDIR)/Makefile.conf ifdef TARGET_CORE override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) diff --git a/kernel/mips/KERNEL.P5600 b/kernel/mips/KERNEL.P5600 index 7bf90c905..92c0b3c8a 100644 --- a/kernel/mips/KERNEL.P5600 +++ b/kernel/mips/KERNEL.P5600 @@ -30,10 +30,17 @@ IDMAXKERNEL = ../mips/imax.c ISMINKERNEL = ../mips/imin.c IDMINKERNEL = ../mips/imin.c +ifdef MIPS_SIMD SASUMKERNEL = ../mips/sasum_msa.c DASUMKERNEL = ../mips/dasum_msa.c CASUMKERNEL = ../mips/casum_msa.c ZASUMKERNEL = ../mips/zasum_msa.c +else +SASUMKERNEL = ../mips/asum.c +DASUMKERNEL = ../mips/asum.c +CASUMKERNEL = ../mips/asum.c +ZASUMKERNEL = ../mips/asum.c +endif SAXPYKERNEL = ../mips/axpy.c DAXPYKERNEL = ../mips/axpy.c @@ -45,10 +52,17 @@ DCOPYKERNEL = ../mips/copy.c CCOPYKERNEL = ../mips/zcopy.c ZCOPYKERNEL = ../mips/zcopy.c +ifdef MIPS_SIMD SDOTKERNEL = ../mips/sdot_msa.c DDOTKERNEL = ../mips/ddot_msa.c CDOTKERNEL = ../mips/cdot_msa.c ZDOTKERNEL = ../mips/zdot_msa.c +else +SDOTKERNEL = ../mips/dot.c +DDOTKERNEL = ../mips/dot.c +CDOTKERNEL = ../mips/zdot.c +ZDOTKERNEL = ../mips/zdot.c +endif SNRM2KERNEL = ../mips/nrm2.c DNRM2KERNEL = ../mips/nrm2.c @@ -70,22 +84,45 @@ DSWAPKERNEL = ../mips/swap.c CSWAPKERNEL = ../mips/zswap.c ZSWAPKERNEL = ../mips/zswap.c +ifdef MIPS_SIMD SGEMVNKERNEL = ../mips/sgemv_n_msa.c DGEMVNKERNEL = ../mips/dgemv_n_msa.c CGEMVNKERNEL = ../mips/cgemv_n_msa.c ZGEMVNKERNEL = ../mips/zgemv_n_msa.c +else +SGEMVNKERNEL = ../mips/gemv_n.c +DGEMVNKERNEL = ../mips/gemv_n.c +CGEMVNKERNEL = ../mips/zgemv_n.c +ZGEMVNKERNEL = ../mips/zgemv_n.c +endif +ifdef MIPS_SIMD SGEMVTKERNEL = ../mips/sgemv_t_msa.c DGEMVTKERNEL = ../mips/dgemv_t_msa.c CGEMVTKERNEL = ../mips/cgemv_t_msa.c ZGEMVTKERNEL = ../mips/zgemv_t_msa.c +else +SGEMVTKERNEL = ../mips/gemv_t.c +DGEMVTKERNEL = ../mips/gemv_t.c +CGEMVTKERNEL = ../mips/zgemv_t.c +ZGEMVTKERNEL = ../mips/zgemv_t.c +endif +ifdef MIPS_SIMD SGEMMKERNEL = 
../mips/sgemm_kernel_8x8_msa.c SGEMMONCOPY = ../mips/sgemm_ncopy_8_msa.c SGEMMOTCOPY = ../mips/sgemm_tcopy_8_msa.c SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMOTCOPYOBJ = sgemm_otcopy.o +else +SGEMMKERNEL = ../generic/gemmkernel_2x2.c +SGEMMONCOPY = ../generic/gemm_ncopy_2.c +SGEMMOTCOPY = ../generic/gemm_tcopy_2.c +SGEMMONCOPYOBJ = sgemm_oncopy.o +SGEMMOTCOPYOBJ = sgemm_otcopy.o +endif +ifdef MIPS_SIMD DGEMMKERNEL = ../mips/dgemm_kernel_8x4_msa.c DGEMMINCOPY = ../mips/dgemm_ncopy_8_msa.c DGEMMITCOPY = ../mips/dgemm_tcopy_8_msa.c @@ -95,7 +132,15 @@ DGEMMINCOPYOBJ = dgemm_incopy.o DGEMMITCOPYOBJ = dgemm_itcopy.o DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMOTCOPYOBJ = dgemm_otcopy.o +else +DGEMMKERNEL = ../generic/gemmkernel_2x2.c +DGEMMONCOPY = ../generic/gemm_ncopy_2.c +DGEMMOTCOPY = ../generic/gemm_tcopy_2.c +DGEMMONCOPYOBJ = dgemm_oncopy.o +DGEMMOTCOPYOBJ = dgemm_otcopy.o +endif +ifdef MIPS_SIMD CGEMMKERNEL = ../mips/cgemm_kernel_8x4_msa.c CGEMMINCOPY = ../mips/cgemm_ncopy_8_msa.c CGEMMITCOPY = ../mips/cgemm_tcopy_8_msa.c @@ -105,29 +150,72 @@ CGEMMINCOPYOBJ = cgemm_incopy.o CGEMMITCOPYOBJ = cgemm_itcopy.o CGEMMONCOPYOBJ = cgemm_oncopy.o CGEMMOTCOPYOBJ = cgemm_otcopy.o +else +CGEMMKERNEL = ../generic/zgemmkernel_2x2.c +CGEMMONCOPY = ../generic/zgemm_ncopy_2.c +CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +CGEMMONCOPYOBJ = cgemm_oncopy.o +CGEMMOTCOPYOBJ = cgemm_otcopy.o +endif +ifdef MIPS_SIMD ZGEMMKERNEL = ../mips/zgemm_kernel_4x4_msa.c ZGEMMONCOPY = ../mips/zgemm_ncopy_4_msa.c ZGEMMOTCOPY = ../mips/zgemm_tcopy_4_msa.c ZGEMMONCOPYOBJ = zgemm_oncopy.o ZGEMMOTCOPYOBJ = zgemm_otcopy.o +else +ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c +ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c +ZGEMMONCOPYOBJ = zgemm_oncopy.o +ZGEMMOTCOPYOBJ = zgemm_otcopy.o +endif +ifdef MIPS_SIMD STRSMKERNEL_LN = ../mips/strsm_kernel_LN_8x8_msa.c STRSMKERNEL_LT = ../mips/strsm_kernel_LT_8x8_msa.c STRSMKERNEL_RN = ../mips/strsm_kernel_RN_8x8_msa.c STRSMKERNEL_RT = 
../mips/strsm_kernel_RT_8x8_msa.c +else +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif +ifdef MIPS_SIMD DTRSMKERNEL_LN = ../mips/dtrsm_kernel_LN_8x4_msa.c DTRSMKERNEL_LT = ../mips/dtrsm_kernel_LT_8x4_msa.c DTRSMKERNEL_RN = ../mips/dtrsm_kernel_RN_8x4_msa.c DTRSMKERNEL_RT = ../mips/dtrsm_kernel_RT_8x4_msa.c +else +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif +ifdef MIPS_SIMD CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +else +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif +ifdef MIPS_SIMD ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +else +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +endif \ No newline at end of file diff --git a/lapack-netlib/BLAS/SRC/Makefile b/lapack-netlib/BLAS/SRC/Makefile index 43dbfb749..6ab015603 100644 --- a/lapack-netlib/BLAS/SRC/Makefile +++ b/lapack-netlib/BLAS/SRC/Makefile @@ -138,26 +138,26 @@ ALLOBJ=$(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \ $(ZBLAS2) $(ZBLAS3) $(ALLBLAS) $(BLASLIB): $(ALLOBJ) - $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) + $(AR) $(ARFLAGS) $@ $(ALLOBJ) $(RANLIB) $@ single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $(SBLAS1) $(ALLBLAS) \ + $(AR) 
$(ARFLAGS) $(BLASLIB) $(SBLAS1) $(ALLBLAS) \ $(SBLAS2) $(SBLAS3) $(RANLIB) $(BLASLIB) double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $(DBLAS1) $(ALLBLAS) \ + $(AR) $(ARFLAGS) $(BLASLIB) $(DBLAS1) $(ALLBLAS) \ $(DBLAS2) $(DBLAS3) $(RANLIB) $(BLASLIB) complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $(CBLAS1) $(CB1AUX) \ + $(AR) $(ARFLAGS) $(BLASLIB) $(CBLAS1) $(CB1AUX) \ $(ALLBLAS) $(CBLAS2) $(CBLAS3) $(RANLIB) $(BLASLIB) complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) - $(ARCH) $(ARCHFLAGS) $(BLASLIB) $(ZBLAS1) $(ZB1AUX) \ + $(AR) $(ARFLAGS) $(BLASLIB) $(ZBLAS1) $(ZB1AUX) \ $(ALLBLAS) $(ZBLAS2) $(ZBLAS3) $(RANLIB) $(BLASLIB) diff --git a/lapack-netlib/CBLAS/Makefile.in b/lapack-netlib/CBLAS/Makefile.in index fe0143044..9528cc93e 100644 --- a/lapack-netlib/CBLAS/Makefile.in +++ b/lapack-netlib/CBLAS/Makefile.in @@ -44,6 +44,6 @@ FFLAGS = -O3 # Archive programs and flags #----------------------------------------------------------------------------- -ARCH = ar -ARCHFLAGS = cr +AR = ar +ARFLAGS = cr RANLIB = ranlib diff --git a/lapack-netlib/CBLAS/src/Makefile b/lapack-netlib/CBLAS/src/Makefile index d5c73cbb0..fa1b03dc2 100644 --- a/lapack-netlib/CBLAS/src/Makefile +++ b/lapack-netlib/CBLAS/src/Makefile @@ -73,27 +73,27 @@ alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1) # Single real precision slib1: $(slev1) $(sclev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev1) $(sclev1) + $(AR) $(ARFLAGS) $(CBLASLIB) $(slev1) $(sclev1) $(RANLIB) $(CBLASLIB) # Double real precision dlib1: $(dlev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev1) + $(AR) $(ARFLAGS) $(CBLASLIB) $(dlev1) $(RANLIB) $(CBLASLIB) # Single complex precision clib1: $(clev1) $(sclev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev1) $(sclev1) + $(AR) $(ARFLAGS) $(CBLASLIB) $(clev1) $(sclev1) $(RANLIB) $(CBLASLIB) # Double complex precision zlib1: $(zlev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev1) + $(AR) $(ARFLAGS) 
$(CBLASLIB) $(zlev1) $(RANLIB) $(CBLASLIB) # All precisions all1: $(alev1) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev1) + $(AR) $(ARFLAGS) $(CBLASLIB) $(alev1) $(RANLIB) $(CBLASLIB) # @@ -146,27 +146,27 @@ alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2) # Single real precision slib2: $(slev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev2) $(errhand) + $(AR) $(ARFLAGS) $(CBLASLIB) $(slev2) $(errhand) $(RANLIB) $(CBLASLIB) # Double real precision dlib2: $(dlev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev2) $(errhand) + $(AR) $(ARFLAGS) $(CBLASLIB) $(dlev2) $(errhand) $(RANLIB) $(CBLASLIB) # Single complex precision clib2: $(clev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev2) $(errhand) + $(AR) $(ARFLAGS) $(CBLASLIB) $(clev2) $(errhand) $(RANLIB) $(CBLASLIB) # Double complex precision zlib2: $(zlev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev2) $(errhand) + $(AR) $(ARFLAGS) $(CBLASLIB) $(zlev2) $(errhand) $(RANLIB) $(CBLASLIB) # All precisions all2: $(alev2) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev2) $(errhand) + $(AR) $(ARFLAGS) $(CBLASLIB) $(alev2) $(errhand) $(RANLIB) $(CBLASLIB) # # @@ -211,32 +211,32 @@ alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3) # Single real precision slib3: $(slev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev3) $(errhand) + $(AR) $(ARFLAGS) $(CBLASLIB) $(slev3) $(errhand) $(RANLIB) $(CBLASLIB) # Double real precision dlib3: $(dlev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev3) $(errhand) + $(AR) $(ARFLAGS) $(CBLASLIB) $(dlev3) $(errhand) $(RANLIB) $(CBLASLIB) # Single complex precision clib3: $(clev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev3) $(errhand) + $(AR) $(ARFLAGS) $(CBLASLIB) $(clev3) $(errhand) $(RANLIB) $(CBLASLIB) # Single complex precision zlib3: $(zlev3) $(errhand) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev3) $(errhand) + $(AR) $(ARFLAGS) $(CBLASLIB) $(zlev3) $(errhand) $(RANLIB) $(CBLASLIB) # All precisions all3: $(alev3) $(errhand) - $(ARCH) $(ARCHFLAGS) 
$(CBLASLIB) $(alev3) + $(AR) $(ARFLAGS) $(CBLASLIB) $(alev3) $(RANLIB) $(CBLASLIB) # All levels and precisions cblaslib: $(alev) - $(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev) + $(AR) $(ARFLAGS) $(CBLASLIB) $(alev) $(RANLIB) $(CBLASLIB) FRC: diff --git a/lapack-netlib/DOCS/lawn81.tex b/lapack-netlib/DOCS/lawn81.tex index 16efef768..84e12f80b 100644 --- a/lapack-netlib/DOCS/lawn81.tex +++ b/lapack-netlib/DOCS/lawn81.tex @@ -466,7 +466,7 @@ TIMER = EXT_ETIME Refer to the section~\ref{second} to get more information. -Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify archiver, +Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify archiver, archiver options, and ranlib for your machine. If your architecture does not require \texttt{ranlib} to be run after each archive command (as is the case with CRAY computers running UNICOS, Hewlett Packard diff --git a/lapack-netlib/INSTALL/make.inc.ALPHA b/lapack-netlib/INSTALL/make.inc.ALPHA index 33353d2d0..d20fe70bc 100644 --- a/lapack-netlib/INSTALL/make.inc.ALPHA +++ b/lapack-netlib/INSTALL/make.inc.ALPHA @@ -52,9 +52,9 @@ CFLAGS = -O4 # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib +AR = ar +ARFLAGS = cr +RANLIB = ranlib # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The diff --git a/lapack-netlib/INSTALL/make.inc.HPPA b/lapack-netlib/INSTALL/make.inc.HPPA index 062e1a56b..f38e16bbc 100644 --- a/lapack-netlib/INSTALL/make.inc.HPPA +++ b/lapack-netlib/INSTALL/make.inc.HPPA @@ -52,9 +52,9 @@ CFLAGS = # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. 
# -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo +AR = ar +ARFLAGS = cr +RANLIB = echo # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The diff --git a/lapack-netlib/INSTALL/make.inc.IRIX64 b/lapack-netlib/INSTALL/make.inc.IRIX64 index c8b34e4ed..6ad48c2bc 100644 --- a/lapack-netlib/INSTALL/make.inc.IRIX64 +++ b/lapack-netlib/INSTALL/make.inc.IRIX64 @@ -55,9 +55,9 @@ CFLAGS = -O3 # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo +AR = ar +ARFLAGS = cr +RANLIB = echo # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The diff --git a/lapack-netlib/INSTALL/make.inc.O2K b/lapack-netlib/INSTALL/make.inc.O2K index 55b7de245..1ea0a1c8c 100644 --- a/lapack-netlib/INSTALL/make.inc.O2K +++ b/lapack-netlib/INSTALL/make.inc.O2K @@ -55,9 +55,9 @@ CFLAGS = -O3 # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo +AR = ar +ARFLAGS = cr +RANLIB = echo # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The diff --git a/lapack-netlib/INSTALL/make.inc.SGI5 b/lapack-netlib/INSTALL/make.inc.SGI5 index dccfae333..8afd522b8 100644 --- a/lapack-netlib/INSTALL/make.inc.SGI5 +++ b/lapack-netlib/INSTALL/make.inc.SGI5 @@ -52,9 +52,9 @@ CFLAGS = -O4 # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo +AR = ar +ARFLAGS = cr +RANLIB = echo # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. 
The diff --git a/lapack-netlib/INSTALL/make.inc.SUN4 b/lapack-netlib/INSTALL/make.inc.SUN4 index dd5cfd41e..39b0136ac 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4 +++ b/lapack-netlib/INSTALL/make.inc.SUN4 @@ -52,9 +52,9 @@ CFLAGS = -O3 # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib +AR = ar +ARFLAGS = cr +RANLIB = ranlib # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The diff --git a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 index eb71a386d..db4725845 100644 --- a/lapack-netlib/INSTALL/make.inc.SUN4SOL2 +++ b/lapack-netlib/INSTALL/make.inc.SUN4SOL2 @@ -56,9 +56,9 @@ CFLAGS = -O3 # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo +AR = ar +ARFLAGS = cr +RANLIB = echo # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The diff --git a/lapack-netlib/INSTALL/make.inc.XLF b/lapack-netlib/INSTALL/make.inc.XLF index 5824e8f15..2b05fbebb 100644 --- a/lapack-netlib/INSTALL/make.inc.XLF +++ b/lapack-netlib/INSTALL/make.inc.XLF @@ -53,9 +53,9 @@ CFLAGS = -O3 -qnosave # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib +AR = ar +ARFLAGS = cr +RANLIB = ranlib # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. 
The diff --git a/lapack-netlib/INSTALL/make.inc.gfortran b/lapack-netlib/INSTALL/make.inc.gfortran index 43986435c..27164c2c1 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran +++ b/lapack-netlib/INSTALL/make.inc.gfortran @@ -56,9 +56,9 @@ CFLAGS = -O3 # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib +AR = ar +ARFLAGS = cr +RANLIB = ranlib # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The diff --git a/lapack-netlib/INSTALL/make.inc.gfortran_debug b/lapack-netlib/INSTALL/make.inc.gfortran_debug index 294758f09..ceddfc665 100644 --- a/lapack-netlib/INSTALL/make.inc.gfortran_debug +++ b/lapack-netlib/INSTALL/make.inc.gfortran_debug @@ -56,9 +56,9 @@ CFLAGS = -g # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib +AR = ar +ARFLAGS = cr +RANLIB = ranlib # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The diff --git a/lapack-netlib/INSTALL/make.inc.ifort b/lapack-netlib/INSTALL/make.inc.ifort index 5fca5c47e..3e5b83daa 100644 --- a/lapack-netlib/INSTALL/make.inc.ifort +++ b/lapack-netlib/INSTALL/make.inc.ifort @@ -52,9 +52,9 @@ CFLAGS = -O3 # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib +AR = ar +ARFLAGS = cr +RANLIB = ranlib # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. 
The diff --git a/lapack-netlib/INSTALL/make.inc.pgf95 b/lapack-netlib/INSTALL/make.inc.pgf95 index aaddfa5bd..cfad5dfe3 100644 --- a/lapack-netlib/INSTALL/make.inc.pgf95 +++ b/lapack-netlib/INSTALL/make.inc.pgf95 @@ -52,9 +52,9 @@ CFLAGS = # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo +AR = ar +ARFLAGS = cr +RANLIB = echo # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The diff --git a/lapack-netlib/INSTALL/make.inc.pghpf b/lapack-netlib/INSTALL/make.inc.pghpf index 782c16d76..3261da874 100644 --- a/lapack-netlib/INSTALL/make.inc.pghpf +++ b/lapack-netlib/INSTALL/make.inc.pghpf @@ -52,9 +52,9 @@ CFLAGS = # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = echo +AR = ar +ARFLAGS = cr +RANLIB = echo # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. 
The diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile index 636ca35b6..3e10fff35 100644 --- a/lapack-netlib/LAPACKE/src/Makefile +++ b/lapack-netlib/LAPACKE/src/Makefile @@ -2207,8 +2207,8 @@ OBJ_FILES := $(C_FILES:.o=.o) all: ../../$(LAPACKELIB) ../../$(LAPACKELIB): $(ALLOBJA) $(ALLOBJB) $(ALLXOBJ) $(DEPRECATED) - $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJA) - $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJB) $(ALLXOBJ) $(DEPRECATED) + $(AR) $(ARFLAGS) ../../$(LAPACKELIB) $(ALLOBJA) + $(AR) $(ARFLAGS) ../../$(LAPACKELIB) $(ALLOBJB) $(ALLXOBJ) $(DEPRECATED) $(RANLIB) ../../$(LAPACKELIB) .c.o: diff --git a/lapack-netlib/LAPACKE/utils/Makefile b/lapack-netlib/LAPACKE/utils/Makefile index 1d7856789..cd3de5c8b 100644 --- a/lapack-netlib/LAPACKE/utils/Makefile +++ b/lapack-netlib/LAPACKE/utils/Makefile @@ -186,7 +186,7 @@ OBJ = lapacke_cgb_nancheck.o \ all: lib lib: $(OBJ) - $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(OBJ) + $(AR) $(ARFLAGS) ../../$(LAPACKELIB) $(OBJ) $(RANLIB) ../../$(LAPACKELIB) .c.o: diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 22799769a..257ff136a 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -455,26 +455,26 @@ endif all: ../$(LAPACKLIB) ../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) - $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) + $(AR) $(ARFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED) $(RANLIB) $@ single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \ $(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX) $(RANLIB) ../$(LAPACKLIB) complex: $(CLASRC) $(ZCLASRC) $(CXLASRC) $(SCLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \ $(CXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX) $(RANLIB) ../$(LAPACKLIB) double: $(DLASRC) $(DSLASRC) $(DXLASRC) 
$(DZLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \ $(DXLASRC) $(DZLAUX) $(ALLAUX) $(ALLXAUX) $(RANLIB) ../$(LAPACKLIB) complex16: $(ZLASRC) $(ZCLASRC) $(ZXLASRC) $(DZLAUX) $(ALLAUX) - $(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \ + $(AR) $(ARFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \ $(ZXLASRC) $(DZLAUX) $(ALLAUX) $(ALLXAUX) $(RANLIB) ../$(LAPACKLIB) diff --git a/lapack-netlib/SRC/VARIANTS/Makefile b/lapack-netlib/SRC/VARIANTS/Makefile index 42446eb55..6034aed60 100644 --- a/lapack-netlib/SRC/VARIANTS/Makefile +++ b/lapack-netlib/SRC/VARIANTS/Makefile @@ -35,27 +35,27 @@ QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil. all: cholrl choltop lucr lull lurec qrll cholrl: $(CHOLRL) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/cholrl.a $(CHOLRL) + $(AR) $(ARFLAGS) $(VARIANTSDIR)/cholrl.a $(CHOLRL) $(RANLIB) $(VARIANTSDIR)/cholrl.a choltop: $(CHOLTOP) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/choltop.a $(CHOLTOP) + $(AR) $(ARFLAGS) $(VARIANTSDIR)/choltop.a $(CHOLTOP) $(RANLIB) $(VARIANTSDIR)/choltop.a lucr: $(LUCR) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lucr.a $(LUCR) + $(AR) $(ARFLAGS) $(VARIANTSDIR)/lucr.a $(LUCR) $(RANLIB) $(VARIANTSDIR)/lucr.a lull: $(LULL) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lull.a $(LULL) + $(AR) $(ARFLAGS) $(VARIANTSDIR)/lull.a $(LULL) $(RANLIB) $(VARIANTSDIR)/lull.a lurec: $(LUREC) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lurec.a $(LUREC) + $(AR) $(ARFLAGS) $(VARIANTSDIR)/lurec.a $(LUREC) $(RANLIB) $(VARIANTSDIR)/lurec.a qrll: $(QRLL) - $(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/qrll.a $(QRLL) + $(AR) $(ARFLAGS) $(VARIANTSDIR)/qrll.a $(QRLL) $(RANLIB) $(VARIANTSDIR)/qrll.a diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile index ecd9aa5c1..0ba5f44ad 100644 --- a/lapack-netlib/TESTING/MATGEN/Makefile +++ b/lapack-netlib/TESTING/MATGEN/Makefile @@ -59,23 +59,23 @@ ALLOBJ = $(SMATGEN) $(CMATGEN) 
$(SCATGEN) $(DMATGEN) $(ZMATGEN) \ $(DZATGEN) ../../$(TMGLIB): $(ALLOBJ) - $(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) + $(AR) $(ARFLAGS) $@ $(ALLOBJ) $(RANLIB) $@ single: $(SMATGEN) $(SCATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(SMATGEN) $(SCATGEN) + $(AR) $(ARFLAGS) ../../$(TMGLIB) $(SMATGEN) $(SCATGEN) $(RANLIB) ../../$(TMGLIB) complex: $(CMATGEN) $(SCATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(CMATGEN) $(SCATGEN) + $(AR) $(ARFLAGS) ../../$(TMGLIB) $(CMATGEN) $(SCATGEN) $(RANLIB) ../../$(TMGLIB) double: $(DMATGEN) $(DZATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(DMATGEN) $(DZATGEN) + $(AR) $(ARFLAGS) ../../$(TMGLIB) $(DMATGEN) $(DZATGEN) $(RANLIB) ../../$(TMGLIB) complex16: $(ZMATGEN) $(DZATGEN) - $(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(ZMATGEN) $(DZATGEN) + $(AR) $(ARFLAGS) ../../$(TMGLIB) $(ZMATGEN) $(DZATGEN) $(RANLIB) ../../$(TMGLIB) $(SCATGEN): $(FRC) diff --git a/lapack-netlib/make.inc.example b/lapack-netlib/make.inc.example index 504a16421..0379069bb 100644 --- a/lapack-netlib/make.inc.example +++ b/lapack-netlib/make.inc.example @@ -57,9 +57,9 @@ CFLAGS = -O3 # The archiver and the flag(s) to use when building archive (library) # If you system has no ranlib, set RANLIB = echo. # -ARCH = ar -ARCHFLAGS= cr -RANLIB = ranlib +AR = ar +ARFLAGS = cr +RANLIB = ranlib # # Location of the extended-precision BLAS (XBLAS) Fortran library # used for building and testing extended-precision routines. The diff --git a/make.inc b/make.inc index 1fc95b0c6..a05d3f42e 100644 --- a/make.inc +++ b/make.inc @@ -1,6 +1,6 @@ SHELL = /bin/sh PLAT = _LINUX DRVOPTS = $(NOOPT) -ARCHFLAGS= -ru +ARFLAGS= -ru #RANLIB = ranlib diff --git a/param.h b/param.h index 555829d45..9f131eb77 100644 --- a/param.h +++ b/param.h @@ -2174,7 +2174,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define SYMV_P 16 #endif -#if defined(I6400) || defined(P6600) +#if defined(P5600) || defined(I6400) || defined(P6600) #define SNUMOPT 2 #define DNUMOPT 2 @@ -2182,6 +2182,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GEMM_DEFAULT_OFFSET_B 0 #define GEMM_DEFAULT_ALIGN 0x03fffUL +#ifdef MIPS_SIMD #define SGEMM_DEFAULT_UNROLL_M 8 #define SGEMM_DEFAULT_UNROLL_N 8 @@ -2193,46 +2194,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define ZGEMM_DEFAULT_UNROLL_M 4 #define ZGEMM_DEFAULT_UNROLL_N 4 +#else +#define SGEMM_DEFAULT_UNROLL_M 2 +#define SGEMM_DEFAULT_UNROLL_N 2 -#define SGEMM_DEFAULT_P 128 -#define DGEMM_DEFAULT_P 128 -#define CGEMM_DEFAULT_P 96 -#define ZGEMM_DEFAULT_P 64 +#define DGEMM_DEFAULT_UNROLL_M 2 +#define DGEMM_DEFAULT_UNROLL_N 2 -#define SGEMM_DEFAULT_Q 240 -#define DGEMM_DEFAULT_Q 120 -#define CGEMM_DEFAULT_Q 120 -#define ZGEMM_DEFAULT_Q 120 +#define CGEMM_DEFAULT_UNROLL_M 2 +#define CGEMM_DEFAULT_UNROLL_N 2 -#define SGEMM_DEFAULT_R 12288 -#define DGEMM_DEFAULT_R 8192 -#define CGEMM_DEFAULT_R 4096 -#define ZGEMM_DEFAULT_R 4096 - - -#define SYMV_P 16 +#define ZGEMM_DEFAULT_UNROLL_M 2 +#define ZGEMM_DEFAULT_UNROLL_N 2 #endif -#if defined(P5600) -#define SNUMOPT 2 -#define DNUMOPT 2 - -#define GEMM_DEFAULT_OFFSET_A 0 -#define GEMM_DEFAULT_OFFSET_B 0 -#define GEMM_DEFAULT_ALIGN 0x03fffUL - -#define SGEMM_DEFAULT_UNROLL_M 8 -#define SGEMM_DEFAULT_UNROLL_N 8 - -#define DGEMM_DEFAULT_UNROLL_M 8 -#define DGEMM_DEFAULT_UNROLL_N 4 - -#define CGEMM_DEFAULT_UNROLL_M 8 -#define CGEMM_DEFAULT_UNROLL_N 4 - -#define ZGEMM_DEFAULT_UNROLL_M 4 -#define ZGEMM_DEFAULT_UNROLL_N 4 - #define SGEMM_DEFAULT_P 128 #define DGEMM_DEFAULT_P 128 #define CGEMM_DEFAULT_P 96 @@ -2248,7 +2223,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define CGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096 - #define SYMV_P 16 #endif