Add MIPS n32 ABI support and MSA support detection; rename ARCH and ARCHFLAGS to AR and ARFLAGS
Signed-off-by: Shivraj Patil <shivraj.patil@imgtec.com>
This commit is contained in:
parent
b544be914d
commit
d1c6469283
4
Makefile
4
Makefile
|
@ -228,8 +228,8 @@ ifndef NOFORTRAN
|
||||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "ARFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
|
|
@ -17,14 +17,26 @@ ifdef CPUIDEMU
|
||||||
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), P5600)
|
||||||
|
TARGET_FLAGS = -mips32r5
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), I6400)
|
||||||
|
TARGET_FLAGS = -mips64r6
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(TARGET), P6600)
|
||||||
|
TARGET_FLAGS = -mips64r6
|
||||||
|
endif
|
||||||
|
|
||||||
all: getarch_2nd
|
all: getarch_2nd
|
||||||
./getarch_2nd 0 >> $(TARGET_MAKE)
|
./getarch_2nd 0 >> $(TARGET_MAKE)
|
||||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||||
|
|
||||||
config.h : c_check f_check getarch
|
config.h : c_check f_check getarch
|
||||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC)
|
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS)
|
||||||
ifneq ($(ONLY_CBLAS), 1)
|
ifneq ($(ONLY_CBLAS), 1)
|
||||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC)
|
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
|
||||||
else
|
else
|
||||||
#When we only build CBLAS, we set NOFORTRAN=2
|
#When we only build CBLAS, we set NOFORTRAN=2
|
||||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||||
|
|
|
@ -159,7 +159,7 @@ ifndef GOTOBLAS_MAKEFILE
|
||||||
export GOTOBLAS_MAKEFILE = 1
|
export GOTOBLAS_MAKEFILE = 1
|
||||||
|
|
||||||
# Generating Makefile.conf and config.h
|
# Generating Makefile.conf and config.h
|
||||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all)
|
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
|
||||||
|
|
||||||
ifndef TARGET_CORE
|
ifndef TARGET_CORE
|
||||||
include $(TOPDIR)/Makefile.conf
|
include $(TOPDIR)/Makefile.conf
|
||||||
|
@ -502,13 +502,16 @@ endif
|
||||||
|
|
||||||
ifdef NO_BINARY_MODE
|
ifdef NO_BINARY_MODE
|
||||||
|
|
||||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
ifeq ($(ARCH), $(filter $(ARCH),mips64))
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
CCOMMON_OPT += -mabi=64
|
CCOMMON_OPT += -mabi=64
|
||||||
else
|
else
|
||||||
CCOMMON_OPT += -mabi=32
|
CCOMMON_OPT += -mabi=n32
|
||||||
endif
|
endif
|
||||||
BINARY_DEFINED = 1
|
BINARY_DEFINED = 1
|
||||||
|
else ifeq ($(ARCH), $(filter $(ARCH),mips))
|
||||||
|
CCOMMON_OPT += -mabi=32
|
||||||
|
BINARY_DEFINED = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), LOONGSON3A)
|
ifeq ($(CORE), LOONGSON3A)
|
||||||
|
@ -522,13 +525,18 @@ FCOMMON_OPT += -march=mips64
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), P5600)
|
ifeq ($(CORE), P5600)
|
||||||
CCOMMON_OPT += -mmsa
|
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MIPS_SIMD_FLAGS)
|
||||||
FCOMMON_OPT += -mmsa
|
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MIPS_SIMD_FLAGS)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifneq ($(filter $(CORE), I6400 P6600),)
|
ifeq ($(CORE), I6400)
|
||||||
CCOMMON_OPT += -mmsa
|
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MIPS_SIMD_FLAGS)
|
||||||
FCOMMON_OPT += -mmsa
|
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MIPS_SIMD_FLAGS)
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifeq ($(CORE), P6600)
|
||||||
|
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MIPS_SIMD_FLAGS)
|
||||||
|
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MIPS_SIMD_FLAGS)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), AIX)
|
ifeq ($(OSNAME), AIX)
|
||||||
|
@ -599,12 +607,14 @@ ifneq ($(NO_LAPACK), 1)
|
||||||
EXTRALIB += -lgfortran
|
EXTRALIB += -lgfortran
|
||||||
endif
|
endif
|
||||||
ifdef NO_BINARY_MODE
|
ifdef NO_BINARY_MODE
|
||||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips))
|
ifeq ($(ARCH), $(filter $(ARCH),mips64))
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
FCOMMON_OPT += -mabi=64
|
FCOMMON_OPT += -mabi=64
|
||||||
else
|
else
|
||||||
FCOMMON_OPT += -mabi=32
|
FCOMMON_OPT += -mabi=n32
|
||||||
endif
|
endif
|
||||||
|
else ifeq ($(ARCH), $(filter $(ARCH),mips))
|
||||||
|
FCOMMON_OPT += -mabi=32
|
||||||
endif
|
endif
|
||||||
else
|
else
|
||||||
ifdef BINARY64
|
ifdef BINARY64
|
||||||
|
@ -688,20 +698,6 @@ endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(filter $(ARCH),mips64 mips))
|
|
||||||
ifndef BINARY64
|
|
||||||
FCOMMON_OPT += -m32
|
|
||||||
else
|
|
||||||
FCOMMON_OPT += -m64
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
ifdef BINARY64
|
|
||||||
FCOMMON_OPT += -mabi=64
|
|
||||||
else
|
|
||||||
FCOMMON_OPT += -mabi=32
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(USE_OPENMP), 1)
|
ifeq ($(USE_OPENMP), 1)
|
||||||
FCOMMON_OPT += -mp
|
FCOMMON_OPT += -mp
|
||||||
endif
|
endif
|
||||||
|
|
34
c_check
34
c_check
|
@ -1,6 +1,7 @@
|
||||||
#!/usr/bin/perl
|
#!/usr/bin/perl
|
||||||
|
|
||||||
use File::Basename;
|
use File::Basename;
|
||||||
|
use File::Temp qw(tempfile);
|
||||||
|
|
||||||
# Checking cross compile
|
# Checking cross compile
|
||||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
|
||||||
|
@ -10,6 +11,7 @@ $hostarch = "arm" if ($hostarch =~ /^arm.*/);
|
||||||
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
$hostarch = "arm64" if ($hostarch eq "aarch64");
|
||||||
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
|
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/);
|
||||||
|
|
||||||
|
$tmpf = new File::Temp( UNLINK => 1 );
|
||||||
$binary = $ENV{"BINARY"};
|
$binary = $ENV{"BINARY"};
|
||||||
|
|
||||||
$makefile = shift(@ARGV);
|
$makefile = shift(@ARGV);
|
||||||
|
@ -79,8 +81,13 @@ if ($os eq "AIX") {
|
||||||
$defined = 1;
|
$defined = 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (($architecture eq "mips") || ($architecture eq "mips64")) {
|
if ($architecture eq "mips") {
|
||||||
$compiler_name .= " -mabi=32" if ($binary eq "32");
|
$compiler_name .= " -mabi=32";
|
||||||
|
$defined = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($architecture eq "mips64") {
|
||||||
|
$compiler_name .= " -mabi=n32" if ($binary eq "32");
|
||||||
$compiler_name .= " -mabi=64" if ($binary eq "64");
|
$compiler_name .= " -mabi=64" if ($binary eq "64");
|
||||||
$defined = 1;
|
$defined = 1;
|
||||||
}
|
}
|
||||||
|
@ -152,6 +159,26 @@ if ($?) {
|
||||||
die 1;
|
die 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Probe whether the C compiler can build MIPS SIMD Architecture (MSA) code.
#
# Outputs:
#   $mips_simd - 1 only when the target architecture is mips/mips64 AND the
#                probe program below built successfully; 0 otherwise.
#   $msa_flags - compiler options used for the probe (set only on MIPS).
#   $ret       - exit code of the probe compiler run (0 when it exited cleanly).
#
# The flag starts at 0 so that non-MIPS targets never advertise MIPS_SIMD.
$mips_simd = 0;
if (($architecture eq "mips") || ($architecture eq "mips64")) {
    # Single-quoted on purpose: $w0/$w1 are MSA register names, not Perl variables.
    $code = '"addvi.b $w0, $w1, 1"';
    $msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs";
    print $tmpf "#include <msa.h>\n\n";
    print $tmpf "void main(void){ __asm__ volatile($code); }\n";
    # Output to $tmpf is buffered; push it to disk before the compiler reads it.
    $tmpf->flush();

    $ret = 0;
    # $tmpf is interpolated as the temporary file's path; "-x c" forces the
    # language explicitly instead of relying on the file name.
    $args = "$msa_flags -o $tmpf.o -x c $tmpf";
    my @cmd = ("$compiler_name $args");
    system(@cmd);
    # Keep the raw wait status: ($? >> 8) alone is 0 when the compiler was
    # killed by a signal, which must not be mistaken for a successful build.
    my $probe_status = $?;
    if ($probe_status != 0) {
        $ret = ($probe_status >> 8);
    }
    unlink("$tmpf.o");
    $mips_simd = 1 if ($probe_status == 0);
}
|
||||||
|
|
||||||
$architecture = x86 if ($data =~ /ARCH_X86/);
|
$architecture = x86 if ($data =~ /ARCH_X86/);
|
||||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
|
||||||
$architecture = power if ($data =~ /ARCH_POWER/);
|
$architecture = power if ($data =~ /ARCH_POWER/);
|
||||||
|
@ -246,6 +273,8 @@ print MAKEFILE "FU=$need_fu\n" if $need_fu ne "";
|
||||||
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne "";
|
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne "";
|
||||||
print MAKEFILE "CROSS=1\n" if $cross != 0;
|
print MAKEFILE "CROSS=1\n" if $cross != 0;
|
||||||
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
|
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
|
||||||
|
print MAKEFILE "MIPS_SIMD=1\n" if $mips_simd eq 1;
|
||||||
|
print MAKEFILE "MIPS_SIMD_FLAGS=$msa_flags\n" if $mips_simd eq 1;
|
||||||
|
|
||||||
$os =~ tr/[a-z]/[A-Z]/;
|
$os =~ tr/[a-z]/[A-Z]/;
|
||||||
$architecture =~ tr/[a-z]/[A-Z]/;
|
$architecture =~ tr/[a-z]/[A-Z]/;
|
||||||
|
@ -257,6 +286,7 @@ print CONFFILE "#define C_$compiler\t1\n";
|
||||||
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
|
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32;
|
||||||
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
|
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
|
||||||
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
|
||||||
|
print CONFFILE "#define MIPS_SIMD\t1\n" if $mips_simd eq 1;
|
||||||
|
|
||||||
if ($os eq "LINUX") {
|
if ($os eq "LINUX") {
|
||||||
|
|
||||||
|
|
5
f_check
5
f_check
|
@ -223,7 +223,12 @@ if (!$?) {
|
||||||
}
|
}
|
||||||
#For gfortran MIPS
|
#For gfortran MIPS
|
||||||
if ($?) {
|
if ($?) {
|
||||||
|
$mips_data = `$compiler_bin -E -dM - < /dev/null`;
|
||||||
|
if ($mips_data =~ /_MIPS_ISA_MIPS64/) {
|
||||||
|
$link = `$compiler $openmp -mabi=n32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
||||||
|
} else {
|
||||||
$link = `$compiler $openmp -mabi=32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
$link = `$compiler $openmp -mabi=32 -v ftest2.f 2>&1 && rm -f a.out a.exe`;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
$binary = "" if ($?);
|
$binary = "" if ($?);
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,6 +4,7 @@ endif
|
||||||
|
|
||||||
TOPDIR = ..
|
TOPDIR = ..
|
||||||
include $(TOPDIR)/Makefile.system
|
include $(TOPDIR)/Makefile.system
|
||||||
|
include $(TOPDIR)/Makefile.conf
|
||||||
|
|
||||||
ifdef TARGET_CORE
|
ifdef TARGET_CORE
|
||||||
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)
|
||||||
|
|
|
@ -30,10 +30,17 @@ IDMAXKERNEL = ../mips/imax.c
|
||||||
ISMINKERNEL = ../mips/imin.c
|
ISMINKERNEL = ../mips/imin.c
|
||||||
IDMINKERNEL = ../mips/imin.c
|
IDMINKERNEL = ../mips/imin.c
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
SASUMKERNEL = ../mips/sasum_msa.c
|
SASUMKERNEL = ../mips/sasum_msa.c
|
||||||
DASUMKERNEL = ../mips/dasum_msa.c
|
DASUMKERNEL = ../mips/dasum_msa.c
|
||||||
CASUMKERNEL = ../mips/casum_msa.c
|
CASUMKERNEL = ../mips/casum_msa.c
|
||||||
ZASUMKERNEL = ../mips/zasum_msa.c
|
ZASUMKERNEL = ../mips/zasum_msa.c
|
||||||
|
else
|
||||||
|
SASUMKERNEL = ../mips/asum.c
|
||||||
|
DASUMKERNEL = ../mips/asum.c
|
||||||
|
CASUMKERNEL = ../mips/asum.c
|
||||||
|
ZASUMKERNEL = ../mips/asum.c
|
||||||
|
endif
|
||||||
|
|
||||||
SAXPYKERNEL = ../mips/axpy.c
|
SAXPYKERNEL = ../mips/axpy.c
|
||||||
DAXPYKERNEL = ../mips/axpy.c
|
DAXPYKERNEL = ../mips/axpy.c
|
||||||
|
@ -45,10 +52,17 @@ DCOPYKERNEL = ../mips/copy.c
|
||||||
CCOPYKERNEL = ../mips/zcopy.c
|
CCOPYKERNEL = ../mips/zcopy.c
|
||||||
ZCOPYKERNEL = ../mips/zcopy.c
|
ZCOPYKERNEL = ../mips/zcopy.c
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
SDOTKERNEL = ../mips/sdot_msa.c
|
SDOTKERNEL = ../mips/sdot_msa.c
|
||||||
DDOTKERNEL = ../mips/ddot_msa.c
|
DDOTKERNEL = ../mips/ddot_msa.c
|
||||||
CDOTKERNEL = ../mips/cdot_msa.c
|
CDOTKERNEL = ../mips/cdot_msa.c
|
||||||
ZDOTKERNEL = ../mips/zdot_msa.c
|
ZDOTKERNEL = ../mips/zdot_msa.c
|
||||||
|
else
|
||||||
|
SDOTKERNEL = ../mips/dot.c
|
||||||
|
DDOTKERNEL = ../mips/dot.c
|
||||||
|
CDOTKERNEL = ../mips/zdot.c
|
||||||
|
ZDOTKERNEL = ../mips/zdot.c
|
||||||
|
endif
|
||||||
|
|
||||||
SNRM2KERNEL = ../mips/nrm2.c
|
SNRM2KERNEL = ../mips/nrm2.c
|
||||||
DNRM2KERNEL = ../mips/nrm2.c
|
DNRM2KERNEL = ../mips/nrm2.c
|
||||||
|
@ -70,22 +84,45 @@ DSWAPKERNEL = ../mips/swap.c
|
||||||
CSWAPKERNEL = ../mips/zswap.c
|
CSWAPKERNEL = ../mips/zswap.c
|
||||||
ZSWAPKERNEL = ../mips/zswap.c
|
ZSWAPKERNEL = ../mips/zswap.c
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
SGEMVNKERNEL = ../mips/sgemv_n_msa.c
|
SGEMVNKERNEL = ../mips/sgemv_n_msa.c
|
||||||
DGEMVNKERNEL = ../mips/dgemv_n_msa.c
|
DGEMVNKERNEL = ../mips/dgemv_n_msa.c
|
||||||
CGEMVNKERNEL = ../mips/cgemv_n_msa.c
|
CGEMVNKERNEL = ../mips/cgemv_n_msa.c
|
||||||
ZGEMVNKERNEL = ../mips/zgemv_n_msa.c
|
ZGEMVNKERNEL = ../mips/zgemv_n_msa.c
|
||||||
|
else
|
||||||
|
SGEMVNKERNEL = ../mips/gemv_n.c
|
||||||
|
DGEMVNKERNEL = ../mips/gemv_n.c
|
||||||
|
CGEMVNKERNEL = ../mips/zgemv_n.c
|
||||||
|
ZGEMVNKERNEL = ../mips/zgemv_n.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
SGEMVTKERNEL = ../mips/sgemv_t_msa.c
|
SGEMVTKERNEL = ../mips/sgemv_t_msa.c
|
||||||
DGEMVTKERNEL = ../mips/dgemv_t_msa.c
|
DGEMVTKERNEL = ../mips/dgemv_t_msa.c
|
||||||
CGEMVTKERNEL = ../mips/cgemv_t_msa.c
|
CGEMVTKERNEL = ../mips/cgemv_t_msa.c
|
||||||
ZGEMVTKERNEL = ../mips/zgemv_t_msa.c
|
ZGEMVTKERNEL = ../mips/zgemv_t_msa.c
|
||||||
|
else
|
||||||
|
SGEMVTKERNEL = ../mips/gemv_t.c
|
||||||
|
DGEMVTKERNEL = ../mips/gemv_t.c
|
||||||
|
CGEMVTKERNEL = ../mips/zgemv_t.c
|
||||||
|
ZGEMVTKERNEL = ../mips/zgemv_t.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
SGEMMKERNEL = ../mips/sgemm_kernel_8x8_msa.c
|
SGEMMKERNEL = ../mips/sgemm_kernel_8x8_msa.c
|
||||||
SGEMMONCOPY = ../mips/sgemm_ncopy_8_msa.c
|
SGEMMONCOPY = ../mips/sgemm_ncopy_8_msa.c
|
||||||
SGEMMOTCOPY = ../mips/sgemm_tcopy_8_msa.c
|
SGEMMOTCOPY = ../mips/sgemm_tcopy_8_msa.c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
else
|
||||||
|
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
|
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
DGEMMKERNEL = ../mips/dgemm_kernel_8x4_msa.c
|
DGEMMKERNEL = ../mips/dgemm_kernel_8x4_msa.c
|
||||||
DGEMMINCOPY = ../mips/dgemm_ncopy_8_msa.c
|
DGEMMINCOPY = ../mips/dgemm_ncopy_8_msa.c
|
||||||
DGEMMITCOPY = ../mips/dgemm_tcopy_8_msa.c
|
DGEMMITCOPY = ../mips/dgemm_tcopy_8_msa.c
|
||||||
|
@ -95,7 +132,15 @@ DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
else
|
||||||
|
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
|
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
CGEMMKERNEL = ../mips/cgemm_kernel_8x4_msa.c
|
CGEMMKERNEL = ../mips/cgemm_kernel_8x4_msa.c
|
||||||
CGEMMINCOPY = ../mips/cgemm_ncopy_8_msa.c
|
CGEMMINCOPY = ../mips/cgemm_ncopy_8_msa.c
|
||||||
CGEMMITCOPY = ../mips/cgemm_tcopy_8_msa.c
|
CGEMMITCOPY = ../mips/cgemm_tcopy_8_msa.c
|
||||||
|
@ -105,29 +150,72 @@ CGEMMINCOPYOBJ = cgemm_incopy.o
|
||||||
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||||
|
else
|
||||||
|
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
ZGEMMKERNEL = ../mips/zgemm_kernel_4x4_msa.c
|
ZGEMMKERNEL = ../mips/zgemm_kernel_4x4_msa.c
|
||||||
ZGEMMONCOPY = ../mips/zgemm_ncopy_4_msa.c
|
ZGEMMONCOPY = ../mips/zgemm_ncopy_4_msa.c
|
||||||
ZGEMMOTCOPY = ../mips/zgemm_tcopy_4_msa.c
|
ZGEMMOTCOPY = ../mips/zgemm_tcopy_4_msa.c
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||||
|
else
|
||||||
|
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||||
|
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
STRSMKERNEL_LN = ../mips/strsm_kernel_LN_8x8_msa.c
|
STRSMKERNEL_LN = ../mips/strsm_kernel_LN_8x8_msa.c
|
||||||
STRSMKERNEL_LT = ../mips/strsm_kernel_LT_8x8_msa.c
|
STRSMKERNEL_LT = ../mips/strsm_kernel_LT_8x8_msa.c
|
||||||
STRSMKERNEL_RN = ../mips/strsm_kernel_RN_8x8_msa.c
|
STRSMKERNEL_RN = ../mips/strsm_kernel_RN_8x8_msa.c
|
||||||
STRSMKERNEL_RT = ../mips/strsm_kernel_RT_8x8_msa.c
|
STRSMKERNEL_RT = ../mips/strsm_kernel_RT_8x8_msa.c
|
||||||
|
else
|
||||||
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
DTRSMKERNEL_LN = ../mips/dtrsm_kernel_LN_8x4_msa.c
|
DTRSMKERNEL_LN = ../mips/dtrsm_kernel_LN_8x4_msa.c
|
||||||
DTRSMKERNEL_LT = ../mips/dtrsm_kernel_LT_8x4_msa.c
|
DTRSMKERNEL_LT = ../mips/dtrsm_kernel_LT_8x4_msa.c
|
||||||
DTRSMKERNEL_RN = ../mips/dtrsm_kernel_RN_8x4_msa.c
|
DTRSMKERNEL_RN = ../mips/dtrsm_kernel_RN_8x4_msa.c
|
||||||
DTRSMKERNEL_RT = ../mips/dtrsm_kernel_RT_8x4_msa.c
|
DTRSMKERNEL_RT = ../mips/dtrsm_kernel_RT_8x4_msa.c
|
||||||
|
else
|
||||||
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
else
|
||||||
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
endif
|
||||||
|
|
||||||
|
ifdef MIPS_SIMD
|
||||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
else
|
||||||
|
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
endif
|
|
@ -138,26 +138,26 @@ ALLOBJ=$(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \
|
||||||
$(ZBLAS2) $(ZBLAS3) $(ALLBLAS)
|
$(ZBLAS2) $(ZBLAS3) $(ALLBLAS)
|
||||||
|
|
||||||
$(BLASLIB): $(ALLOBJ)
|
$(BLASLIB): $(ALLOBJ)
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ)
|
$(AR) $(ARFLAGS) $@ $(ALLOBJ)
|
||||||
$(RANLIB) $@
|
$(RANLIB) $@
|
||||||
|
|
||||||
single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3)
|
single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3)
|
||||||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $(SBLAS1) $(ALLBLAS) \
|
$(AR) $(ARFLAGS) $(BLASLIB) $(SBLAS1) $(ALLBLAS) \
|
||||||
$(SBLAS2) $(SBLAS3)
|
$(SBLAS2) $(SBLAS3)
|
||||||
$(RANLIB) $(BLASLIB)
|
$(RANLIB) $(BLASLIB)
|
||||||
|
|
||||||
double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3)
|
double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3)
|
||||||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $(DBLAS1) $(ALLBLAS) \
|
$(AR) $(ARFLAGS) $(BLASLIB) $(DBLAS1) $(ALLBLAS) \
|
||||||
$(DBLAS2) $(DBLAS3)
|
$(DBLAS2) $(DBLAS3)
|
||||||
$(RANLIB) $(BLASLIB)
|
$(RANLIB) $(BLASLIB)
|
||||||
|
|
||||||
complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3)
|
complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3)
|
||||||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $(CBLAS1) $(CB1AUX) \
|
$(AR) $(ARFLAGS) $(BLASLIB) $(CBLAS1) $(CB1AUX) \
|
||||||
$(ALLBLAS) $(CBLAS2) $(CBLAS3)
|
$(ALLBLAS) $(CBLAS2) $(CBLAS3)
|
||||||
$(RANLIB) $(BLASLIB)
|
$(RANLIB) $(BLASLIB)
|
||||||
|
|
||||||
complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3)
|
complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3)
|
||||||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $(ZBLAS1) $(ZB1AUX) \
|
$(AR) $(ARFLAGS) $(BLASLIB) $(ZBLAS1) $(ZB1AUX) \
|
||||||
$(ALLBLAS) $(ZBLAS2) $(ZBLAS3)
|
$(ALLBLAS) $(ZBLAS2) $(ZBLAS3)
|
||||||
$(RANLIB) $(BLASLIB)
|
$(RANLIB) $(BLASLIB)
|
||||||
|
|
||||||
|
|
|
@ -44,6 +44,6 @@ FFLAGS = -O3
|
||||||
# Archive programs and flags
|
# Archive programs and flags
|
||||||
#-----------------------------------------------------------------------------
|
#-----------------------------------------------------------------------------
|
||||||
|
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
|
|
|
@ -73,27 +73,27 @@ alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1)
|
||||||
|
|
||||||
# Single real precision
|
# Single real precision
|
||||||
slib1: $(slev1) $(sclev1)
|
slib1: $(slev1) $(sclev1)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev1) $(sclev1)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(slev1) $(sclev1)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double real precision
|
# Double real precision
|
||||||
dlib1: $(dlev1)
|
dlib1: $(dlev1)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev1)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(dlev1)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Single complex precision
|
# Single complex precision
|
||||||
clib1: $(clev1) $(sclev1)
|
clib1: $(clev1) $(sclev1)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev1) $(sclev1)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(clev1) $(sclev1)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double complex precision
|
# Double complex precision
|
||||||
zlib1: $(zlev1)
|
zlib1: $(zlev1)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev1)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(zlev1)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# All precisions
|
# All precisions
|
||||||
all1: $(alev1)
|
all1: $(alev1)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev1)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(alev1)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -146,27 +146,27 @@ alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2)
|
||||||
|
|
||||||
# Single real precision
|
# Single real precision
|
||||||
slib2: $(slev2) $(errhand)
|
slib2: $(slev2) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev2) $(errhand)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(slev2) $(errhand)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double real precision
|
# Double real precision
|
||||||
dlib2: $(dlev2) $(errhand)
|
dlib2: $(dlev2) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev2) $(errhand)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(dlev2) $(errhand)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Single complex precision
|
# Single complex precision
|
||||||
clib2: $(clev2) $(errhand)
|
clib2: $(clev2) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev2) $(errhand)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(clev2) $(errhand)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double complex precision
|
# Double complex precision
|
||||||
zlib2: $(zlev2) $(errhand)
|
zlib2: $(zlev2) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev2) $(errhand)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(zlev2) $(errhand)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# All precisions
|
# All precisions
|
||||||
all2: $(alev2) $(errhand)
|
all2: $(alev2) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev2) $(errhand)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(alev2) $(errhand)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
|
@ -211,32 +211,32 @@ alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3)
|
||||||
|
|
||||||
# Single real precision
|
# Single real precision
|
||||||
slib3: $(slev3) $(errhand)
|
slib3: $(slev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev3) $(errhand)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(slev3) $(errhand)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double real precision
|
# Double real precision
|
||||||
dlib3: $(dlev3) $(errhand)
|
dlib3: $(dlev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev3) $(errhand)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(dlev3) $(errhand)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Single complex precision
|
# Single complex precision
|
||||||
clib3: $(clev3) $(errhand)
|
clib3: $(clev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev3) $(errhand)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(clev3) $(errhand)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double complex precision
|
# Double complex precision
|
||||||
zlib3: $(zlev3) $(errhand)
|
zlib3: $(zlev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev3) $(errhand)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(zlev3) $(errhand)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# All precisions
|
# All precisions
|
||||||
all3: $(alev3) $(errhand)
|
all3: $(alev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev3)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(alev3)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# All levels and precisions
|
# All levels and precisions
|
||||||
cblaslib: $(alev)
|
cblaslib: $(alev)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev)
|
$(AR) $(ARFLAGS) $(CBLASLIB) $(alev)
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
FRC:
|
FRC:
|
||||||
|
|
|
@ -466,7 +466,7 @@ TIMER = EXT_ETIME
|
||||||
Refer to Section~\ref{second} for more information.
|
Refer to Section~\ref{second} for more information.
|
||||||
|
|
||||||
|
|
||||||
Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify the archiver,
|
Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify the archiver,
|
||||||
archiver options, and ranlib for your machine. If your architecture
|
archiver options, and ranlib for your machine. If your architecture
|
||||||
does not require \texttt{ranlib} to be run after each archive command (as
|
does not require \texttt{ranlib} to be run after each archive command (as
|
||||||
is the case with CRAY computers running UNICOS, Hewlett Packard
|
is the case with CRAY computers running UNICOS, Hewlett Packard
|
||||||
|
|
|
@ -52,8 +52,8 @@ CFLAGS = -O4
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If your system has no ranlib, set RANLIB = echo.
|
# If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -52,8 +52,8 @@ CFLAGS =
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If your system has no ranlib, set RANLIB = echo.
|
# If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -55,8 +55,8 @@ CFLAGS = -O3
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If your system has no ranlib, set RANLIB = echo.
|
# If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -55,8 +55,8 @@ CFLAGS = -O3
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If your system has no ranlib, set RANLIB = echo.
|
# If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -52,8 +52,8 @@ CFLAGS = -O4
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If your system has no ranlib, set RANLIB = echo.
|
# If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -52,8 +52,8 @@ CFLAGS = -O3
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If you system has no ranlib, set RANLIB = echo.
|
# If you system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -56,8 +56,8 @@ CFLAGS = -O3
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If you system has no ranlib, set RANLIB = echo.
|
# If you system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -53,8 +53,8 @@ CFLAGS = -O3 -qnosave
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If you system has no ranlib, set RANLIB = echo.
|
# If you system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -56,8 +56,8 @@ CFLAGS = -O3
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If you system has no ranlib, set RANLIB = echo.
|
# If you system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -56,8 +56,8 @@ CFLAGS = -g
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If you system has no ranlib, set RANLIB = echo.
|
# If you system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -52,8 +52,8 @@ CFLAGS = -O3
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If you system has no ranlib, set RANLIB = echo.
|
# If you system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -52,8 +52,8 @@ CFLAGS =
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If you system has no ranlib, set RANLIB = echo.
|
# If you system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -52,8 +52,8 @@ CFLAGS =
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If you system has no ranlib, set RANLIB = echo.
|
# If you system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
|
@ -2207,8 +2207,8 @@ OBJ_FILES := $(C_FILES:.o=.o)
|
||||||
all: ../../$(LAPACKELIB)
|
all: ../../$(LAPACKELIB)
|
||||||
|
|
||||||
../../$(LAPACKELIB): $(ALLOBJA) $(ALLOBJB) $(ALLXOBJ) $(DEPRECATED)
|
../../$(LAPACKELIB): $(ALLOBJA) $(ALLOBJB) $(ALLXOBJ) $(DEPRECATED)
|
||||||
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJA)
|
$(AR) $(ARFLAGS) ../../$(LAPACKELIB) $(ALLOBJA)
|
||||||
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJB) $(ALLXOBJ) $(DEPRECATED)
|
$(AR) $(ARFLAGS) ../../$(LAPACKELIB) $(ALLOBJB) $(ALLXOBJ) $(DEPRECATED)
|
||||||
$(RANLIB) ../../$(LAPACKELIB)
|
$(RANLIB) ../../$(LAPACKELIB)
|
||||||
|
|
||||||
.c.o:
|
.c.o:
|
||||||
|
|
|
@ -186,7 +186,7 @@ OBJ = lapacke_cgb_nancheck.o \
|
||||||
all: lib
|
all: lib
|
||||||
|
|
||||||
lib: $(OBJ)
|
lib: $(OBJ)
|
||||||
$(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(OBJ)
|
$(AR) $(ARFLAGS) ../../$(LAPACKELIB) $(OBJ)
|
||||||
$(RANLIB) ../../$(LAPACKELIB)
|
$(RANLIB) ../../$(LAPACKELIB)
|
||||||
|
|
||||||
.c.o:
|
.c.o:
|
||||||
|
|
|
@ -455,26 +455,26 @@ endif
|
||||||
all: ../$(LAPACKLIB)
|
all: ../$(LAPACKLIB)
|
||||||
|
|
||||||
../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED)
|
../$(LAPACKLIB): $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED)
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED)
|
$(AR) $(ARFLAGS) $@ $(ALLOBJ) $(ALLXOBJ) $(DEPRECATED)
|
||||||
$(RANLIB) $@
|
$(RANLIB) $@
|
||||||
|
|
||||||
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
|
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
|
||||||
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
|
$(AR) $(ARFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
|
||||||
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
|
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
|
||||||
$(RANLIB) ../$(LAPACKLIB)
|
$(RANLIB) ../$(LAPACKLIB)
|
||||||
|
|
||||||
complex: $(CLASRC) $(ZCLASRC) $(CXLASRC) $(SCLAUX) $(ALLAUX)
|
complex: $(CLASRC) $(ZCLASRC) $(CXLASRC) $(SCLAUX) $(ALLAUX)
|
||||||
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \
|
$(AR) $(ARFLAGS) ../$(LAPACKLIB) $(CLASRC) $(ZCLASRC) \
|
||||||
$(CXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
|
$(CXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
|
||||||
$(RANLIB) ../$(LAPACKLIB)
|
$(RANLIB) ../$(LAPACKLIB)
|
||||||
|
|
||||||
double: $(DLASRC) $(DSLASRC) $(DXLASRC) $(DZLAUX) $(ALLAUX)
|
double: $(DLASRC) $(DSLASRC) $(DXLASRC) $(DZLAUX) $(ALLAUX)
|
||||||
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \
|
$(AR) $(ARFLAGS) ../$(LAPACKLIB) $(DLASRC) $(DSLASRC) \
|
||||||
$(DXLASRC) $(DZLAUX) $(ALLAUX) $(ALLXAUX)
|
$(DXLASRC) $(DZLAUX) $(ALLAUX) $(ALLXAUX)
|
||||||
$(RANLIB) ../$(LAPACKLIB)
|
$(RANLIB) ../$(LAPACKLIB)
|
||||||
|
|
||||||
complex16: $(ZLASRC) $(ZCLASRC) $(ZXLASRC) $(DZLAUX) $(ALLAUX)
|
complex16: $(ZLASRC) $(ZCLASRC) $(ZXLASRC) $(DZLAUX) $(ALLAUX)
|
||||||
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \
|
$(AR) $(ARFLAGS) ../$(LAPACKLIB) $(ZLASRC) $(ZCLASRC) \
|
||||||
$(ZXLASRC) $(DZLAUX) $(ALLAUX) $(ALLXAUX)
|
$(ZXLASRC) $(DZLAUX) $(ALLAUX) $(ALLXAUX)
|
||||||
$(RANLIB) ../$(LAPACKLIB)
|
$(RANLIB) ../$(LAPACKLIB)
|
||||||
|
|
||||||
|
|
|
@ -35,27 +35,27 @@ QRLL = qr/LL/cgeqrf.o qr/LL/dgeqrf.o qr/LL/sgeqrf.o qr/LL/zgeqrf.o qr/LL/sceil.
|
||||||
all: cholrl choltop lucr lull lurec qrll
|
all: cholrl choltop lucr lull lurec qrll
|
||||||
|
|
||||||
cholrl: $(CHOLRL)
|
cholrl: $(CHOLRL)
|
||||||
$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/cholrl.a $(CHOLRL)
|
$(AR) $(ARFLAGS) $(VARIANTSDIR)/cholrl.a $(CHOLRL)
|
||||||
$(RANLIB) $(VARIANTSDIR)/cholrl.a
|
$(RANLIB) $(VARIANTSDIR)/cholrl.a
|
||||||
|
|
||||||
choltop: $(CHOLTOP)
|
choltop: $(CHOLTOP)
|
||||||
$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/choltop.a $(CHOLTOP)
|
$(AR) $(ARFLAGS) $(VARIANTSDIR)/choltop.a $(CHOLTOP)
|
||||||
$(RANLIB) $(VARIANTSDIR)/choltop.a
|
$(RANLIB) $(VARIANTSDIR)/choltop.a
|
||||||
|
|
||||||
lucr: $(LUCR)
|
lucr: $(LUCR)
|
||||||
$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lucr.a $(LUCR)
|
$(AR) $(ARFLAGS) $(VARIANTSDIR)/lucr.a $(LUCR)
|
||||||
$(RANLIB) $(VARIANTSDIR)/lucr.a
|
$(RANLIB) $(VARIANTSDIR)/lucr.a
|
||||||
|
|
||||||
lull: $(LULL)
|
lull: $(LULL)
|
||||||
$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lull.a $(LULL)
|
$(AR) $(ARFLAGS) $(VARIANTSDIR)/lull.a $(LULL)
|
||||||
$(RANLIB) $(VARIANTSDIR)/lull.a
|
$(RANLIB) $(VARIANTSDIR)/lull.a
|
||||||
|
|
||||||
lurec: $(LUREC)
|
lurec: $(LUREC)
|
||||||
$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/lurec.a $(LUREC)
|
$(AR) $(ARFLAGS) $(VARIANTSDIR)/lurec.a $(LUREC)
|
||||||
$(RANLIB) $(VARIANTSDIR)/lurec.a
|
$(RANLIB) $(VARIANTSDIR)/lurec.a
|
||||||
|
|
||||||
qrll: $(QRLL)
|
qrll: $(QRLL)
|
||||||
$(ARCH) $(ARCHFLAGS) $(VARIANTSDIR)/qrll.a $(QRLL)
|
$(AR) $(ARFLAGS) $(VARIANTSDIR)/qrll.a $(QRLL)
|
||||||
$(RANLIB) $(VARIANTSDIR)/qrll.a
|
$(RANLIB) $(VARIANTSDIR)/qrll.a
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -59,23 +59,23 @@ ALLOBJ = $(SMATGEN) $(CMATGEN) $(SCATGEN) $(DMATGEN) $(ZMATGEN) \
|
||||||
$(DZATGEN)
|
$(DZATGEN)
|
||||||
|
|
||||||
../../$(TMGLIB): $(ALLOBJ)
|
../../$(TMGLIB): $(ALLOBJ)
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ)
|
$(AR) $(ARFLAGS) $@ $(ALLOBJ)
|
||||||
$(RANLIB) $@
|
$(RANLIB) $@
|
||||||
|
|
||||||
single: $(SMATGEN) $(SCATGEN)
|
single: $(SMATGEN) $(SCATGEN)
|
||||||
$(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(SMATGEN) $(SCATGEN)
|
$(AR) $(ARFLAGS) ../../$(TMGLIB) $(SMATGEN) $(SCATGEN)
|
||||||
$(RANLIB) ../../$(TMGLIB)
|
$(RANLIB) ../../$(TMGLIB)
|
||||||
|
|
||||||
complex: $(CMATGEN) $(SCATGEN)
|
complex: $(CMATGEN) $(SCATGEN)
|
||||||
$(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(CMATGEN) $(SCATGEN)
|
$(AR) $(ARFLAGS) ../../$(TMGLIB) $(CMATGEN) $(SCATGEN)
|
||||||
$(RANLIB) ../../$(TMGLIB)
|
$(RANLIB) ../../$(TMGLIB)
|
||||||
|
|
||||||
double: $(DMATGEN) $(DZATGEN)
|
double: $(DMATGEN) $(DZATGEN)
|
||||||
$(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(DMATGEN) $(DZATGEN)
|
$(AR) $(ARFLAGS) ../../$(TMGLIB) $(DMATGEN) $(DZATGEN)
|
||||||
$(RANLIB) ../../$(TMGLIB)
|
$(RANLIB) ../../$(TMGLIB)
|
||||||
|
|
||||||
complex16: $(ZMATGEN) $(DZATGEN)
|
complex16: $(ZMATGEN) $(DZATGEN)
|
||||||
$(ARCH) $(ARCHFLAGS) ../../$(TMGLIB) $(ZMATGEN) $(DZATGEN)
|
$(AR) $(ARFLAGS) ../../$(TMGLIB) $(ZMATGEN) $(DZATGEN)
|
||||||
$(RANLIB) ../../$(TMGLIB)
|
$(RANLIB) ../../$(TMGLIB)
|
||||||
|
|
||||||
$(SCATGEN): $(FRC)
|
$(SCATGEN): $(FRC)
|
||||||
|
|
|
@ -57,8 +57,8 @@ CFLAGS = -O3
|
||||||
# The archiver and the flag(s) to use when building archive (library)
|
# The archiver and the flag(s) to use when building archive (library)
|
||||||
# If you system has no ranlib, set RANLIB = echo.
|
# If you system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS= cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
#
|
#
|
||||||
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
# Location of the extended-precision BLAS (XBLAS) Fortran library
|
||||||
|
|
2
make.inc
2
make.inc
|
@ -1,6 +1,6 @@
|
||||||
SHELL = /bin/sh
|
SHELL = /bin/sh
|
||||||
PLAT = _LINUX
|
PLAT = _LINUX
|
||||||
DRVOPTS = $(NOOPT)
|
DRVOPTS = $(NOOPT)
|
||||||
ARCHFLAGS= -ru
|
ARFLAGS= -ru
|
||||||
#RANLIB = ranlib
|
#RANLIB = ranlib
|
||||||
|
|
||||||
|
|
48
param.h
48
param.h
|
@ -2174,7 +2174,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(I6400) || defined(P6600)
|
#if defined(P5600) || defined(I6400) || defined(P6600)
|
||||||
#define SNUMOPT 2
|
#define SNUMOPT 2
|
||||||
#define DNUMOPT 2
|
#define DNUMOPT 2
|
||||||
|
|
||||||
|
@ -2182,6 +2182,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define GEMM_DEFAULT_OFFSET_B 0
|
#define GEMM_DEFAULT_OFFSET_B 0
|
||||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||||
|
|
||||||
|
#ifdef MIPS_SIMD
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 8
|
#define SGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||||
|
|
||||||
|
@ -2193,46 +2194,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
#else
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 128
|
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define DGEMM_DEFAULT_P 128
|
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define CGEMM_DEFAULT_P 96
|
|
||||||
#define ZGEMM_DEFAULT_P 64
|
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 240
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define DGEMM_DEFAULT_Q 120
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define CGEMM_DEFAULT_Q 120
|
|
||||||
#define ZGEMM_DEFAULT_Q 120
|
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_R 12288
|
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define DGEMM_DEFAULT_R 8192
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define CGEMM_DEFAULT_R 4096
|
|
||||||
#define ZGEMM_DEFAULT_R 4096
|
|
||||||
|
|
||||||
|
|
||||||
#define SYMV_P 16
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(P5600)
|
|
||||||
#define SNUMOPT 2
|
|
||||||
#define DNUMOPT 2
|
|
||||||
|
|
||||||
#define GEMM_DEFAULT_OFFSET_A 0
|
|
||||||
#define GEMM_DEFAULT_OFFSET_B 0
|
|
||||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 8
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
|
||||||
|
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 8
|
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
|
||||||
|
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
|
||||||
|
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 128
|
#define SGEMM_DEFAULT_P 128
|
||||||
#define DGEMM_DEFAULT_P 128
|
#define DGEMM_DEFAULT_P 128
|
||||||
#define CGEMM_DEFAULT_P 96
|
#define CGEMM_DEFAULT_P 96
|
||||||
|
@ -2248,7 +2223,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define CGEMM_DEFAULT_R 4096
|
#define CGEMM_DEFAULT_R 4096
|
||||||
#define ZGEMM_DEFAULT_R 4096
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
|
|
||||||
#define SYMV_P 16
|
#define SYMV_P 16
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue