Merge branch 'develop' of https://github.com/wernsaar/OpenBLAS into wernsaar-develop
Conflicts: kernel/arm/KERNEL.ARMV6
This commit is contained in:
commit
406f5bd22b
9
Makefile
9
Makefile
|
@ -262,11 +262,10 @@ endif
|
||||||
|
|
||||||
|
|
||||||
lapack-test :
|
lapack-test :
|
||||||
$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
make -j 1 -C $(NETLIB_LAPACK_DIR) tmglib
|
||||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
|
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||||
@rm -f $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING
|
|
||||||
$(GREP) failed $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
|
||||||
|
|
||||||
dummy :
|
dummy :
|
||||||
|
|
||||||
|
|
|
@ -10,3 +10,9 @@ FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
ifeq ($(CORE), ARMV5)
|
||||||
|
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||||
|
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -76,10 +76,10 @@ VERSION = 0.2.9.rc2
|
||||||
# Unfortunately most of kernel won't give us high quality buffer.
|
# Unfortunately most of kernel won't give us high quality buffer.
|
||||||
# BLAS tries to find the best region before entering main function,
|
# BLAS tries to find the best region before entering main function,
|
||||||
# but it will consume time. If you don't like it, you can disable one.
|
# but it will consume time. If you don't like it, you can disable one.
|
||||||
# NO_WARMUP = 1
|
NO_WARMUP = 1
|
||||||
|
|
||||||
# If you want to disable CPU/Memory affinity on Linux.
|
# If you want to disable CPU/Memory affinity on Linux.
|
||||||
# NO_AFFINITY = 1
|
NO_AFFINITY = 1
|
||||||
|
|
||||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||||
# and OS. However, the performance is low.
|
# and OS. However, the performance is low.
|
||||||
|
@ -129,6 +129,9 @@ VERSION = 0.2.9.rc2
|
||||||
# The default -O2 is enough.
|
# The default -O2 is enough.
|
||||||
# COMMON_OPT = -O2
|
# COMMON_OPT = -O2
|
||||||
|
|
||||||
|
# gfortran option for LAPACK
|
||||||
|
FCOMMON_OPT = -frecursive
|
||||||
|
|
||||||
# Profiling flags
|
# Profiling flags
|
||||||
COMMON_PROF = -pg
|
COMMON_PROF = -pg
|
||||||
|
|
||||||
|
|
|
@ -158,6 +158,7 @@ endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), Linux)
|
ifeq ($(OSNAME), Linux)
|
||||||
EXTRALIB += -lm
|
EXTRALIB += -lm
|
||||||
|
NO_EXPRECISION = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), AIX)
|
ifeq ($(OSNAME), AIX)
|
||||||
|
@ -846,19 +847,6 @@ ifeq ($(DEBUG), 1)
|
||||||
COMMON_OPT += -g
|
COMMON_OPT += -g
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef COMMON_OPT
|
|
||||||
ifeq ($(ARCH), arm)
|
|
||||||
COMMON_OPT = -O3
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifndef COMMON_OPT
|
|
||||||
ifeq ($(ARCH), arm64)
|
|
||||||
COMMON_OPT = -O3
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
|
|
||||||
ifndef COMMON_OPT
|
ifndef COMMON_OPT
|
||||||
COMMON_OPT = -O2
|
COMMON_OPT = -O2
|
||||||
endif
|
endif
|
||||||
|
|
9
common.h
9
common.h
|
@ -310,10 +310,17 @@ typedef int blasint;
|
||||||
#define YIELDING SwitchToThread()
|
#define YIELDING SwitchToThread()
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
|
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
|
||||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef BULLDOZER
|
||||||
|
#ifndef YIELDING
|
||||||
|
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef PILEDRIVER
|
#ifdef PILEDRIVER
|
||||||
#ifndef YIELDING
|
#ifndef YIELDING
|
||||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||||
|
|
|
@ -3,7 +3,7 @@ include ../../Makefile.system
|
||||||
|
|
||||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX)
|
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX)
|
||||||
|
|
||||||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
#COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||||
|
|
||||||
ifdef SMP
|
ifdef SMP
|
||||||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
|
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
|
||||||
|
|
16
getarch.c
16
getarch.c
|
@ -724,6 +724,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE_ARMV5
|
||||||
|
#define FORCE
|
||||||
|
#define ARCHITECTURE "ARM"
|
||||||
|
#define SUBARCHITECTURE "ARMV5"
|
||||||
|
#define SUBDIRNAME "arm"
|
||||||
|
#define ARCHCONFIG "-DARMV5 " \
|
||||||
|
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||||
|
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||||
|
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||||
|
"-DHAVE_VFP"
|
||||||
|
#define LIBNAME "armv5"
|
||||||
|
#define CORENAME "ARMV5"
|
||||||
|
#else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef FORCE_ARMV8
|
#ifdef FORCE_ARMV8
|
||||||
#define FORCE
|
#define FORCE
|
||||||
#define ARCHITECTURE "ARM64"
|
#define ARCHITECTURE "ARM64"
|
||||||
|
|
|
@ -2,11 +2,11 @@ TOPDIR = ..
|
||||||
include $(TOPDIR)/Makefile.system
|
include $(TOPDIR)/Makefile.system
|
||||||
|
|
||||||
ifeq ($(ARCH), x86)
|
ifeq ($(ARCH), x86)
|
||||||
SUPPORT_GEMM3M = 1
|
SUPPORT_GEMM3M = 0
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
SUPPORT_GEMM3M = 1
|
SUPPORT_GEMM3M = 0
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), ia64)
|
ifeq ($(ARCH), ia64)
|
||||||
|
@ -342,30 +342,51 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
||||||
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
|
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
|
||||||
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
||||||
|
|
||||||
|
#SLAPACKOBJS = \
|
||||||
|
# sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
|
||||||
|
# spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
|
||||||
|
# slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
|
||||||
|
|
||||||
SLAPACKOBJS = \
|
SLAPACKOBJS = \
|
||||||
sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
|
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
|
||||||
spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
|
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX)
|
||||||
slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
|
|
||||||
|
|
||||||
|
#DLAPACKOBJS = \
|
||||||
|
# dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
|
||||||
|
# dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
|
||||||
|
# dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
|
||||||
|
|
||||||
DLAPACKOBJS = \
|
DLAPACKOBJS = \
|
||||||
dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
|
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
|
||||||
dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
|
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX)
|
||||||
dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
|
|
||||||
|
|
||||||
QLAPACKOBJS = \
|
QLAPACKOBJS = \
|
||||||
qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \
|
qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \
|
||||||
qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \
|
qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \
|
||||||
qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \
|
qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \
|
||||||
|
|
||||||
|
#CLAPACKOBJS = \
|
||||||
|
# cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
|
||||||
|
# cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
|
||||||
|
# claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
|
||||||
|
|
||||||
CLAPACKOBJS = \
|
CLAPACKOBJS = \
|
||||||
cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
|
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
|
||||||
cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
|
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX)
|
||||||
claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
|
|
||||||
|
|
||||||
|
#ZLAPACKOBJS = \
|
||||||
|
# zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
|
||||||
|
# zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
|
||||||
|
# zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
|
||||||
|
|
||||||
ZLAPACKOBJS = \
|
ZLAPACKOBJS = \
|
||||||
zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
|
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
|
||||||
zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
|
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX)
|
||||||
zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
|
|
||||||
|
|
||||||
|
|
||||||
XLAPACKOBJS = \
|
XLAPACKOBJS = \
|
||||||
xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \
|
xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \
|
||||||
|
@ -375,10 +396,10 @@ XLAPACKOBJS = \
|
||||||
ifneq ($(NO_LAPACK), 1)
|
ifneq ($(NO_LAPACK), 1)
|
||||||
SBLASOBJS += $(SLAPACKOBJS)
|
SBLASOBJS += $(SLAPACKOBJS)
|
||||||
DBLASOBJS += $(DLAPACKOBJS)
|
DBLASOBJS += $(DLAPACKOBJS)
|
||||||
QBLASOBJS += $(QLAPACKOBJS)
|
#QBLASOBJS += $(QLAPACKOBJS)
|
||||||
CBLASOBJS += $(CLAPACKOBJS)
|
CBLASOBJS += $(CLAPACKOBJS)
|
||||||
ZBLASOBJS += $(ZLAPACKOBJS)
|
ZBLASOBJS += $(ZLAPACKOBJS)
|
||||||
XBLASOBJS += $(XLAPACKOBJS)
|
#XBLASOBJS += $(XLAPACKOBJS)
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -1731,37 +1752,37 @@ cblas_cher2k.$(SUFFIX) cblas_cher2k.$(PSUFFIX) : syr2k.c
|
||||||
cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c
|
cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c
|
||||||
$(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F)
|
$(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F)
|
||||||
|
|
||||||
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : getf2.c
|
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : lapack/getf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : getf2.c
|
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : lapack/getf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qgetf2.$(SUFFIX) qgetf2.$(PSUFFIX) : getf2.c
|
qgetf2.$(SUFFIX) qgetf2.$(PSUFFIX) : getf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : zgetf2.c
|
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : lapack/zgetf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : zgetf2.c
|
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : lapack/zgetf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xgetf2.$(SUFFIX) xgetf2.$(PSUFFIX) : zgetf2.c
|
xgetf2.$(SUFFIX) xgetf2.$(PSUFFIX) : zgetf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : getrf.c
|
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : lapack/getrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : getrf.c
|
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : lapack/getrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qgetrf.$(SUFFIX) qgetrf.$(PSUFFIX) : getrf.c
|
qgetrf.$(SUFFIX) qgetrf.$(PSUFFIX) : getrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : zgetrf.c
|
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : lapack/zgetrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : zgetrf.c
|
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : lapack/zgetrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c
|
xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c
|
||||||
|
@ -1803,37 +1824,37 @@ zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : zlauum.c
|
||||||
xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c
|
xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : potf2.c
|
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : lapack/potf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : potf2.c
|
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : lapack/potf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qpotf2.$(SUFFIX) qpotf2.$(PSUFFIX) : potf2.c
|
qpotf2.$(SUFFIX) qpotf2.$(PSUFFIX) : potf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : zpotf2.c
|
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : lapack/zpotf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : zpotf2.c
|
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : lapack/zpotf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xpotf2.$(SUFFIX) xpotf2.$(PSUFFIX) : zpotf2.c
|
xpotf2.$(SUFFIX) xpotf2.$(PSUFFIX) : zpotf2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : potrf.c
|
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : lapack/potrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : potrf.c
|
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : lapack/potrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qpotrf.$(SUFFIX) qpotrf.$(PSUFFIX) : potrf.c
|
qpotrf.$(SUFFIX) qpotrf.$(PSUFFIX) : potrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : zpotrf.c
|
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : lapack/zpotrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : zpotrf.c
|
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : lapack/zpotrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c
|
xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c
|
||||||
|
@ -1875,55 +1896,55 @@ ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : ztrtri.c
|
||||||
xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c
|
xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : laswp.c
|
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : lapack/laswp.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : laswp.c
|
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : lapack/laswp.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qlaswp.$(SUFFIX) qlaswp.$(PSUFFIX) : laswp.c
|
qlaswp.$(SUFFIX) qlaswp.$(PSUFFIX) : laswp.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
claswp.$(SUFFIX) claswp.$(PSUFFIX) : zlaswp.c
|
claswp.$(SUFFIX) claswp.$(PSUFFIX) : lapack/zlaswp.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : zlaswp.c
|
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : lapack/zlaswp.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xlaswp.$(SUFFIX) xlaswp.$(PSUFFIX) : zlaswp.c
|
xlaswp.$(SUFFIX) xlaswp.$(PSUFFIX) : zlaswp.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : getrs.c
|
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : lapack/getrs.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : getrs.c
|
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : lapack/getrs.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : getrs.c
|
qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : getrs.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : zgetrs.c
|
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : zgetrs.c
|
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c
|
xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : gesv.c
|
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : gesv.c
|
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : lapack/gesv.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qgesv.$(SUFFIX) qgesv.$(PSUFFIX) : gesv.c
|
qgesv.$(SUFFIX) qgesv.$(PSUFFIX) : gesv.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : gesv.c
|
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : lapack/gesv.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : gesv.c
|
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : lapack/gesv.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c
|
xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c
|
||||||
|
|
|
@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
qsbmv_thread_U, qsbmv_thread_L,
|
qsbmv_thread_U, qsbmv_thread_L,
|
||||||
|
@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||||
blasint info;
|
blasint info;
|
||||||
int uplo;
|
int uplo;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -130,7 +130,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
int uplo;
|
int uplo;
|
||||||
blasint info;
|
blasint info;
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
|
@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
|
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);
|
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);
|
||||||
|
|
|
@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLAS
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPTEST
|
||||||
static int (*spmv_thread[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
static int (*spmv_thread[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
qspmv_thread_U, qspmv_thread_L,
|
qspmv_thread_U, qspmv_thread_L,
|
||||||
|
@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
||||||
blasint info;
|
blasint info;
|
||||||
int uplo;
|
int uplo;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
#ifdef SMP
|
#ifdef SMPTEST
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -126,7 +126,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
int uplo;
|
int uplo;
|
||||||
blasint info;
|
blasint info;
|
||||||
#ifdef SMP
|
#ifdef SMPTEST
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -181,7 +181,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPTEST
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
|
@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
(spmv[uplo])(n, alpha, a, x, incx, y, incy, buffer);
|
(spmv[uplo])(n, alpha, a, x, incx, y, incy, buffer);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPTEST
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
(spmv_thread[uplo])(n, alpha, a, x, incx, y, incy, buffer, nthreads);
|
(spmv_thread[uplo])(n, alpha, a, x, incx, y, incy, buffer, nthreads);
|
||||||
|
|
|
@ -145,12 +145,21 @@ void NAME(char *UPLO, char *TRANS,
|
||||||
if (uplo_arg == 'U') uplo = 0;
|
if (uplo_arg == 'U') uplo = 0;
|
||||||
if (uplo_arg == 'L') uplo = 1;
|
if (uplo_arg == 'L') uplo = 1;
|
||||||
|
|
||||||
|
#ifndef COMPLEX
|
||||||
if (trans_arg == 'N') trans = 0;
|
if (trans_arg == 'N') trans = 0;
|
||||||
#ifndef HEMM
|
|
||||||
if (trans_arg == 'T') trans = 1;
|
if (trans_arg == 'T') trans = 1;
|
||||||
if (trans_arg == 'R') trans = 0;
|
|
||||||
#endif
|
|
||||||
if (trans_arg == 'C') trans = 1;
|
if (trans_arg == 'C') trans = 1;
|
||||||
|
#else
|
||||||
|
#ifdef HEMM
|
||||||
|
if (trans_arg == 'N') trans = 0;
|
||||||
|
if (trans_arg == 'C') trans = 1;
|
||||||
|
#else
|
||||||
|
if (trans_arg == 'N') trans = 0;
|
||||||
|
if (trans_arg == 'T') trans = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
nrowa = args.n;
|
nrowa = args.n;
|
||||||
if (trans & 1) nrowa = args.k;
|
if (trans & 1) nrowa = args.k;
|
||||||
|
|
|
@ -148,12 +148,21 @@ void NAME(char *UPLO, char *TRANS,
|
||||||
if (uplo_arg == 'U') uplo = 0;
|
if (uplo_arg == 'U') uplo = 0;
|
||||||
if (uplo_arg == 'L') uplo = 1;
|
if (uplo_arg == 'L') uplo = 1;
|
||||||
|
|
||||||
|
|
||||||
|
#ifndef COMPLEX
|
||||||
if (trans_arg == 'N') trans = 0;
|
if (trans_arg == 'N') trans = 0;
|
||||||
#ifndef HEMM
|
|
||||||
if (trans_arg == 'T') trans = 1;
|
if (trans_arg == 'T') trans = 1;
|
||||||
if (trans_arg == 'R') trans = 0;
|
|
||||||
#endif
|
|
||||||
if (trans_arg == 'C') trans = 1;
|
if (trans_arg == 'C') trans = 1;
|
||||||
|
#else
|
||||||
|
#ifdef HEMM
|
||||||
|
if (trans_arg == 'N') trans = 0;
|
||||||
|
if (trans_arg == 'C') trans = 1;
|
||||||
|
#else
|
||||||
|
if (trans_arg == 'N') trans = 0;
|
||||||
|
if (trans_arg == 'T') trans = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
nrowa = args.n;
|
nrowa = args.n;
|
||||||
if (trans & 1) nrowa = args.k;
|
if (trans & 1) nrowa = args.k;
|
||||||
|
|
|
@ -61,7 +61,7 @@ static int (*hbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
static int (*hbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
static int (*hbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
xhbmv_thread_U, xhbmv_thread_L, xhbmv_thread_V, xhbmv_thread_M,
|
xhbmv_thread_U, xhbmv_thread_L, xhbmv_thread_V, xhbmv_thread_M,
|
||||||
|
@ -92,7 +92,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||||
blasint info;
|
blasint info;
|
||||||
int uplo;
|
int uplo;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -138,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
int uplo;
|
int uplo;
|
||||||
blasint info;
|
blasint info;
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
|
@ -205,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
(hbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);
|
(hbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
(hbmv_thread[uplo])(n, k, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);
|
(hbmv_thread[uplo])(n, k, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);
|
||||||
|
|
|
@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
xsbmv_thread_U, xsbmv_thread_L,
|
xsbmv_thread_U, xsbmv_thread_L,
|
||||||
|
@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||||
blasint info;
|
blasint info;
|
||||||
int uplo;
|
int uplo;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -131,7 +131,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
|
@ -139,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||||
|
|
||||||
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);
|
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPBUG
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);
|
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);
|
||||||
|
|
|
@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPTEST
|
||||||
static int (*spmv_thread[])(BLASLONG, FLOAT *, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
static int (*spmv_thread[])(BLASLONG, FLOAT *, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
xspmv_thread_U, xspmv_thread_L,
|
xspmv_thread_U, xspmv_thread_L,
|
||||||
|
@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
||||||
blasint info;
|
blasint info;
|
||||||
int uplo;
|
int uplo;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
#ifdef SMP
|
#ifdef SMPTEST
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPTEST
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
|
@ -135,7 +135,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
||||||
|
|
||||||
(spmv[uplo])(n, alpha_r, alpha_i, a, b, incx, c, incy, buffer);
|
(spmv[uplo])(n, alpha_r, alpha_i, a, b, incx, c, incy, buffer);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMPTEST
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,134 @@
|
||||||
|
SAMAXKERNEL = ../arm/amax.c
|
||||||
|
DAMAXKERNEL = ../arm/amax.c
|
||||||
|
CAMAXKERNEL = ../arm/zamax.c
|
||||||
|
ZAMAXKERNEL = ../arm/zamax.c
|
||||||
|
|
||||||
|
SAMINKERNEL = ../arm/amin.c
|
||||||
|
DAMINKERNEL = ../arm/amin.c
|
||||||
|
CAMINKERNEL = ../arm/zamin.c
|
||||||
|
ZAMINKERNEL = ../arm/zamin.c
|
||||||
|
|
||||||
|
SMAXKERNEL = ../arm/max.c
|
||||||
|
DMAXKERNEL = ../arm/max.c
|
||||||
|
|
||||||
|
SMINKERNEL = ../arm/min.c
|
||||||
|
DMINKERNEL = ../arm/min.c
|
||||||
|
|
||||||
|
ISAMAXKERNEL = ../arm/iamax.c
|
||||||
|
IDAMAXKERNEL = ../arm/iamax.c
|
||||||
|
ICAMAXKERNEL = ../arm/izamax.c
|
||||||
|
IZAMAXKERNEL = ../arm/izamax.c
|
||||||
|
|
||||||
|
ISAMINKERNEL = ../arm/iamin.c
|
||||||
|
IDAMINKERNEL = ../arm/iamin.c
|
||||||
|
ICAMINKERNEL = ../arm/izamin.c
|
||||||
|
IZAMINKERNEL = ../arm/izamin.c
|
||||||
|
|
||||||
|
ISMAXKERNEL = ../arm/imax.c
|
||||||
|
IDMAXKERNEL = ../arm/imax.c
|
||||||
|
|
||||||
|
ISMINKERNEL = ../arm/imin.c
|
||||||
|
IDMINKERNEL = ../arm/imin.c
|
||||||
|
|
||||||
|
SASUMKERNEL = ../arm/asum.c
|
||||||
|
DASUMKERNEL = ../arm/asum.c
|
||||||
|
CASUMKERNEL = ../arm/zasum.c
|
||||||
|
ZASUMKERNEL = ../arm/zasum.c
|
||||||
|
|
||||||
|
SAXPYKERNEL = ../arm/axpy.c
|
||||||
|
DAXPYKERNEL = ../arm/axpy.c
|
||||||
|
CAXPYKERNEL = ../arm/zaxpy.c
|
||||||
|
ZAXPYKERNEL = ../arm/zaxpy.c
|
||||||
|
|
||||||
|
SCOPYKERNEL = ../arm/copy.c
|
||||||
|
DCOPYKERNEL = ../arm/copy.c
|
||||||
|
CCOPYKERNEL = ../arm/zcopy.c
|
||||||
|
ZCOPYKERNEL = ../arm/zcopy.c
|
||||||
|
|
||||||
|
SDOTKERNEL = ../arm/dot.c
|
||||||
|
DDOTKERNEL = ../arm/dot.c
|
||||||
|
CDOTKERNEL = ../arm/zdot.c
|
||||||
|
ZDOTKERNEL = ../arm/zdot.c
|
||||||
|
|
||||||
|
SNRM2KERNEL = ../arm/nrm2.c
|
||||||
|
DNRM2KERNEL = ../arm/nrm2.c
|
||||||
|
CNRM2KERNEL = ../arm/znrm2.c
|
||||||
|
ZNRM2KERNEL = ../arm/znrm2.c
|
||||||
|
|
||||||
|
SROTKERNEL = ../arm/rot.c
|
||||||
|
DROTKERNEL = ../arm/rot.c
|
||||||
|
CROTKERNEL = ../arm/zrot.c
|
||||||
|
ZROTKERNEL = ../arm/zrot.c
|
||||||
|
|
||||||
|
SSCALKERNEL = ../arm/scal.c
|
||||||
|
DSCALKERNEL = ../arm/scal.c
|
||||||
|
CSCALKERNEL = ../arm/zscal.c
|
||||||
|
ZSCALKERNEL = ../arm/zscal.c
|
||||||
|
|
||||||
|
SSWAPKERNEL = ../arm/swap.c
|
||||||
|
DSWAPKERNEL = ../arm/swap.c
|
||||||
|
CSWAPKERNEL = ../arm/zswap.c
|
||||||
|
ZSWAPKERNEL = ../arm/zswap.c
|
||||||
|
|
||||||
|
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||||
|
DGEMVNKERNEL = ../arm/gemv_n.c
|
||||||
|
CGEMVNKERNEL = ../arm/zgemv_n.c
|
||||||
|
ZGEMVNKERNEL = ../arm/zgemv_n.c
|
||||||
|
|
||||||
|
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||||
|
DGEMVTKERNEL = ../arm/gemv_t.c
|
||||||
|
CGEMVTKERNEL = ../arm/zgemv_t.c
|
||||||
|
ZGEMVTKERNEL = ../arm/zgemv_t.c
|
||||||
|
|
||||||
|
STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||||
|
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||||
|
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
|
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
|
|
||||||
|
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
|
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
|
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
|
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
|
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||||
|
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||||
|
|
||||||
|
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||||
|
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||||
|
|
||||||
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
|
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,20 @@
|
||||||
SGEMVNKERNEL = ../arm/gemv_n.c
|
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||||
SGEMVTKERNEL = ../arm/gemv_t.c
|
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||||
|
|
||||||
|
DGEMVNKERNEL = ../arm/gemv_n.c
|
||||||
|
DGEMVTKERNEL = ../arm/gemv_t.c
|
||||||
|
|
||||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
|
||||||
|
#ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
|
#ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
|
#ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
|
#ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
|
|
||||||
|
|
||||||
#STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
#STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||||
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
#SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
#SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
|
@ -86,18 +95,18 @@ CSWAPKERNEL = swap_vfp.S
|
||||||
ZSWAPKERNEL = swap_vfp.S
|
ZSWAPKERNEL = swap_vfp.S
|
||||||
|
|
||||||
# BAD SGEMVNKERNEL = gemv_n_vfp.S
|
# BAD SGEMVNKERNEL = gemv_n_vfp.S
|
||||||
DGEMVNKERNEL = gemv_n_vfp.S
|
# BAD DGEMVNKERNEL = gemv_n_vfp.S
|
||||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||||
|
|
||||||
# BAD SGEMVTKERNEL = gemv_t_vfp.S
|
# BAD SGEMVTKERNEL = gemv_t_vfp.S
|
||||||
DGEMVTKERNEL = gemv_t_vfp.S
|
# BAD DGEMVTKERNEL = gemv_t_vfp.S
|
||||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||||
|
|
||||||
STRMMKERNEL = strmm_kernel_4x2_vfp.S
|
STRMMKERNEL = strmm_kernel_4x2_vfp.S
|
||||||
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
|
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
|
||||||
# CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
#CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
||||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
|
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
|
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
|
||||||
|
|
|
@ -7,15 +7,19 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMKERNEL = gemm_kernel_2x8_nehalem.S
|
|
||||||
DGEMMINCOPY = dgemm_ncopy_2.S
|
|
||||||
DGEMMITCOPY = dgemm_tcopy_2.S
|
DGEMMKERNEL = gemm_kernel_4x4_core2.S
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
DGEMMINCOPY =
|
||||||
DGEMMOTCOPY = dgemm_tcopy_8.S
|
DGEMMITCOPY =
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
DGEMMONCOPY = gemm_ncopy_4.S
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
DGEMMOTCOPY = gemm_tcopy_4.S
|
||||||
|
DGEMMINCOPYOBJ =
|
||||||
|
DGEMMITCOPYOBJ =
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
|
||||||
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
||||||
CGEMMINCOPY = zgemm_ncopy_2.S
|
CGEMMINCOPY = zgemm_ncopy_2.S
|
||||||
CGEMMITCOPY = zgemm_tcopy_2.S
|
CGEMMITCOPY = zgemm_tcopy_2.S
|
||||||
|
@ -40,10 +44,11 @@ STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
|
||||||
STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
|
STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
|
||||||
STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S
|
STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S
|
||||||
|
|
||||||
DTRSMKERNEL_LN = trsm_kernel_LN_2x8_nehalem.S
|
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_core2.S
|
||||||
DTRSMKERNEL_LT = trsm_kernel_LT_2x8_nehalem.S
|
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_core2.S
|
||||||
DTRSMKERNEL_RN = trsm_kernel_LT_2x8_nehalem.S
|
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_core2.S
|
||||||
DTRSMKERNEL_RT = trsm_kernel_RT_2x8_nehalem.S
|
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_core2.S
|
||||||
|
|
||||||
|
|
||||||
CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
|
CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
|
||||||
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S
|
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S
|
||||||
|
|
|
@ -1,34 +1,35 @@
|
||||||
SGEMMKERNEL = sgemm_kernel_8x8_sandy.S
|
SGEMMKERNEL = gemm_kernel_4x8_nehalem.S
|
||||||
SGEMMINCOPY =
|
SGEMMINCOPY = gemm_ncopy_4.S
|
||||||
SGEMMITCOPY =
|
SGEMMITCOPY = gemm_tcopy_4.S
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||||
SGEMMINCOPYOBJ =
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ =
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
|
DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
|
||||||
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||||
#DGEMMONCOPY = gemm_ncopy_4.S
|
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
#DGEMMOTCOPY = gemm_tcopy_4.S
|
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
#CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
|
||||||
CGEMMKERNEL = cgemm_kernel_4x8_sandy.S
|
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
||||||
CGEMMINCOPY = ../generic/zgemm_ncopy_8_sandy.c
|
CGEMMINCOPY = zgemm_ncopy_2.S
|
||||||
CGEMMITCOPY = ../generic/zgemm_tcopy_8_sandy.c
|
CGEMMITCOPY = zgemm_tcopy_2.S
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4_sandy.c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4_sandy.c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
#ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S
|
|
||||||
|
|
||||||
ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S
|
ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S
|
||||||
ZGEMMINCOPY =
|
ZGEMMINCOPY =
|
||||||
ZGEMMITCOPY =
|
ZGEMMITCOPY =
|
||||||
|
@ -58,6 +59,7 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
#ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x4_nehalem.S
|
#ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x4_nehalem.S
|
||||||
#ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x4_nehalem.S
|
#ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x4_nehalem.S
|
||||||
#ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x4_nehalem.S
|
#ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x4_nehalem.S
|
||||||
|
|
||||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
|
|
|
@ -0,0 +1,19 @@
|
||||||
|
========================================================================================
|
||||||
|
2014/05/07 Saar
|
||||||
|
|
||||||
|
Platform: BULLDOZER single thread
|
||||||
|
|
||||||
|
|
||||||
|
--> LAPACK TESTING SUMMARY <--
|
||||||
|
Processing LAPACK Testing output found in the TESTING direcory
|
||||||
|
SUMMARY nb test run numerical error other error
|
||||||
|
================ =========== ================= ================
|
||||||
|
REAL 1079349 0 (0.000%) 0 (0.000%)
|
||||||
|
DOUBLE PRECISION 1080161 0 (0.000%) 0 (0.000%)
|
||||||
|
COMPLEX 556022 0 (0.000%) 0 (0.000%)
|
||||||
|
COMPLEX16 556834 0 (0.000%) 0 (0.000%)
|
||||||
|
|
||||||
|
--> ALL PRECISIONS 3272366 0 (0.000%) 0 (0.000%)
|
||||||
|
|
||||||
|
========================================================================================
|
||||||
|
|
|
@ -54,9 +54,9 @@ include ../make.inc
|
||||||
#
|
#
|
||||||
#######################################################################
|
#######################################################################
|
||||||
|
|
||||||
ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla_array.o iparmq.o \
|
ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o \
|
||||||
ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
|
ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
|
||||||
../INSTALL/ilaver.o
|
../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o
|
||||||
|
|
||||||
SCLAUX = \
|
SCLAUX = \
|
||||||
sbdsdc.o \
|
sbdsdc.o \
|
||||||
|
@ -92,7 +92,7 @@ DZLAUX = \
|
||||||
dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
|
dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
|
||||||
dsteqr.o dsterf.o dlaisnan.o disnan.o \
|
dsteqr.o dsterf.o dlaisnan.o disnan.o \
|
||||||
dlartgp.o dlartgs.o \
|
dlartgp.o dlartgs.o \
|
||||||
../INSTALL/dsecnd_$(TIMER).o
|
../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
|
||||||
|
|
||||||
SLASRC = \
|
SLASRC = \
|
||||||
sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
|
sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
|
||||||
|
@ -101,7 +101,7 @@ SLASRC = \
|
||||||
sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
|
sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
|
||||||
sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
|
sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
|
||||||
sgeqp3.o sgeqpf.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \
|
sgeqp3.o sgeqpf.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \
|
||||||
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o \
|
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesvd.o sgesvx.o \
|
||||||
sgetc2.o sgetri.o \
|
sgetc2.o sgetri.o \
|
||||||
sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
|
sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
|
||||||
sggglm.o sgghrd.o sgglse.o sggqrf.o \
|
sggglm.o sgghrd.o sgglse.o sggqrf.o \
|
||||||
|
@ -120,7 +120,7 @@ SLASRC = \
|
||||||
slarrv.o slartv.o \
|
slarrv.o slartv.o \
|
||||||
slarz.o slarzb.o slarzt.o slasy2.o slasyf.o slasyf_rook.o \
|
slarz.o slarzb.o slarzt.o slasy2.o slasyf.o slasyf_rook.o \
|
||||||
slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
|
slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
|
||||||
sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
|
slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
|
||||||
sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
|
sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
|
||||||
sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
|
sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
|
||||||
sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
|
sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
|
||||||
|
@ -147,7 +147,7 @@ SLASRC = \
|
||||||
stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
|
stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
|
||||||
stptrs.o \
|
stptrs.o \
|
||||||
strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
|
strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
|
||||||
strtrs.o stzrqf.o stzrzf.o sstemr.o \
|
strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o \
|
||||||
slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
|
slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
|
||||||
stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
|
stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
|
||||||
sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
|
sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
|
||||||
|
@ -176,7 +176,7 @@ CLASRC = \
|
||||||
cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
|
cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
|
||||||
cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
|
cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
|
||||||
cgeqpf.o cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \
|
cgeqpf.o cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \
|
||||||
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o \
|
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesvd.o \
|
||||||
cgesvx.o cgetc2.o cgetri.o \
|
cgesvx.o cgetc2.o cgetri.o \
|
||||||
cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
|
cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
|
||||||
cgghrd.o cgglse.o cggqrf.o cggrqf.o \
|
cgghrd.o cgglse.o cggqrf.o cggrqf.o \
|
||||||
|
@ -208,7 +208,7 @@ CLASRC = \
|
||||||
clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
|
clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
|
||||||
clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
|
clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
|
||||||
clasyf.o clasyf_rook.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
|
clasyf.o clasyf_rook.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
|
||||||
clatzm.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
|
clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
|
||||||
cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
|
cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
|
||||||
cposv.o cposvx.o cpotri.o cpstrf.o cpstf2.o \
|
cposv.o cposvx.o cpotri.o cpstrf.o cpstf2.o \
|
||||||
cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
|
cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
|
||||||
|
@ -225,7 +225,7 @@ CLASRC = \
|
||||||
ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
|
ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
|
||||||
ctprfs.o ctptri.o \
|
ctprfs.o ctptri.o \
|
||||||
ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
|
ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
|
||||||
ctrsyl.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
|
ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
|
||||||
cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
|
cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
|
||||||
cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
|
cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
|
||||||
cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
|
cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
|
||||||
|
@ -261,7 +261,7 @@ DLASRC = \
|
||||||
dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
|
dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
|
||||||
dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
|
dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
|
||||||
dgeqp3.o dgeqpf.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \
|
dgeqp3.o dgeqpf.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \
|
||||||
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o \
|
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesvd.o dgesvx.o \
|
||||||
dgetc2.o dgetri.o \
|
dgetc2.o dgetri.o \
|
||||||
dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
|
dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
|
||||||
dggglm.o dgghrd.o dgglse.o dggqrf.o \
|
dggglm.o dgghrd.o dgglse.o dggqrf.o \
|
||||||
|
@ -279,8 +279,8 @@ DLASRC = \
|
||||||
dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \
|
dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \
|
||||||
dlargv.o dlarrv.o dlartv.o \
|
dlargv.o dlarrv.o dlartv.o \
|
||||||
dlarz.o dlarzb.o dlarzt.o dlasy2.o dlasyf.o dlasyf_rook.o \
|
dlarz.o dlarzb.o dlarzt.o dlasy2.o dlasyf.o dlasyf_rook.o \
|
||||||
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o \
|
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \
|
||||||
dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
|
dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
|
||||||
dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
|
dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
|
||||||
dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
|
dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
|
||||||
dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
|
dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
|
||||||
|
@ -307,7 +307,7 @@ DLASRC = \
|
||||||
dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
|
dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
|
||||||
dtptrs.o \
|
dtptrs.o \
|
||||||
dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
|
dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
|
||||||
dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
|
dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
|
||||||
dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
|
dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
|
||||||
dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
|
dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
|
||||||
dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
|
dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
|
||||||
|
@ -335,7 +335,7 @@ ZLASRC = \
|
||||||
zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
|
zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
|
||||||
zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
|
zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
|
||||||
zgeqpf.o zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \
|
zgeqpf.o zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \
|
||||||
zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o \
|
zgesc2.o zgesdd.o zgesvd.o zgesvx.o zgetc2.o \
|
||||||
zgetri.o \
|
zgetri.o \
|
||||||
zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
|
zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
|
||||||
zgghrd.o zgglse.o zggqrf.o zggrqf.o \
|
zgghrd.o zgglse.o zggqrf.o zggrqf.o \
|
||||||
|
@ -370,7 +370,7 @@ ZLASRC = \
|
||||||
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
|
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
|
||||||
zlassq.o zlasyf.o zlasyf_rook.o \
|
zlassq.o zlasyf.o zlasyf_rook.o \
|
||||||
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
|
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
|
||||||
zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
|
zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
|
||||||
zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
|
zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
|
||||||
zposv.o zposvx.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
|
zposv.o zposvx.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
|
||||||
zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
|
zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
|
||||||
|
@ -387,7 +387,7 @@ ZLASRC = \
|
||||||
ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
|
ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
|
||||||
ztprfs.o ztptri.o \
|
ztprfs.o ztptri.o \
|
||||||
ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
|
ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
|
||||||
ztrsyl.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
|
ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
|
||||||
zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
|
zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
|
||||||
zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
|
zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
|
||||||
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
|
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
|
||||||
|
@ -417,8 +417,6 @@ endif
|
||||||
ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \
|
ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \
|
||||||
$(SCLAUX) $(DZLAUX) $(ALLAUX)
|
$(SCLAUX) $(DZLAUX) $(ALLAUX)
|
||||||
|
|
||||||
ALLOBJ_P = $(ALLOBJ:.o=.$(PSUFFIX))
|
|
||||||
|
|
||||||
ifdef USEXBLAS
|
ifdef USEXBLAS
|
||||||
ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
|
ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
|
||||||
endif
|
endif
|
||||||
|
@ -435,6 +433,7 @@ lapacklib: $(ALLOBJ) $(ALLXOBJ)
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
|
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
|
||||||
$(RANLIB) $@
|
$(RANLIB) $@
|
||||||
|
|
||||||
|
|
||||||
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
|
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
|
||||||
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
|
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
|
||||||
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
|
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
|
||||||
|
@ -483,16 +482,11 @@ clean:
|
||||||
%.$(PSUFFIX): %.f
|
%.$(PSUFFIX): %.f
|
||||||
$(FORTRAN) $(POPTS) -c $< -o $@
|
$(FORTRAN) $(POPTS) -c $< -o $@
|
||||||
|
|
||||||
slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
|
||||||
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
|
||||||
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
|
||||||
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
|
||||||
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
|
||||||
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
|
||||||
|
|
||||||
slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||||
dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||||
sla_wwaddw.$(PSUFFIX): sla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||||
dla_wwaddw.$(PSUFFIX): dla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||||
cla_wwaddw.$(PSUFFIX): cla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||||
zla_wwaddw.$(PSUFFIX): zla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||||
|
|
||||||
|
|
|
@ -5,5 +5,5 @@ Data file for testing COMPLEX LAPACK linear equation routines RFP format
|
||||||
1 2 15 Values of NRHS (number of right hand sides)
|
1 2 15 Values of NRHS (number of right hand sides)
|
||||||
9 Number of matrix types (list types on next line if 0 < NTYPES < 9)
|
9 Number of matrix types (list types on next line if 0 < NTYPES < 9)
|
||||||
1 2 3 4 5 6 7 8 9 Matrix Types
|
1 2 3 4 5 6 7 8 9 Matrix Types
|
||||||
30.0 Threshold value of test ratio
|
50.0 Threshold value of test ratio
|
||||||
T Put T to test the error exits
|
T Put T to test the error exits
|
||||||
|
|
|
@ -7,7 +7,7 @@ SVD: Data file for testing Singular Value Decomposition routines
|
||||||
2 2 2 2 2 Values of NBMIN (minimum blocksize)
|
2 2 2 2 2 Values of NBMIN (minimum blocksize)
|
||||||
1 0 5 9 1 Values of NX (crossover point)
|
1 0 5 9 1 Values of NX (crossover point)
|
||||||
2 0 2 2 2 Values of NRHS
|
2 0 2 2 2 Values of NRHS
|
||||||
50.0 Threshold value
|
54.0 Threshold value
|
||||||
T Put T to test the LAPACK routines
|
T Put T to test the LAPACK routines
|
||||||
T Put T to test the driver routines
|
T Put T to test the driver routines
|
||||||
T Put T to test the error exits
|
T Put T to test the error exits
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
TOPDIR = ..
|
TOPDIR = ..
|
||||||
include ../Makefile.system
|
include ../Makefile.system
|
||||||
|
|
||||||
SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
|
#SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
|
||||||
|
SUBDIRS = getrf getf2 laswp getrs potrf potf2
|
||||||
|
|
||||||
FLAMEDIRS = laswp getf2 potf2 lauu2 trti2
|
FLAMEDIRS = laswp getf2 potf2 lauu2 trti2
|
||||||
|
|
||||||
|
|
6
make.inc
6
make.inc
|
@ -5,7 +5,7 @@ LOADER = $(FORTRAN)
|
||||||
TIMER = NONE
|
TIMER = NONE
|
||||||
ARCHFLAGS= -ru
|
ARCHFLAGS= -ru
|
||||||
#RANLIB = ranlib
|
#RANLIB = ranlib
|
||||||
BLASLIB =
|
BLASLIB = ../../../libopenblas.a
|
||||||
TMGLIB = tmglib.a
|
TMGLIB = tmglib.a
|
||||||
EIGSRCLIB = eigsrc.a
|
#EIGSRCLIB = eigsrc.a
|
||||||
LINSRCLIB = linsrc.a
|
#LINSRCLIB = linsrc.a
|
||||||
|
|
48
param.h
48
param.h
|
@ -1032,14 +1032,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||||
#else
|
#else
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
@ -1104,10 +1104,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||||
#else
|
#else
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 8
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 8
|
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||||
|
|
||||||
|
@ -2021,6 +2021,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(ARMV5)
|
||||||
|
#define SNUMOPT 2
|
||||||
|
#define DNUMOPT 2
|
||||||
|
|
||||||
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
|
#define GEMM_DEFAULT_OFFSET_B 0
|
||||||
|
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define SGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||||
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_P 128
|
||||||
|
#define DGEMM_DEFAULT_P 128
|
||||||
|
#define CGEMM_DEFAULT_P 96
|
||||||
|
#define ZGEMM_DEFAULT_P 64
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_Q 240
|
||||||
|
#define DGEMM_DEFAULT_Q 120
|
||||||
|
#define CGEMM_DEFAULT_Q 120
|
||||||
|
#define ZGEMM_DEFAULT_Q 120
|
||||||
|
|
||||||
|
#define SGEMM_DEFAULT_R 12288
|
||||||
|
#define DGEMM_DEFAULT_R 8192
|
||||||
|
#define CGEMM_DEFAULT_R 4096
|
||||||
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
|
|
||||||
|
#define SYMV_P 16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef GENERIC
|
#ifdef GENERIC
|
||||||
|
|
Loading…
Reference in New Issue