Merge branch 'develop' of https://github.com/wernsaar/OpenBLAS into wernsaar-develop
Conflicts: kernel/arm/KERNEL.ARMV6
This commit is contained in:
commit
406f5bd22b
9
Makefile
9
Makefile
|
@ -262,11 +262,10 @@ endif
|
|||
|
||||
|
||||
# lapack-test: build the netlib LAPACK tmglib and the TESTING drivers with
# -j 1, then run ./lapack_testing.py -r from $(NETLIB_LAPACK_DIR).
# Every nested build goes through $(MAKE) (never a literal `make`) so the
# sub-make is the same make program and inherits command-line variables
# and flags such as -n.
lapack-test :
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING
|
||||
$(GREP) failed $(NETLIB_LAPACK_DIR)/TESTING/*.out
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
(cd $(NETLIB_LAPACK_DIR) && ./lapack_testing.py -r )
|
||||
|
||||
|
||||
dummy :
|
||||
|
||||
|
|
|
@ -10,3 +10,9 @@ FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
|||
endif
|
||||
|
||||
|
||||
# Compiler flags appended to the C (CCOMMON_OPT) and Fortran (FCOMMON_OPT)
# option lists when CORE is ARMV5.
# NOTE(review): although the core is named ARMV5, both lines request
# -march=armv6 together with hard-float VFP (-mfpu=vfp -mfloat-abi=hard).
# Confirm this is intended for ARMv5 targets.
ifeq ($(CORE), ARMV5)
|
||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
|
||||
endif
|
||||
|
||||
|
||||
|
|
|
@ -76,10 +76,10 @@ VERSION = 0.2.9.rc2
|
|||
# Unfortunately, most kernels won't give us a high-quality buffer.
|
||||
# BLAS tries to find the best region before entering the main function,
|
||||
# but this takes time. If you don't want that, you can disable it.
|
||||
# NO_WARMUP = 1
|
||||
NO_WARMUP = 1
|
||||
|
||||
# If you want to disable CPU/Memory affinity on Linux.
|
||||
# NO_AFFINITY = 1
|
||||
NO_AFFINITY = 1
|
||||
|
||||
# Don't use the AVX kernel on Sandy Bridge. This is compatible with old compilers
|
||||
# and operating systems; however, performance is lower.
|
||||
|
@ -129,6 +129,9 @@ VERSION = 0.2.9.rc2
|
|||
# The default -O2 is enough.
|
||||
# COMMON_OPT = -O2
|
||||
|
||||
# gfortran option for LAPACK
|
||||
FCOMMON_OPT = -frecursive
|
||||
|
||||
# Profiling flags
|
||||
COMMON_PROF = -pg
|
||||
|
||||
|
|
|
@ -158,6 +158,7 @@ endif
|
|||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
EXTRALIB += -lm
|
||||
NO_EXPRECISION = 1
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
|
@ -846,19 +847,6 @@ ifeq ($(DEBUG), 1)
|
|||
COMMON_OPT += -g
|
||||
endif
|
||||
|
||||
ifndef COMMON_OPT
|
||||
ifeq ($(ARCH), arm)
|
||||
COMMON_OPT = -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
ifndef COMMON_OPT
|
||||
ifeq ($(ARCH), arm64)
|
||||
COMMON_OPT = -O3
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifndef COMMON_OPT
|
||||
COMMON_OPT = -O2
|
||||
endif
|
||||
|
|
9
common.h
9
common.h
|
@ -310,10 +310,17 @@ typedef int blasint;
|
|||
#define YIELDING SwitchToThread()
|
||||
#endif
|
||||
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
|
||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
|
||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
|
||||
#endif
|
||||
|
||||
#ifdef BULLDOZER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef PILEDRIVER
|
||||
#ifndef YIELDING
|
||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
|
||||
|
|
|
@ -3,7 +3,7 @@ include ../../Makefile.system
|
|||
|
||||
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX)
|
||||
|
||||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||
#COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
|
||||
|
||||
ifdef SMP
|
||||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
|
||||
|
|
16
getarch.c
16
getarch.c
|
@ -724,6 +724,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#else
|
||||
#endif
|
||||
|
||||
/* Forced-target description used when FORCE_ARMV5 is defined: sets FORCE and
 * the architecture, sub-architecture, sub-directory, library and core name
 * strings for the ARMV5 target. */
#ifdef FORCE_ARMV5
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM"
|
||||
#define SUBARCHITECTURE "ARMV5"
|
||||
#define SUBDIRNAME "arm"
|
||||
/* ARCHCONFIG is one string of -D options built from the adjacent literals
 * below (L1/L2 sizes and line sizes, DTB settings, L2 associativity, HAVE_VFP).
 * NOTE(review): L2_SIZE=512488 is not a power of two; 524288 (512 KiB) may
 * have been intended -- confirm before relying on this value.
 * NOTE(review): HAVE_VFP is set unconditionally for this target -- confirm
 * that every supported ARMV5 core provides VFP. */
#define ARCHCONFIG "-DARMV5 " \
|
||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
|
||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
|
||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
|
||||
"-DHAVE_VFP"
|
||||
#define LIBNAME "armv5"
|
||||
#define CORENAME "ARMV5"
|
||||
#else
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef FORCE_ARMV8
|
||||
#define FORCE
|
||||
#define ARCHITECTURE "ARM64"
|
||||
|
|
|
@ -2,11 +2,11 @@ TOPDIR = ..
|
|||
include $(TOPDIR)/Makefile.system
|
||||
|
||||
ifeq ($(ARCH), x86)
|
||||
SUPPORT_GEMM3M = 1
|
||||
SUPPORT_GEMM3M = 0
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), x86_64)
|
||||
SUPPORT_GEMM3M = 1
|
||||
SUPPORT_GEMM3M = 0
|
||||
endif
|
||||
|
||||
ifeq ($(ARCH), ia64)
|
||||
|
@ -342,30 +342,51 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
|
|||
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
|
||||
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
||||
|
||||
#SLAPACKOBJS = \
|
||||
# sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
|
||||
# spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
|
||||
# slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
|
||||
|
||||
SLAPACKOBJS = \
|
||||
sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
|
||||
spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
|
||||
slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
|
||||
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
|
||||
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX)
|
||||
|
||||
|
||||
#DLAPACKOBJS = \
|
||||
# dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
|
||||
# dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
|
||||
# dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
|
||||
|
||||
DLAPACKOBJS = \
|
||||
dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
|
||||
dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
|
||||
dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
|
||||
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
|
||||
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX)
|
||||
|
||||
|
||||
QLAPACKOBJS = \
|
||||
qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \
|
||||
qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \
|
||||
qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \
|
||||
|
||||
#CLAPACKOBJS = \
|
||||
# cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
|
||||
# cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
|
||||
# claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
|
||||
|
||||
CLAPACKOBJS = \
|
||||
cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
|
||||
cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
|
||||
claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
|
||||
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
|
||||
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX)
|
||||
|
||||
|
||||
#ZLAPACKOBJS = \
|
||||
# zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
|
||||
# zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
|
||||
# zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
|
||||
|
||||
ZLAPACKOBJS = \
|
||||
zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
|
||||
zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
|
||||
zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
|
||||
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
|
||||
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX)
|
||||
|
||||
|
||||
|
||||
XLAPACKOBJS = \
|
||||
xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \
|
||||
|
@ -375,10 +396,10 @@ XLAPACKOBJS = \
|
|||
ifneq ($(NO_LAPACK), 1)
|
||||
SBLASOBJS += $(SLAPACKOBJS)
|
||||
DBLASOBJS += $(DLAPACKOBJS)
|
||||
QBLASOBJS += $(QLAPACKOBJS)
|
||||
#QBLASOBJS += $(QLAPACKOBJS)
|
||||
CBLASOBJS += $(CLAPACKOBJS)
|
||||
ZBLASOBJS += $(ZLAPACKOBJS)
|
||||
XBLASOBJS += $(XLAPACKOBJS)
|
||||
#XBLASOBJS += $(XLAPACKOBJS)
|
||||
|
||||
endif
|
||||
|
||||
|
@ -1731,37 +1752,37 @@ cblas_cher2k.$(SUFFIX) cblas_cher2k.$(PSUFFIX) : syr2k.c
|
|||
cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c
|
||||
$(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F)
|
||||
|
||||
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : getf2.c
|
||||
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : lapack/getf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : getf2.c
|
||||
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : lapack/getf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qgetf2.$(SUFFIX) qgetf2.$(PSUFFIX) : getf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : zgetf2.c
|
||||
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : lapack/zgetf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : zgetf2.c
|
||||
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : lapack/zgetf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xgetf2.$(SUFFIX) xgetf2.$(PSUFFIX) : zgetf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : getrf.c
|
||||
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : lapack/getrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : getrf.c
|
||||
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : lapack/getrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qgetrf.$(SUFFIX) qgetrf.$(PSUFFIX) : getrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : zgetrf.c
|
||||
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : lapack/zgetrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : zgetrf.c
|
||||
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : lapack/zgetrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c
|
||||
|
@ -1803,37 +1824,37 @@ zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : zlauum.c
|
|||
xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : potf2.c
|
||||
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : lapack/potf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : potf2.c
|
||||
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : lapack/potf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qpotf2.$(SUFFIX) qpotf2.$(PSUFFIX) : potf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : zpotf2.c
|
||||
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : lapack/zpotf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : zpotf2.c
|
||||
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : lapack/zpotf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xpotf2.$(SUFFIX) xpotf2.$(PSUFFIX) : zpotf2.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : potrf.c
|
||||
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : lapack/potrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : potrf.c
|
||||
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : lapack/potrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qpotrf.$(SUFFIX) qpotrf.$(PSUFFIX) : potrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : zpotrf.c
|
||||
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : lapack/zpotrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : zpotrf.c
|
||||
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : lapack/zpotrf.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c
|
||||
|
@ -1875,55 +1896,55 @@ ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : ztrtri.c
|
|||
xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : laswp.c
|
||||
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : lapack/laswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : laswp.c
|
||||
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : lapack/laswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qlaswp.$(SUFFIX) qlaswp.$(PSUFFIX) : laswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
claswp.$(SUFFIX) claswp.$(PSUFFIX) : zlaswp.c
|
||||
claswp.$(SUFFIX) claswp.$(PSUFFIX) : lapack/zlaswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : zlaswp.c
|
||||
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : lapack/zlaswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xlaswp.$(SUFFIX) xlaswp.$(PSUFFIX) : zlaswp.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : getrs.c
|
||||
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : lapack/getrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : getrs.c
|
||||
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : lapack/getrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : getrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : zgetrs.c
|
||||
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : zgetrs.c
|
||||
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : gesv.c
|
||||
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : gesv.c
|
||||
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : lapack/gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
qgesv.$(SUFFIX) qgesv.$(PSUFFIX) : gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : gesv.c
|
||||
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : lapack/gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : gesv.c
|
||||
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : lapack/gesv.c
|
||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||
|
||||
xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c
|
||||
|
|
|
@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA
|
|||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
qsbmv_thread_U, qsbmv_thread_L,
|
||||
|
@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
|||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -130,7 +130,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
FLOAT *buffer;
|
||||
int uplo;
|
||||
blasint info;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
|
@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
} else {
|
||||
|
||||
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);
|
||||
|
|
|
@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLAS
|
|||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
static int (*spmv_thread[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
qspmv_thread_U, qspmv_thread_L,
|
||||
|
@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
|||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -126,7 +126,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
FLOAT *buffer;
|
||||
int uplo;
|
||||
blasint info;
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -181,7 +181,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
|
@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
(spmv[uplo])(n, alpha, a, x, incx, y, incy, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
} else {
|
||||
|
||||
(spmv_thread[uplo])(n, alpha, a, x, incx, y, incy, buffer, nthreads);
|
||||
|
|
|
@ -145,12 +145,21 @@ void NAME(char *UPLO, char *TRANS,
|
|||
if (uplo_arg == 'U') uplo = 0;
|
||||
if (uplo_arg == 'L') uplo = 1;
|
||||
|
||||
#ifndef COMPLEX
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
#ifndef HEMM
|
||||
if (trans_arg == 'T') trans = 1;
|
||||
if (trans_arg == 'R') trans = 0;
|
||||
#endif
|
||||
if (trans_arg == 'C') trans = 1;
|
||||
#else
|
||||
#ifdef HEMM
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
if (trans_arg == 'C') trans = 1;
|
||||
#else
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
if (trans_arg == 'T') trans = 1;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
nrowa = args.n;
|
||||
if (trans & 1) nrowa = args.k;
|
||||
|
|
|
@ -148,12 +148,21 @@ void NAME(char *UPLO, char *TRANS,
|
|||
if (uplo_arg == 'U') uplo = 0;
|
||||
if (uplo_arg == 'L') uplo = 1;
|
||||
|
||||
|
||||
#ifndef COMPLEX
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
#ifndef HEMM
|
||||
if (trans_arg == 'T') trans = 1;
|
||||
if (trans_arg == 'R') trans = 0;
|
||||
#endif
|
||||
if (trans_arg == 'C') trans = 1;
|
||||
#else
|
||||
#ifdef HEMM
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
if (trans_arg == 'C') trans = 1;
|
||||
#else
|
||||
if (trans_arg == 'N') trans = 0;
|
||||
if (trans_arg == 'T') trans = 1;
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
nrowa = args.n;
|
||||
if (trans & 1) nrowa = args.k;
|
||||
|
|
|
@ -61,7 +61,7 @@ static int (*hbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
|
|||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
static int (*hbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
xhbmv_thread_U, xhbmv_thread_L, xhbmv_thread_V, xhbmv_thread_M,
|
||||
|
@ -92,7 +92,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
|||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -138,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
FLOAT *buffer;
|
||||
int uplo;
|
||||
blasint info;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
|
@ -205,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,
|
|||
|
||||
(hbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
} else {
|
||||
|
||||
(hbmv_thread[uplo])(n, k, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);
|
||||
|
|
|
@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
|
|||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
xsbmv_thread_U, xsbmv_thread_L,
|
||||
|
@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
|||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -131,7 +131,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
|
@ -139,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
|||
|
||||
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPBUG
|
||||
} else {
|
||||
|
||||
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);
|
||||
|
|
|
@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT
|
|||
#endif
|
||||
};
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
static int (*spmv_thread[])(BLASLONG, FLOAT *, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||
#ifdef XDOUBLE
|
||||
xspmv_thread_U, xspmv_thread_L,
|
||||
|
@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
|||
blasint info;
|
||||
int uplo;
|
||||
FLOAT *buffer;
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
|
@ -127,7 +127,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
|||
|
||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
nthreads = num_cpu_avail(2);
|
||||
|
||||
if (nthreads == 1) {
|
||||
|
@ -135,7 +135,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
|
|||
|
||||
(spmv[uplo])(n, alpha_r, alpha_i, a, b, incx, c, incy, buffer);
|
||||
|
||||
#ifdef SMP
|
||||
#ifdef SMPTEST
|
||||
|
||||
} else {
|
||||
|
||||
|
|
|
@ -0,0 +1,134 @@
|
|||
SAMAXKERNEL = ../arm/amax.c
|
||||
DAMAXKERNEL = ../arm/amax.c
|
||||
CAMAXKERNEL = ../arm/zamax.c
|
||||
ZAMAXKERNEL = ../arm/zamax.c
|
||||
|
||||
SAMINKERNEL = ../arm/amin.c
|
||||
DAMINKERNEL = ../arm/amin.c
|
||||
CAMINKERNEL = ../arm/zamin.c
|
||||
ZAMINKERNEL = ../arm/zamin.c
|
||||
|
||||
SMAXKERNEL = ../arm/max.c
|
||||
DMAXKERNEL = ../arm/max.c
|
||||
|
||||
SMINKERNEL = ../arm/min.c
|
||||
DMINKERNEL = ../arm/min.c
|
||||
|
||||
ISAMAXKERNEL = ../arm/iamax.c
|
||||
IDAMAXKERNEL = ../arm/iamax.c
|
||||
ICAMAXKERNEL = ../arm/izamax.c
|
||||
IZAMAXKERNEL = ../arm/izamax.c
|
||||
|
||||
ISAMINKERNEL = ../arm/iamin.c
|
||||
IDAMINKERNEL = ../arm/iamin.c
|
||||
ICAMINKERNEL = ../arm/izamin.c
|
||||
IZAMINKERNEL = ../arm/izamin.c
|
||||
|
||||
ISMAXKERNEL = ../arm/imax.c
|
||||
IDMAXKERNEL = ../arm/imax.c
|
||||
|
||||
ISMINKERNEL = ../arm/imin.c
|
||||
IDMINKERNEL = ../arm/imin.c
|
||||
|
||||
SASUMKERNEL = ../arm/asum.c
|
||||
DASUMKERNEL = ../arm/asum.c
|
||||
CASUMKERNEL = ../arm/zasum.c
|
||||
ZASUMKERNEL = ../arm/zasum.c
|
||||
|
||||
SAXPYKERNEL = ../arm/axpy.c
|
||||
DAXPYKERNEL = ../arm/axpy.c
|
||||
CAXPYKERNEL = ../arm/zaxpy.c
|
||||
ZAXPYKERNEL = ../arm/zaxpy.c
|
||||
|
||||
SCOPYKERNEL = ../arm/copy.c
|
||||
DCOPYKERNEL = ../arm/copy.c
|
||||
CCOPYKERNEL = ../arm/zcopy.c
|
||||
ZCOPYKERNEL = ../arm/zcopy.c
|
||||
|
||||
SDOTKERNEL = ../arm/dot.c
|
||||
DDOTKERNEL = ../arm/dot.c
|
||||
CDOTKERNEL = ../arm/zdot.c
|
||||
ZDOTKERNEL = ../arm/zdot.c
|
||||
|
||||
SNRM2KERNEL = ../arm/nrm2.c
|
||||
DNRM2KERNEL = ../arm/nrm2.c
|
||||
CNRM2KERNEL = ../arm/znrm2.c
|
||||
ZNRM2KERNEL = ../arm/znrm2.c
|
||||
|
||||
SROTKERNEL = ../arm/rot.c
|
||||
DROTKERNEL = ../arm/rot.c
|
||||
CROTKERNEL = ../arm/zrot.c
|
||||
ZROTKERNEL = ../arm/zrot.c
|
||||
|
||||
SSCALKERNEL = ../arm/scal.c
|
||||
DSCALKERNEL = ../arm/scal.c
|
||||
CSCALKERNEL = ../arm/zscal.c
|
||||
ZSCALKERNEL = ../arm/zscal.c
|
||||
|
||||
SSWAPKERNEL = ../arm/swap.c
|
||||
DSWAPKERNEL = ../arm/swap.c
|
||||
CSWAPKERNEL = ../arm/zswap.c
|
||||
ZSWAPKERNEL = ../arm/zswap.c
|
||||
|
||||
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||
DGEMVNKERNEL = ../arm/gemv_n.c
|
||||
CGEMVNKERNEL = ../arm/zgemv_n.c
|
||||
ZGEMVNKERNEL = ../arm/zgemv_n.c
|
||||
|
||||
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||
DGEMVTKERNEL = ../arm/gemv_t.c
|
||||
CGEMVTKERNEL = ../arm/zgemv_t.c
|
||||
ZGEMVTKERNEL = ../arm/zgemv_t.c
|
||||
|
||||
STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
|
||||
# Single-precision real GEMM: generic 2x2 kernel with the generic 2-wide
# outer copy routines. The copy-object names use $(TSUFFIX).$(SUFFIX)
# rather than a hard-coded ".o", matching the other kernel configuration
# files, so they follow the build's object suffix settings.
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
# Double-precision real GEMM: generic 2x2 kernel with the generic 2-wide
# outer copy routines. The copy-object names use $(TSUFFIX).$(SUFFIX)
# rather than a hard-coded ".o", matching the other kernel configuration
# files, so they follow the build's object suffix settings.
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
# Single-precision complex GEMM: generic complex 2x2 kernel with the generic
# complex 2-wide outer copy routines. The copy-object names use
# $(TSUFFIX).$(SUFFIX) rather than a hard-coded ".o", matching the other
# kernel configuration files, so they follow the build's object suffix
# settings.
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
# Double-precision complex GEMM: generic complex 2x2 kernel with the generic
# complex 2-wide outer copy routines. The copy-object names use
# $(TSUFFIX).$(SUFFIX) rather than a hard-coded ".o", matching the other
# kernel configuration files, so they follow the build's object suffix
# settings.
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,11 +1,20 @@
|
|||
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||
|
||||
DGEMVNKERNEL = ../arm/gemv_n.c
|
||||
DGEMVTKERNEL = ../arm/gemv_t.c
|
||||
|
||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
|
||||
#ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||
#ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||
#ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||
#ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||
|
||||
|
||||
#STRMMKERNEL = ../generic/trmmkernel_2x2.c
|
||||
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
#SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||
|
@ -86,12 +95,12 @@ CSWAPKERNEL = swap_vfp.S
|
|||
ZSWAPKERNEL = swap_vfp.S
|
||||
|
||||
# BAD SGEMVNKERNEL = gemv_n_vfp.S
|
||||
DGEMVNKERNEL = gemv_n_vfp.S
|
||||
# BAD DGEMVNKERNEL = gemv_n_vfp.S
|
||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||
|
||||
# BAD SGEMVTKERNEL = gemv_t_vfp.S
|
||||
DGEMVTKERNEL = gemv_t_vfp.S
|
||||
# BAD DGEMVTKERNEL = gemv_t_vfp.S
|
||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||
|
||||
|
|
|
@ -7,15 +7,19 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
|||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMKERNEL = gemm_kernel_2x8_nehalem.S
|
||||
DGEMMINCOPY = dgemm_ncopy_2.S
|
||||
DGEMMITCOPY = dgemm_tcopy_2.S
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
DGEMMOTCOPY = dgemm_tcopy_8.S
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
DGEMMKERNEL = gemm_kernel_4x4_core2.S
|
||||
DGEMMINCOPY =
|
||||
DGEMMITCOPY =
|
||||
DGEMMONCOPY = gemm_ncopy_4.S
|
||||
DGEMMOTCOPY = gemm_tcopy_4.S
|
||||
DGEMMINCOPYOBJ =
|
||||
DGEMMITCOPYOBJ =
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
||||
CGEMMINCOPY = zgemm_ncopy_2.S
|
||||
CGEMMITCOPY = zgemm_tcopy_2.S
|
||||
|
@ -40,10 +44,11 @@ STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
|
|||
STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
|
||||
STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S
|
||||
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_2x8_nehalem.S
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_2x8_nehalem.S
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT_2x8_nehalem.S
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_2x8_nehalem.S
|
||||
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_core2.S
|
||||
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_core2.S
|
||||
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_core2.S
|
||||
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_core2.S
|
||||
|
||||
|
||||
CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
|
||||
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S
|
||||
|
|
|
@ -1,34 +1,35 @@
|
|||
SGEMMKERNEL = sgemm_kernel_8x8_sandy.S
|
||||
SGEMMINCOPY =
|
||||
SGEMMITCOPY =
|
||||
SGEMMKERNEL = gemm_kernel_4x8_nehalem.S
|
||||
SGEMMINCOPY = gemm_ncopy_4.S
|
||||
SGEMMITCOPY = gemm_tcopy_4.S
|
||||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
|
||||
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
|
||||
SGEMMINCOPYOBJ =
|
||||
SGEMMITCOPYOBJ =
|
||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
|
||||
DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
|
||||
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||
#DGEMMONCOPY = gemm_ncopy_4.S
|
||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||
#DGEMMOTCOPY = gemm_tcopy_4.S
|
||||
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
#CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
||||
CGEMMKERNEL = cgemm_kernel_4x8_sandy.S
|
||||
CGEMMINCOPY = ../generic/zgemm_ncopy_8_sandy.c
|
||||
CGEMMITCOPY = ../generic/zgemm_tcopy_8_sandy.c
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4_sandy.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4_sandy.c
|
||||
|
||||
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
|
||||
CGEMMINCOPY = zgemm_ncopy_2.S
|
||||
CGEMMITCOPY = zgemm_tcopy_2.S
|
||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||
#ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S
|
||||
|
||||
|
||||
ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S
|
||||
ZGEMMINCOPY =
|
||||
ZGEMMITCOPY =
|
||||
|
@ -58,6 +59,7 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
|||
#ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x4_nehalem.S
|
||||
#ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x4_nehalem.S
|
||||
#ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x4_nehalem.S
|
||||
|
||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
========================================================================================
|
||||
2014/05/07 Saar
|
||||
|
||||
Platform: BULLDOZER single thread
|
||||
|
||||
|
||||
--> LAPACK TESTING SUMMARY <--
|
||||
Processing LAPACK Testing output found in the TESTING direcory
|
||||
SUMMARY nb test run numerical error other error
|
||||
================ =========== ================= ================
|
||||
REAL 1079349 0 (0.000%) 0 (0.000%)
|
||||
DOUBLE PRECISION 1080161 0 (0.000%) 0 (0.000%)
|
||||
COMPLEX 556022 0 (0.000%) 0 (0.000%)
|
||||
COMPLEX16 556834 0 (0.000%) 0 (0.000%)
|
||||
|
||||
--> ALL PRECISIONS 3272366 0 (0.000%) 0 (0.000%)
|
||||
|
||||
========================================================================================
|
||||
|
|
@ -54,9 +54,9 @@ include ../make.inc
|
|||
#
|
||||
#######################################################################
|
||||
|
||||
ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla_array.o iparmq.o \
|
||||
ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o \
|
||||
ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
|
||||
../INSTALL/ilaver.o
|
||||
../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o
|
||||
|
||||
SCLAUX = \
|
||||
sbdsdc.o \
|
||||
|
@ -92,7 +92,7 @@ DZLAUX = \
|
|||
dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
|
||||
dsteqr.o dsterf.o dlaisnan.o disnan.o \
|
||||
dlartgp.o dlartgs.o \
|
||||
../INSTALL/dsecnd_$(TIMER).o
|
||||
../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
|
||||
|
||||
SLASRC = \
|
||||
sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
|
||||
|
@ -101,7 +101,7 @@ SLASRC = \
|
|||
sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
|
||||
sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
|
||||
sgeqp3.o sgeqpf.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \
|
||||
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o \
|
||||
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesvd.o sgesvx.o \
|
||||
sgetc2.o sgetri.o \
|
||||
sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
|
||||
sggglm.o sgghrd.o sgglse.o sggqrf.o \
|
||||
|
@ -120,7 +120,7 @@ SLASRC = \
|
|||
slarrv.o slartv.o \
|
||||
slarz.o slarzb.o slarzt.o slasy2.o slasyf.o slasyf_rook.o \
|
||||
slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
|
||||
sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
|
||||
slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
|
||||
sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
|
||||
sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
|
||||
sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
|
||||
|
@ -147,7 +147,7 @@ SLASRC = \
|
|||
stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
|
||||
stptrs.o \
|
||||
strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
|
||||
strtrs.o stzrqf.o stzrzf.o sstemr.o \
|
||||
strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o \
|
||||
slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
|
||||
stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
|
||||
sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
|
||||
|
@ -176,7 +176,7 @@ CLASRC = \
|
|||
cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
|
||||
cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
|
||||
cgeqpf.o cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \
|
||||
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o \
|
||||
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesvd.o \
|
||||
cgesvx.o cgetc2.o cgetri.o \
|
||||
cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
|
||||
cgghrd.o cgglse.o cggqrf.o cggrqf.o \
|
||||
|
@ -208,7 +208,7 @@ CLASRC = \
|
|||
clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
|
||||
clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
|
||||
clasyf.o clasyf_rook.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
|
||||
clatzm.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
|
||||
clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
|
||||
cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
|
||||
cposv.o cposvx.o cpotri.o cpstrf.o cpstf2.o \
|
||||
cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
|
||||
|
@ -225,7 +225,7 @@ CLASRC = \
|
|||
ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
|
||||
ctprfs.o ctptri.o \
|
||||
ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
|
||||
ctrsyl.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
|
||||
ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
|
||||
cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
|
||||
cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
|
||||
cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
|
||||
|
@ -261,7 +261,7 @@ DLASRC = \
|
|||
dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
|
||||
dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
|
||||
dgeqp3.o dgeqpf.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \
|
||||
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o \
|
||||
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesvd.o dgesvx.o \
|
||||
dgetc2.o dgetri.o \
|
||||
dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
|
||||
dggglm.o dgghrd.o dgglse.o dggqrf.o \
|
||||
|
@ -279,8 +279,8 @@ DLASRC = \
|
|||
dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \
|
||||
dlargv.o dlarrv.o dlartv.o \
|
||||
dlarz.o dlarzb.o dlarzt.o dlasy2.o dlasyf.o dlasyf_rook.o \
|
||||
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o \
|
||||
dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
|
||||
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \
|
||||
dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
|
||||
dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
|
||||
dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
|
||||
dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
|
||||
|
@ -307,7 +307,7 @@ DLASRC = \
|
|||
dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
|
||||
dtptrs.o \
|
||||
dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
|
||||
dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
|
||||
dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
|
||||
dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
|
||||
dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
|
||||
dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
|
||||
|
@ -335,7 +335,7 @@ ZLASRC = \
|
|||
zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
|
||||
zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
|
||||
zgeqpf.o zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \
|
||||
zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o \
|
||||
zgesc2.o zgesdd.o zgesvd.o zgesvx.o zgetc2.o \
|
||||
zgetri.o \
|
||||
zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
|
||||
zgghrd.o zgglse.o zggqrf.o zggrqf.o \
|
||||
|
@ -370,7 +370,7 @@ ZLASRC = \
|
|||
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
|
||||
zlassq.o zlasyf.o zlasyf_rook.o \
|
||||
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
|
||||
zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
|
||||
zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
|
||||
zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
|
||||
zposv.o zposvx.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
|
||||
zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
|
||||
|
@ -387,7 +387,7 @@ ZLASRC = \
|
|||
ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
|
||||
ztprfs.o ztptri.o \
|
||||
ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
|
||||
ztrsyl.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
|
||||
ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
|
||||
zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
|
||||
zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
|
||||
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
|
||||
|
@ -417,8 +417,6 @@ endif
|
|||
ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \
|
||||
$(SCLAUX) $(DZLAUX) $(ALLAUX)
|
||||
|
||||
ALLOBJ_P = $(ALLOBJ:.o=.$(PSUFFIX))
|
||||
|
||||
ifdef USEXBLAS
|
||||
ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
|
||||
endif
|
||||
|
@ -435,6 +433,7 @@ lapacklib: $(ALLOBJ) $(ALLXOBJ)
|
|||
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
|
||||
$(RANLIB) $@
|
||||
|
||||
|
||||
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
|
||||
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
|
||||
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
|
||||
|
@ -483,16 +482,11 @@ clean:
|
|||
%.$(PSUFFIX): %.f
|
||||
$(FORTRAN) $(POPTS) -c $< -o $@
|
||||
|
||||
slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
|
||||
|
||||
slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
sla_wwaddw.$(PSUFFIX): sla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
dla_wwaddw.$(PSUFFIX): dla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
cla_wwaddw.$(PSUFFIX): cla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
zla_wwaddw.$(PSUFFIX): zla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
|
||||
slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
|
||||
|
||||
|
|
|
@ -5,5 +5,5 @@ Data file for testing COMPLEX LAPACK linear equation routines RFP format
|
|||
1 2 15 Values of NRHS (number of right hand sides)
|
||||
9 Number of matrix types (list types on next line if 0 < NTYPES < 9)
|
||||
1 2 3 4 5 6 7 8 9 Matrix Types
|
||||
30.0 Threshold value of test ratio
|
||||
50.0 Threshold value of test ratio
|
||||
T Put T to test the error exits
|
||||
|
|
|
@ -7,7 +7,7 @@ SVD: Data file for testing Singular Value Decomposition routines
|
|||
2 2 2 2 2 Values of NBMIN (minimum blocksize)
|
||||
1 0 5 9 1 Values of NX (crossover point)
|
||||
2 0 2 2 2 Values of NRHS
|
||||
50.0 Threshold value
|
||||
54.0 Threshold value
|
||||
T Put T to test the LAPACK routines
|
||||
T Put T to test the driver routines
|
||||
T Put T to test the error exits
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
TOPDIR = ..
|
||||
include ../Makefile.system
|
||||
|
||||
SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
|
||||
#SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
|
||||
SUBDIRS = getrf getf2 laswp getrs potrf potf2
|
||||
|
||||
FLAMEDIRS = laswp getf2 potf2 lauu2 trti2
|
||||
|
||||
|
|
6
make.inc
6
make.inc
|
@ -5,7 +5,7 @@ LOADER = $(FORTRAN)
|
|||
TIMER = NONE
|
||||
ARCHFLAGS= -ru
|
||||
#RANLIB = ranlib
|
||||
BLASLIB =
|
||||
BLASLIB = ../../../libopenblas.a
|
||||
TMGLIB = tmglib.a
|
||||
EIGSRCLIB = eigsrc.a
|
||||
LINSRCLIB = linsrc.a
|
||||
#EIGSRCLIB = eigsrc.a
|
||||
#LINSRCLIB = linsrc.a
|
||||
|
|
48
param.h
48
param.h
|
@ -1032,14 +1032,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
#else
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 1
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_N 8
|
||||
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||
#define QGEMM_DEFAULT_UNROLL_N 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 4
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 4
|
||||
|
@ -1104,10 +1104,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
#define XGEMM_DEFAULT_UNROLL_N 1
|
||||
#else
|
||||
#define SGEMM_DEFAULT_UNROLL_M 8
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define DGEMM_DEFAULT_UNROLL_M 8
|
||||
#define QGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_M 8
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 4
|
||||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
|
||||
|
@ -2021,6 +2021,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
|
||||
#if defined(ARMV5)
|
||||
#define SNUMOPT 2
|
||||
#define DNUMOPT 2
|
||||
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 0
|
||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 2
|
||||
#define SGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
||||
#define DGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define ZGEMM_DEFAULT_UNROLL_M 2
|
||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||
|
||||
#define SGEMM_DEFAULT_P 128
|
||||
#define DGEMM_DEFAULT_P 128
|
||||
#define CGEMM_DEFAULT_P 96
|
||||
#define ZGEMM_DEFAULT_P 64
|
||||
|
||||
#define SGEMM_DEFAULT_Q 240
|
||||
#define DGEMM_DEFAULT_Q 120
|
||||
#define CGEMM_DEFAULT_Q 120
|
||||
#define ZGEMM_DEFAULT_Q 120
|
||||
|
||||
#define SGEMM_DEFAULT_R 12288
|
||||
#define DGEMM_DEFAULT_R 8192
|
||||
#define CGEMM_DEFAULT_R 4096
|
||||
#define ZGEMM_DEFAULT_R 4096
|
||||
|
||||
|
||||
#define SYMV_P 16
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
#ifdef GENERIC
|
||||
|
|
Loading…
Reference in New Issue