Merge branch 'develop' of https://github.com/wernsaar/OpenBLAS into wernsaar-develop

Conflicts:
	kernel/arm/KERNEL.ARMV6
This commit is contained in:
Zhang Xianyi 2014-05-21 11:24:39 +08:00
commit 406f5bd22b
50 changed files with 428 additions and 166 deletions

View File

@ -262,11 +262,10 @@ endif
lapack-test :
$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintsts xlintstz xlintstzc
@rm -f $(NETLIB_LAPACK_DIR)/TESTING/*.out
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING
$(GREP) failed $(NETLIB_LAPACK_DIR)/TESTING/*.out
# Serially (-j 1) build the tmglib target and the LAPACK test drivers, then
# run lapack_testing.py -r from the LAPACK directory.
# $(MAKE) instead of a literal `make` reuses the make program that is running
# this file and lets flags such as -n reach the sub-builds.
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) tmglib
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
# `&&` rather than `;`: do not try to start the script from the wrong
# directory when the cd fails.
(cd $(NETLIB_LAPACK_DIR) && ./lapack_testing.py -r )
dummy :

View File

@ -10,3 +10,9 @@ FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
endif
ifeq ($(CORE), ARMV5)
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6
endif

View File

@ -76,10 +76,10 @@ VERSION = 0.2.9.rc2
# Unfortunately most of kernel won't give us high quality buffer.
# BLAS tries to find the best region before entering main function,
# but it will consume time. If you don't like it, you can disable one.
# NO_WARMUP = 1
NO_WARMUP = 1
# If you want to disable CPU/Memory affinity on Linux.
# NO_AFFINITY = 1
NO_AFFINITY = 1
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
# and OS. However, the performance is low.
@ -129,6 +129,9 @@ VERSION = 0.2.9.rc2
# The default -O2 is enough.
# COMMON_OPT = -O2
# gfortran option for LAPACK
FCOMMON_OPT = -frecursive
# Profiling flags
COMMON_PROF = -pg

View File

@ -158,6 +158,7 @@ endif
ifeq ($(OSNAME), Linux)
EXTRALIB += -lm
NO_EXPRECISION = 1
endif
ifeq ($(OSNAME), AIX)
@ -846,19 +847,6 @@ ifeq ($(DEBUG), 1)
COMMON_OPT += -g
endif
ifndef COMMON_OPT
ifeq ($(ARCH), arm)
COMMON_OPT = -O3
endif
endif
ifndef COMMON_OPT
ifeq ($(ARCH), arm64)
COMMON_OPT = -O3
endif
endif
ifndef COMMON_OPT
COMMON_OPT = -O2
endif

View File

@ -310,10 +310,17 @@ typedef int blasint;
#define YIELDING SwitchToThread()
#endif
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8)
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5)
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n");
#endif
#ifdef BULLDOZER
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");
#endif
#endif
#ifdef PILEDRIVER
#ifndef YIELDING
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n");

View File

@ -3,7 +3,7 @@ include ../../Makefile.system
COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) openblas_get_config.$(SUFFIX) openblas_get_parallel.$(SUFFIX) openblas_error_handle.$(SUFFIX)
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
#COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX)
ifdef SMP
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)

View File

@ -724,6 +724,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#endif
/* Target description selected when FORCE_ARMV5 is defined: defines FORCE and
 * the architecture, sub-architecture, directory, library and core name
 * strings, plus the ARCHCONFIG flag string. */
#ifdef FORCE_ARMV5
#define FORCE
#define ARCHITECTURE "ARM"
#define SUBARCHITECTURE "ARMV5"
#define SUBDIRNAME "arm"
/* Adjacent string literals concatenate into one list of -D flags: the core
 * define, L1 data and L2 cache parameters, DTB parameters and HAVE_VFP.
 * NOTE(review): L2_SIZE=512488 is not a power of two (512 KiB would be
 * 524288); this looks like transposed digits. Confirm the intended value
 * before changing it. */
#define ARCHCONFIG "-DARMV5 " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
"-DHAVE_VFP"
#define LIBNAME "armv5"
#define CORENAME "ARMV5"
#else
#endif
#ifdef FORCE_ARMV8
#define FORCE
#define ARCHITECTURE "ARM64"

View File

@ -2,11 +2,11 @@ TOPDIR = ..
include $(TOPDIR)/Makefile.system
ifeq ($(ARCH), x86)
SUPPORT_GEMM3M = 1
SUPPORT_GEMM3M = 0
endif
ifeq ($(ARCH), x86_64)
SUPPORT_GEMM3M = 1
SUPPORT_GEMM3M = 0
endif
ifeq ($(ARCH), ia64)
@ -342,30 +342,51 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS)
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS)
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
#SLAPACKOBJS = \
# sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
# spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
# slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
SLAPACKOBJS = \
sgetf2.$(SUFFIX) sgetrf.$(SUFFIX) slauu2.$(SUFFIX) slauum.$(SUFFIX) \
spotf2.$(SUFFIX) spotrf.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) \
slaswp.$(SUFFIX) sgetrs.$(SUFFIX) sgesv.$(SUFFIX) spotri.$(SUFFIX) \
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX)
#DLAPACKOBJS = \
# dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
# dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
# dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
DLAPACKOBJS = \
dgetf2.$(SUFFIX) dgetrf.$(SUFFIX) dlauu2.$(SUFFIX) dlauum.$(SUFFIX) \
dpotf2.$(SUFFIX) dpotrf.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) \
dlaswp.$(SUFFIX) dgetrs.$(SUFFIX) dgesv.$(SUFFIX) dpotri.$(SUFFIX) \
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX)
QLAPACKOBJS = \
qgetf2.$(SUFFIX) qgetrf.$(SUFFIX) qlauu2.$(SUFFIX) qlauum.$(SUFFIX) \
qpotf2.$(SUFFIX) qpotrf.$(SUFFIX) qtrti2.$(SUFFIX) qtrtri.$(SUFFIX) \
qlaswp.$(SUFFIX) qgetrs.$(SUFFIX) qgesv.$(SUFFIX) qpotri.$(SUFFIX) \
#CLAPACKOBJS = \
# cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
# cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
# claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
CLAPACKOBJS = \
cgetf2.$(SUFFIX) cgetrf.$(SUFFIX) clauu2.$(SUFFIX) clauum.$(SUFFIX) \
cpotf2.$(SUFFIX) cpotrf.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) \
claswp.$(SUFFIX) cgetrs.$(SUFFIX) cgesv.$(SUFFIX) cpotri.$(SUFFIX) \
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX)
#ZLAPACKOBJS = \
# zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
# zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
# zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
ZLAPACKOBJS = \
zgetf2.$(SUFFIX) zgetrf.$(SUFFIX) zlauu2.$(SUFFIX) zlauum.$(SUFFIX) \
zpotf2.$(SUFFIX) zpotrf.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) \
zlaswp.$(SUFFIX) zgetrs.$(SUFFIX) zgesv.$(SUFFIX) zpotri.$(SUFFIX) \
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX)
XLAPACKOBJS = \
xgetf2.$(SUFFIX) xgetrf.$(SUFFIX) xlauu2.$(SUFFIX) xlauum.$(SUFFIX) \
@ -375,10 +396,10 @@ XLAPACKOBJS = \
ifneq ($(NO_LAPACK), 1)
SBLASOBJS += $(SLAPACKOBJS)
DBLASOBJS += $(DLAPACKOBJS)
QBLASOBJS += $(QLAPACKOBJS)
#QBLASOBJS += $(QLAPACKOBJS)
CBLASOBJS += $(CLAPACKOBJS)
ZBLASOBJS += $(ZLAPACKOBJS)
XBLASOBJS += $(XLAPACKOBJS)
#XBLASOBJS += $(XLAPACKOBJS)
endif
@ -1731,37 +1752,37 @@ cblas_cher2k.$(SUFFIX) cblas_cher2k.$(PSUFFIX) : syr2k.c
cblas_zher2k.$(SUFFIX) cblas_zher2k.$(PSUFFIX) : syr2k.c
$(CC) -DCBLAS -c $(CFLAGS) -DHEMM $< -o $(@F)
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : getf2.c
sgetf2.$(SUFFIX) sgetf2.$(PSUFFIX) : lapack/getf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : getf2.c
dgetf2.$(SUFFIX) dgetf2.$(PSUFFIX) : lapack/getf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qgetf2.$(SUFFIX) qgetf2.$(PSUFFIX) : getf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : zgetf2.c
cgetf2.$(SUFFIX) cgetf2.$(PSUFFIX) : lapack/zgetf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : zgetf2.c
zgetf2.$(SUFFIX) zgetf2.$(PSUFFIX) : lapack/zgetf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xgetf2.$(SUFFIX) xgetf2.$(PSUFFIX) : zgetf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : getrf.c
sgetrf.$(SUFFIX) sgetrf.$(PSUFFIX) : lapack/getrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : getrf.c
dgetrf.$(SUFFIX) dgetrf.$(PSUFFIX) : lapack/getrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qgetrf.$(SUFFIX) qgetrf.$(PSUFFIX) : getrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : zgetrf.c
cgetrf.$(SUFFIX) cgetrf.$(PSUFFIX) : lapack/zgetrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : zgetrf.c
zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : lapack/zgetrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c
@ -1803,37 +1824,37 @@ zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : zlauum.c
xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c
$(CC) -c $(CFLAGS) $< -o $(@F)
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : potf2.c
spotf2.$(SUFFIX) spotf2.$(PSUFFIX) : lapack/potf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : potf2.c
dpotf2.$(SUFFIX) dpotf2.$(PSUFFIX) : lapack/potf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qpotf2.$(SUFFIX) qpotf2.$(PSUFFIX) : potf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : zpotf2.c
cpotf2.$(SUFFIX) cpotf2.$(PSUFFIX) : lapack/zpotf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : zpotf2.c
zpotf2.$(SUFFIX) zpotf2.$(PSUFFIX) : lapack/zpotf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xpotf2.$(SUFFIX) xpotf2.$(PSUFFIX) : zpotf2.c
$(CC) -c $(CFLAGS) $< -o $(@F)
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : potrf.c
spotrf.$(SUFFIX) spotrf.$(PSUFFIX) : lapack/potrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : potrf.c
dpotrf.$(SUFFIX) dpotrf.$(PSUFFIX) : lapack/potrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qpotrf.$(SUFFIX) qpotrf.$(PSUFFIX) : potrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : zpotrf.c
cpotrf.$(SUFFIX) cpotrf.$(PSUFFIX) : lapack/zpotrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : zpotrf.c
zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : lapack/zpotrf.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c
@ -1875,55 +1896,55 @@ ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : ztrtri.c
xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c
$(CC) -c $(CFLAGS) $< -o $(@F)
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : laswp.c
slaswp.$(SUFFIX) slaswp.$(PSUFFIX) : lapack/laswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : laswp.c
dlaswp.$(SUFFIX) dlaswp.$(PSUFFIX) : lapack/laswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qlaswp.$(SUFFIX) qlaswp.$(PSUFFIX) : laswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
claswp.$(SUFFIX) claswp.$(PSUFFIX) : zlaswp.c
claswp.$(SUFFIX) claswp.$(PSUFFIX) : lapack/zlaswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : zlaswp.c
zlaswp.$(SUFFIX) zlaswp.$(PSUFFIX) : lapack/zlaswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xlaswp.$(SUFFIX) xlaswp.$(PSUFFIX) : zlaswp.c
$(CC) -c $(CFLAGS) $< -o $(@F)
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : getrs.c
sgetrs.$(SUFFIX) sgetrs.$(PSUFFIX) : lapack/getrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : getrs.c
dgetrs.$(SUFFIX) dgetrs.$(PSUFFIX) : lapack/getrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qgetrs.$(SUFFIX) qgetrs.$(PSUFFIX) : getrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : zgetrs.c
cgetrs.$(SUFFIX) cgetrs.$(PSUFFIX) : lapack/zgetrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : zgetrs.c
zgetrs.$(SUFFIX) zgetrs.$(PSUFFIX) : lapack/zgetrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xgetrs.$(SUFFIX) xgetrs.$(PSUFFIX) : zgetrs.c
$(CC) -c $(CFLAGS) $< -o $(@F)
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : gesv.c
sgesv.$(SUFFIX) sgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : gesv.c
dgesv.$(SUFFIX) dgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
qgesv.$(SUFFIX) qgesv.$(PSUFFIX) : gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : gesv.c
cgesv.$(SUFFIX) cgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : gesv.c
zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : lapack/gesv.c
$(CC) -c $(CFLAGS) $< -o $(@F)
xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c

View File

@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA
#endif
};
#ifdef SMP
#ifdef SMPBUG
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
qsbmv_thread_U, qsbmv_thread_L,
@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif
@ -130,7 +130,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
int uplo;
blasint info;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif
@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
#ifdef SMPBUG
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
#ifdef SMP
#ifdef SMPBUG
} else {
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);

View File

@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLAS
#endif
};
#ifdef SMP
#ifdef SMPTEST
static int (*spmv_thread[])(BLASLONG, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
qspmv_thread_U, qspmv_thread_L,
@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPTEST
int nthreads;
#endif
@ -126,7 +126,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
int uplo;
blasint info;
#ifdef SMP
#ifdef SMPTEST
int nthreads;
#endif
@ -181,7 +181,7 @@ void CNAME(enum CBLAS_ORDER order,
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
#ifdef SMPTEST
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@ -189,7 +189,7 @@ void CNAME(enum CBLAS_ORDER order,
(spmv[uplo])(n, alpha, a, x, incx, y, incy, buffer);
#ifdef SMP
#ifdef SMPTEST
} else {
(spmv_thread[uplo])(n, alpha, a, x, incx, y, incy, buffer, nthreads);

View File

@ -145,12 +145,21 @@ void NAME(char *UPLO, char *TRANS,
if (uplo_arg == 'U') uplo = 0;
if (uplo_arg == 'L') uplo = 1;
#ifndef COMPLEX
if (trans_arg == 'N') trans = 0;
#ifndef HEMM
if (trans_arg == 'T') trans = 1;
if (trans_arg == 'R') trans = 0;
#endif
if (trans_arg == 'C') trans = 1;
#else
#ifdef HEMM
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'C') trans = 1;
#else
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'T') trans = 1;
#endif
#endif
nrowa = args.n;
if (trans & 1) nrowa = args.k;

View File

@ -148,12 +148,21 @@ void NAME(char *UPLO, char *TRANS,
if (uplo_arg == 'U') uplo = 0;
if (uplo_arg == 'L') uplo = 1;
#ifndef COMPLEX
if (trans_arg == 'N') trans = 0;
#ifndef HEMM
if (trans_arg == 'T') trans = 1;
if (trans_arg == 'R') trans = 0;
#endif
if (trans_arg == 'C') trans = 1;
#else
#ifdef HEMM
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'C') trans = 1;
#else
if (trans_arg == 'N') trans = 0;
if (trans_arg == 'T') trans = 1;
#endif
#endif
nrowa = args.n;
if (trans & 1) nrowa = args.k;

View File

@ -61,7 +61,7 @@ static int (*hbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
#endif
};
#ifdef SMP
#ifdef SMPBUG
static int (*hbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xhbmv_thread_U, xhbmv_thread_L, xhbmv_thread_V, xhbmv_thread_M,
@ -92,7 +92,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif
@ -138,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer;
int uplo;
blasint info;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif
@ -197,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
#ifdef SMPBUG
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@ -205,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,
(hbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, x, incx, y, incy, buffer);
#ifdef SMP
#ifdef SMPBUG
} else {
(hbmv_thread[uplo])(n, k, ALPHA, a, lda, x, incx, y, incy, buffer, nthreads);

View File

@ -61,7 +61,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
#endif
};
#ifdef SMP
#ifdef SMPBUG
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xsbmv_thread_U, xsbmv_thread_L,
@ -90,7 +90,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPBUG
int nthreads;
#endif
@ -131,7 +131,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
#ifdef SMPBUG
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@ -139,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);
#ifdef SMP
#ifdef SMPBUG
} else {
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);

View File

@ -61,7 +61,7 @@ static int (*spmv[])(BLASLONG, FLOAT, FLOAT, FLOAT *, FLOAT *, BLASLONG, FLOAT
#endif
};
#ifdef SMP
#ifdef SMPTEST
static int (*spmv_thread[])(BLASLONG, FLOAT *, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE
xspmv_thread_U, xspmv_thread_L,
@ -88,7 +88,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
blasint info;
int uplo;
FLOAT *buffer;
#ifdef SMP
#ifdef SMPTEST
int nthreads;
#endif
@ -127,7 +127,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
buffer = (FLOAT *)blas_memory_alloc(1);
#ifdef SMP
#ifdef SMPTEST
nthreads = num_cpu_avail(2);
if (nthreads == 1) {
@ -135,7 +135,7 @@ void NAME(char *UPLO, blasint *N, FLOAT *ALPHA, FLOAT *a,
(spmv[uplo])(n, alpha_r, alpha_i, a, b, incx, c, incy, buffer);
#ifdef SMP
#ifdef SMPTEST
} else {

134
kernel/arm/KERNEL.ARMV5 Normal file
View File

@ -0,0 +1,134 @@
# Kernel source selection for the ARMV5 target.
# Every kernel variable below points at a C source file: the vector and GEMV
# kernels come from ../arm, the TRMM, GEMM and TRSM kernels from ../generic.
# Within each group the S and D variables share one source, and the C and Z
# variables share the z-prefixed source.

# amax / amin kernels
SAMAXKERNEL = ../arm/amax.c
DAMAXKERNEL = ../arm/amax.c
CAMAXKERNEL = ../arm/zamax.c
ZAMAXKERNEL = ../arm/zamax.c
SAMINKERNEL = ../arm/amin.c
DAMINKERNEL = ../arm/amin.c
CAMINKERNEL = ../arm/zamin.c
ZAMINKERNEL = ../arm/zamin.c
# max / min kernels (S and D only)
SMAXKERNEL = ../arm/max.c
DMAXKERNEL = ../arm/max.c
SMINKERNEL = ../arm/min.c
DMINKERNEL = ../arm/min.c
# I-prefixed amax / amin kernels
ISAMAXKERNEL = ../arm/iamax.c
IDAMAXKERNEL = ../arm/iamax.c
ICAMAXKERNEL = ../arm/izamax.c
IZAMAXKERNEL = ../arm/izamax.c
ISAMINKERNEL = ../arm/iamin.c
IDAMINKERNEL = ../arm/iamin.c
ICAMINKERNEL = ../arm/izamin.c
IZAMINKERNEL = ../arm/izamin.c
# I-prefixed max / min kernels (S and D only)
ISMAXKERNEL = ../arm/imax.c
IDMAXKERNEL = ../arm/imax.c
ISMINKERNEL = ../arm/imin.c
IDMINKERNEL = ../arm/imin.c
# asum, axpy, copy, dot, nrm2, rot, scal and swap kernels
SASUMKERNEL = ../arm/asum.c
DASUMKERNEL = ../arm/asum.c
CASUMKERNEL = ../arm/zasum.c
ZASUMKERNEL = ../arm/zasum.c
SAXPYKERNEL = ../arm/axpy.c
DAXPYKERNEL = ../arm/axpy.c
CAXPYKERNEL = ../arm/zaxpy.c
ZAXPYKERNEL = ../arm/zaxpy.c
SCOPYKERNEL = ../arm/copy.c
DCOPYKERNEL = ../arm/copy.c
CCOPYKERNEL = ../arm/zcopy.c
ZCOPYKERNEL = ../arm/zcopy.c
SDOTKERNEL = ../arm/dot.c
DDOTKERNEL = ../arm/dot.c
CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = ../arm/zdot.c
SNRM2KERNEL = ../arm/nrm2.c
DNRM2KERNEL = ../arm/nrm2.c
CNRM2KERNEL = ../arm/znrm2.c
ZNRM2KERNEL = ../arm/znrm2.c
SROTKERNEL = ../arm/rot.c
DROTKERNEL = ../arm/rot.c
CROTKERNEL = ../arm/zrot.c
ZROTKERNEL = ../arm/zrot.c
SSCALKERNEL = ../arm/scal.c
DSCALKERNEL = ../arm/scal.c
CSCALKERNEL = ../arm/zscal.c
ZSCALKERNEL = ../arm/zscal.c
SSWAPKERNEL = ../arm/swap.c
DSWAPKERNEL = ../arm/swap.c
CSWAPKERNEL = ../arm/zswap.c
ZSWAPKERNEL = ../arm/zswap.c
# GEMV kernels (N and T variants)
SGEMVNKERNEL = ../arm/gemv_n.c
DGEMVNKERNEL = ../arm/gemv_n.c
CGEMVNKERNEL = ../arm/zgemv_n.c
ZGEMVNKERNEL = ../arm/zgemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
DGEMVTKERNEL = ../arm/gemv_t.c
CGEMVTKERNEL = ../arm/zgemv_t.c
ZGEMVTKERNEL = ../arm/zgemv_t.c
# TRMM kernels: generic 2x2 sources
STRMMKERNEL = ../generic/trmmkernel_2x2.c
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
# GEMM kernels with their ONCOPY / OTCOPY sources and object names.
# No INCOPY / ITCOPY variables are set in this file.
# NOTE(review): the *COPYOBJ names are hard-coded with a .o suffix, while
# other kernel files in this tree spell them <name>$(TSUFFIX).$(SUFFIX);
# confirm the fixed suffix is intended for this target.
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
# TRSM kernels: S, D, C and Z all use the same four generic sources
# (LN, LT, RN, RT).
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

View File

@ -1,11 +1,20 @@
SGEMVNKERNEL = ../arm/gemv_n.c
SGEMVTKERNEL = ../arm/gemv_t.c
DGEMVNKERNEL = ../arm/gemv_n.c
DGEMVTKERNEL = ../arm/gemv_t.c
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
#ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
#ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
#ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
#ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
#STRMMKERNEL = ../generic/trmmkernel_2x2.c
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
#SGEMMONCOPY = ../generic/gemm_ncopy_2.c
@ -86,18 +95,18 @@ CSWAPKERNEL = swap_vfp.S
ZSWAPKERNEL = swap_vfp.S
# BAD SGEMVNKERNEL = gemv_n_vfp.S
DGEMVNKERNEL = gemv_n_vfp.S
# BAD DGEMVNKERNEL = gemv_n_vfp.S
CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S
# BAD SGEMVTKERNEL = gemv_t_vfp.S
DGEMVTKERNEL = gemv_t_vfp.S
# BAD DGEMVTKERNEL = gemv_t_vfp.S
CGEMVTKERNEL = cgemv_t_vfp.S
ZGEMVTKERNEL = zgemv_t_vfp.S
STRMMKERNEL = strmm_kernel_4x2_vfp.S
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
# CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
#CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S

View File

@ -7,15 +7,19 @@ SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = gemm_kernel_2x8_nehalem.S
DGEMMINCOPY = dgemm_ncopy_2.S
DGEMMITCOPY = dgemm_tcopy_2.S
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
DGEMMOTCOPY = dgemm_tcopy_8.S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = gemm_kernel_4x4_core2.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = gemm_ncopy_4.S
DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
CGEMMINCOPY = zgemm_ncopy_2.S
CGEMMITCOPY = zgemm_tcopy_2.S
@ -40,10 +44,11 @@ STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S
DTRSMKERNEL_LN = trsm_kernel_LN_2x8_nehalem.S
DTRSMKERNEL_LT = trsm_kernel_LT_2x8_nehalem.S
DTRSMKERNEL_RN = trsm_kernel_LT_2x8_nehalem.S
DTRSMKERNEL_RT = trsm_kernel_RT_2x8_nehalem.S
DTRSMKERNEL_LN = trsm_kernel_LN_4x4_core2.S
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_core2.S
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_core2.S
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_core2.S
CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S

View File

@ -1,34 +1,35 @@
SGEMMKERNEL = sgemm_kernel_8x8_sandy.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMKERNEL = gemm_kernel_4x8_nehalem.S
SGEMMINCOPY = gemm_ncopy_4.S
SGEMMITCOPY = gemm_tcopy_4.S
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = dgemm_kernel_4x8_sandy.S
DGEMMINCOPY = ../generic/gemm_ncopy_8.c
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
#DGEMMONCOPY = gemm_ncopy_4.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
#DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
#CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
CGEMMKERNEL = cgemm_kernel_4x8_sandy.S
CGEMMINCOPY = ../generic/zgemm_ncopy_8_sandy.c
CGEMMITCOPY = ../generic/zgemm_tcopy_8_sandy.c
CGEMMONCOPY = ../generic/zgemm_ncopy_4_sandy.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4_sandy.c
CGEMMKERNEL = zgemm_kernel_2x4_nehalem.S
CGEMMINCOPY = zgemm_ncopy_2.S
CGEMMITCOPY = zgemm_tcopy_2.S
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
#ZGEMMKERNEL = zgemm_kernel_1x4_nehalem.S
ZGEMMKERNEL = zgemm_kernel_4x4_sandy.S
ZGEMMINCOPY =
ZGEMMITCOPY =
@ -58,6 +59,7 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
#ZTRSMKERNEL_LT = ztrsm_kernel_LT_1x4_nehalem.S
#ZTRSMKERNEL_RN = ztrsm_kernel_LT_1x4_nehalem.S
#ZTRSMKERNEL_RT = ztrsm_kernel_RT_1x4_nehalem.S
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c

19
lapack-devel.log Normal file
View File

@ -0,0 +1,19 @@
========================================================================================
2014/05/07 Saar
Platform: BULLDOZER single thread
--> LAPACK TESTING SUMMARY <--
Processing LAPACK Testing output found in the TESTING directory
SUMMARY nb test run numerical error other error
================ =========== ================= ================
REAL 1079349 0 (0.000%) 0 (0.000%)
DOUBLE PRECISION 1080161 0 (0.000%) 0 (0.000%)
COMPLEX 556022 0 (0.000%) 0 (0.000%)
COMPLEX16 556834 0 (0.000%) 0 (0.000%)
--> ALL PRECISIONS 3272366 0 (0.000%) 0 (0.000%)
========================================================================================

View File

@ -54,9 +54,9 @@ include ../make.inc
#
#######################################################################
ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla_array.o iparmq.o \
ALLAUX = ilaenv.o ieeeck.o lsamen.o xerbla.o xerbla_array.o iparmq.o \
ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
../INSTALL/ilaver.o
../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o
SCLAUX = \
sbdsdc.o \
@ -92,7 +92,7 @@ DZLAUX = \
dlasr.o dlasrt.o dlassq.o dlasv2.o dpttrf.o dstebz.o dstedc.o \
dsteqr.o dsterf.o dlaisnan.o disnan.o \
dlartgp.o dlartgs.o \
../INSTALL/dsecnd_$(TIMER).o
../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
SLASRC = \
sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
@ -101,7 +101,7 @@ SLASRC = \
sgegs.o sgegv.o sgehd2.o sgehrd.o sgelq2.o sgelqf.o \
sgels.o sgelsd.o sgelss.o sgelsx.o sgelsy.o sgeql2.o sgeqlf.o \
sgeqp3.o sgeqpf.o sgeqr2.o sgeqr2p.o sgeqrf.o sgeqrfp.o sgerfs.o \
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesv.o sgesvd.o sgesvx.o \
sgerq2.o sgerqf.o sgesc2.o sgesdd.o sgesvd.o sgesvx.o \
sgetc2.o sgetri.o \
sggbak.o sggbal.o sgges.o sggesx.o sggev.o sggevx.o \
sggglm.o sgghrd.o sgglse.o sggqrf.o \
@ -120,7 +120,7 @@ SLASRC = \
slarrv.o slartv.o \
slarz.o slarzb.o slarzt.o slasy2.o slasyf.o slasyf_rook.o \
slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
@ -147,7 +147,7 @@ SLASRC = \
stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
stptrs.o \
strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
strtrs.o stzrqf.o stzrzf.o sstemr.o \
strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o \
slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
@ -157,7 +157,7 @@ SLASRC = \
sgeqrt.o sgeqrt2.o sgeqrt3.o sgemqrt.o \
stpqrt.o stpqrt2.o stpmqrt.o stprfb.o
DSLASRC = spotrs.o
DSLASRC = spotrs.o
ifdef USEXBLAS
SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
@ -176,7 +176,7 @@ CLASRC = \
cgegs.o cgegv.o cgehd2.o cgehrd.o cgelq2.o cgelqf.o \
cgels.o cgelsd.o cgelss.o cgelsx.o cgelsy.o cgeql2.o cgeqlf.o cgeqp3.o \
cgeqpf.o cgeqr2.o cgeqr2p.o cgeqrf.o cgeqrfp.o cgerfs.o \
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesv.o cgesvd.o \
cgerq2.o cgerqf.o cgesc2.o cgesdd.o cgesvd.o \
cgesvx.o cgetc2.o cgetri.o \
cggbak.o cggbal.o cgges.o cggesx.o cggev.o cggevx.o cggglm.o \
cgghrd.o cgglse.o cggqrf.o cggrqf.o \
@ -208,7 +208,7 @@ CLASRC = \
clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
clasyf.o clasyf_rook.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
clatzm.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
cposv.o cposvx.o cpotri.o cpstrf.o cpstf2.o \
cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
@ -225,7 +225,7 @@ CLASRC = \
ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
ctprfs.o ctptri.o \
ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
ctrsyl.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
@ -252,7 +252,7 @@ CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o
endif
ZCLASRC = cpotrs.o
ZCLASRC = cpotrs.o
DLASRC = \
dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
@ -261,7 +261,7 @@ DLASRC = \
dgegs.o dgegv.o dgehd2.o dgehrd.o dgelq2.o dgelqf.o \
dgels.o dgelsd.o dgelss.o dgelsx.o dgelsy.o dgeql2.o dgeqlf.o \
dgeqp3.o dgeqpf.o dgeqr2.o dgeqr2p.o dgeqrf.o dgeqrfp.o dgerfs.o \
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesv.o dgesvd.o dgesvx.o \
dgerq2.o dgerqf.o dgesc2.o dgesdd.o dgesvd.o dgesvx.o \
dgetc2.o dgetri.o \
dggbak.o dggbal.o dgges.o dggesx.o dggev.o dggevx.o \
dggglm.o dgghrd.o dgglse.o dggqrf.o \
@ -279,8 +279,8 @@ DLASRC = \
dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \
dlargv.o dlarrv.o dlartv.o \
dlarz.o dlarzb.o dlarzt.o dlasy2.o dlasyf.o dlasyf_rook.o \
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o \
dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \
dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
@ -307,7 +307,7 @@ DLASRC = \
dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
dtptrs.o \
dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
@ -335,8 +335,8 @@ ZLASRC = \
zgegs.o zgegv.o zgehd2.o zgehrd.o zgelq2.o zgelqf.o \
zgels.o zgelsd.o zgelss.o zgelsx.o zgelsy.o zgeql2.o zgeqlf.o zgeqp3.o \
zgeqpf.o zgeqr2.o zgeqr2p.o zgeqrf.o zgeqrfp.o zgerfs.o zgerq2.o zgerqf.o \
zgesc2.o zgesdd.o zgesv.o zgesvd.o zgesvx.o zgetc2.o \
zgetri.o \
zgesc2.o zgesdd.o zgesvd.o zgesvx.o zgetc2.o \
zgetri.o \
zggbak.o zggbal.o zgges.o zggesx.o zggev.o zggevx.o zggglm.o \
zgghrd.o zgglse.o zggqrf.o zggrqf.o \
zggsvd.o zggsvp.o \
@ -370,7 +370,7 @@ ZLASRC = \
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
zlassq.o zlasyf.o zlasyf_rook.o \
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
zposv.o zposvx.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
@ -387,7 +387,7 @@ ZLASRC = \
ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
ztprfs.o ztptri.o \
ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
ztrsyl.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
@ -417,8 +417,6 @@ endif
# ALLOBJ gathers the object lists named below (the *LASRC routine lists and
# the SCLAUX / DZLAUX / ALLAUX lists) into a single variable.
ALLOBJ = $(SLASRC) $(DLASRC) $(DSLASRC) $(CLASRC) $(ZLASRC) $(ZCLASRC) \
$(SCLAUX) $(DZLAUX) $(ALLAUX)
# ALLOBJ_P is ALLOBJ with every trailing ".o" replaced by ".$(PSUFFIX)"
# (a substitution reference).
ALLOBJ_P = $(ALLOBJ:.o=.$(PSUFFIX))
# ALLXOBJ is only defined when USEXBLAS has a non-empty value; otherwise it
# stays unset and expands to nothing.
ifdef USEXBLAS
ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
endif
@ -435,6 +433,7 @@ lapacklib: $(ALLOBJ) $(ALLXOBJ)
$(ARCH) $(ARCHFLAGS) $@ $(ALLOBJ_P)
$(RANLIB) $@
single: $(SLASRC) $(DSLASRC) $(SXLASRC) $(SCLAUX) $(ALLAUX)
$(ARCH) $(ARCHFLAGS) ../$(LAPACKLIB) $(SLASRC) $(DSLASRC) \
$(SXLASRC) $(SCLAUX) $(ALLAUX) $(ALLXAUX)
@ -483,16 +482,11 @@ clean:
# Pattern rule: compile the Fortran source %.f into %.$(PSUFFIX) using $(POPTS).
%.$(PSUFFIX): %.f
$(FORTRAN) $(POPTS) -c $< -o $@
# Explicit rules for slaruv, dlaruv and the four *la_wwaddw sources. These are
# compiled with $(NOOPT) (".o" targets) or $(PNOOPT) (".$(PSUFFIX)" targets).
# NOTE(review): the ".o" rules appear twice in this text, first with -O0 and
# then without it. This looks like both sides of a flattened diff -- presumably
# only one group belongs in the real Makefile; confirm against the repository.
slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -O0 -c $< -o $@
slaruv.$(PSUFFIX): slaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
dlaruv.$(PSUFFIX): dlaruv.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
sla_wwaddw.$(PSUFFIX): sla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
dla_wwaddw.$(PSUFFIX): dla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
cla_wwaddw.$(PSUFFIX): cla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
zla_wwaddw.$(PSUFFIX): zla_wwaddw.f ; $(FORTRAN) $(PNOOPT) -O0 -c $< -o $@
slaruv.o: slaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
dlaruv.o: dlaruv.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
sla_wwaddw.o: sla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
dla_wwaddw.o: dla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
cla_wwaddw.o: cla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@
zla_wwaddw.o: zla_wwaddw.f ; $(FORTRAN) $(NOOPT) -c $< -o $@

View File

@ -5,5 +5,5 @@ Data file for testing COMPLEX LAPACK linear equation routines RFP format
1 2 15 Values of NRHS (number of right hand sides)
9 Number of matrix types (list types on next line if 0 < NTYPES < 9)
1 2 3 4 5 6 7 8 9 Matrix Types
30.0 Threshold value of test ratio
50.0 Threshold value of test ratio
T Put T to test the error exits

View File

@ -7,7 +7,7 @@ SVD: Data file for testing Singular Value Decomposition routines
2 2 2 2 2 Values of NBMIN (minimum blocksize)
1 0 5 9 1 Values of NX (crossover point)
2 0 2 2 2 Values of NRHS
50.0 Threshold value
54.0 Threshold value
T Put T to test the LAPACK routines
T Put T to test the driver routines
T Put T to test the error exits

View File

@ -1,7 +1,8 @@
# The top of the source tree is one directory up; the shared build settings
# are pulled in from Makefile.system there.
TOPDIR = ..
include ../Makefile.system
# SUBDIRS is assigned twice below. The later, six-entry assignment is the one
# in effect; the ten-entry list is also kept as a commented-out line.
# NOTE(review): the first uncommented assignment is presumably the removed
# side of a flattened diff -- confirm against the repository.
SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
#SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
SUBDIRS = getrf getf2 laswp getrs potrf potf2
# Separate directory list kept in FLAMEDIRS.
FLAMEDIRS = laswp getf2 potf2 lauu2 trti2

View File

@ -5,7 +5,7 @@ LOADER = $(FORTRAN)
# Timer selection and archiver flags.
TIMER = NONE
ARCHFLAGS= -ru
#RANLIB = ranlib
# BLASLIB is assigned twice; the later value, ../../../libopenblas.a, is the
# one in effect.
# NOTE(review): the empty assignment is presumably the removed side of a
# flattened diff -- confirm against the repository.
BLASLIB =
BLASLIB = ../../../libopenblas.a
TMGLIB = tmglib.a
# EIGSRCLIB and LINSRCLIB appear both as live assignments and as commented-out
# lines.
# NOTE(review): presumably only the commented-out form is current -- confirm.
EIGSRCLIB = eigsrc.a
LINSRCLIB = linsrc.a
#EIGSRCLIB = eigsrc.a
#LINSRCLIB = linsrc.a

48
param.h
View File

@ -1032,14 +1032,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define XGEMM_DEFAULT_UNROLL_N 1
#else
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_M 4
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1
#define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_N 4
@ -1104,10 +1104,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_N 2
#define XGEMM_DEFAULT_UNROLL_N 1
#else
#define SGEMM_DEFAULT_UNROLL_M 8
#define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 8
#define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 8
#define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 4
#define XGEMM_DEFAULT_UNROLL_M 1
@ -2021,6 +2021,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#ifdef ARMV5

/* Constants that apply when the build is configured with ARMV5 defined. */
#define SNUMOPT 2
#define DNUMOPT 2

/* Settings shared by all GEMM variants. */
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 0
#define GEMM_DEFAULT_ALIGN 0x03fffUL

/* SGEMM: unroll factors and P/Q/R values. */
#define SGEMM_DEFAULT_UNROLL_M 2
#define SGEMM_DEFAULT_UNROLL_N 2
#define SGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 240
#define SGEMM_DEFAULT_R 12288

/* DGEMM: unroll factors and P/Q/R values. */
#define DGEMM_DEFAULT_UNROLL_M 2
#define DGEMM_DEFAULT_UNROLL_N 2
#define DGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_Q 120
#define DGEMM_DEFAULT_R 8192

/* CGEMM: unroll factors and P/Q/R values. */
#define CGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_P 96
#define CGEMM_DEFAULT_Q 120
#define CGEMM_DEFAULT_R 4096

/* ZGEMM: unroll factors and P/Q/R values. */
#define ZGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_N 2
#define ZGEMM_DEFAULT_P 64
#define ZGEMM_DEFAULT_Q 120
#define ZGEMM_DEFAULT_R 4096

#define SYMV_P 16

#endif /* ARMV5 */
#ifdef GENERIC