Merge branch 'develop'
This commit is contained in:
commit
f9991fd5f6
8
Makefile
8
Makefile
|
@ -247,10 +247,16 @@ ifndef NOFORTRAN
|
||||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
ifeq ($(F_COMPILER), GFORTRAN)
|
ifeq ($(FC), GFORTRAN)
|
||||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
ifdef SMP
|
||||||
|
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
else
|
||||||
|
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
endif
|
endif
|
||||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# This library's version
|
# This library's version
|
||||||
VERSION = 0.2.10.rc1
|
VERSION = 0.2.10.rc2
|
||||||
|
|
||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||||
|
|
3
cblas.h
3
cblas.h
|
@ -16,6 +16,9 @@ void goto_set_num_threads(int num_threads);
|
||||||
/*Get the build configure on runtime.*/
|
/*Get the build configure on runtime.*/
|
||||||
char* openblas_get_config(void);
|
char* openblas_get_config(void);
|
||||||
|
|
||||||
|
/*Get the CPU corename on runtime.*/
|
||||||
|
char* openblas_get_corename(void);
|
||||||
|
|
||||||
/* Get the parallelization type which is used by OpenBLAS */
|
/* Get the parallelization type which is used by OpenBLAS */
|
||||||
int openblas_get_parallel(void);
|
int openblas_get_parallel(void);
|
||||||
/* OpenBLAS is compiled for sequential use */
|
/* OpenBLAS is compiled for sequential use */
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
TOPDIR = ../..
|
TOPDIR = ../..
|
||||||
include ../../Makefile.system
|
include ../../Makefile.system
|
||||||
|
|
||||||
|
USE_GEMM3M = 0
|
||||||
|
|
||||||
ifeq ($(ARCH), x86)
|
ifeq ($(ARCH), x86)
|
||||||
USE_GEMM3M = 1
|
USE_GEMM3M = 0
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), x86_64)
|
ifeq ($(ARCH), x86_64)
|
||||||
USE_GEMM3M = 1
|
USE_GEMM3M = 0
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), ia64)
|
ifeq ($(ARCH), ia64)
|
||||||
|
@ -168,7 +170,7 @@ XBLASOBJS += \
|
||||||
xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \
|
xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \
|
||||||
xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX)
|
xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX)
|
||||||
|
|
||||||
ifdef USE_GEMM3M
|
ifeq ($(USE_GEMM3M), 1)
|
||||||
|
|
||||||
CBLASOBJS += \
|
CBLASOBJS += \
|
||||||
cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \
|
cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \
|
||||||
|
@ -239,7 +241,7 @@ CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread
|
||||||
ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX)
|
ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX)
|
||||||
XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX)
|
XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX)
|
||||||
|
|
||||||
ifdef USE_GEMM3M
|
ifeq ($(USE_GEMM3M), 1)
|
||||||
|
|
||||||
CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX)
|
CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX)
|
||||||
CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX)
|
CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX)
|
||||||
|
|
|
@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
static char* openblas_config_str=""
|
static char* openblas_config_str=""
|
||||||
#ifdef USE64BITINT
|
#ifdef USE64BITINT
|
||||||
"USE64BITINT "
|
"USE64BITINT "
|
||||||
|
@ -50,10 +52,33 @@ static char* openblas_config_str=""
|
||||||
#endif
|
#endif
|
||||||
#ifdef NO_AFFINITY
|
#ifdef NO_AFFINITY
|
||||||
"NO_AFFINITY "
|
"NO_AFFINITY "
|
||||||
|
#endif
|
||||||
|
#ifndef DYNAMIC_ARCH
|
||||||
|
CHAR_CORENAME
|
||||||
#endif
|
#endif
|
||||||
;
|
;
|
||||||
|
|
||||||
|
#ifdef DYNAMIC_ARCH
|
||||||
|
char *gotoblas_corename();
|
||||||
|
static char tmp_config_str[256];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
char* CNAME() {
|
char* CNAME() {
|
||||||
|
#ifndef DYNAMIC_ARCH
|
||||||
return openblas_config_str;
|
return openblas_config_str;
|
||||||
|
#else
|
||||||
|
strcpy(tmp_config_str, openblas_config_str);
|
||||||
|
strcat(tmp_config_str, gotoblas_corename());
|
||||||
|
return tmp_config_str;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
char* openblas_get_corename() {
|
||||||
|
#ifndef DYNAMIC_ARCH
|
||||||
|
return CHAR_CORENAME;
|
||||||
|
#else
|
||||||
|
return gotoblas_corename();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
|
@ -165,7 +165,8 @@ int get_L2_size(void){
|
||||||
|
|
||||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
|
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
|
||||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
|
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
|
||||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC)
|
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
|
||||||
|
defined(PILEDRIVER) || defined(HASWELL)
|
||||||
|
|
||||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
cpuid(0x80000006, &eax, &ebx, &ecx, &edx);
|
||||||
|
|
||||||
|
|
|
@ -73,7 +73,7 @@
|
||||||
);
|
);
|
||||||
|
|
||||||
@gemm3mobjs = (
|
@gemm3mobjs = (
|
||||||
zgemm3m, cgemm3m, zsymm3m, csymm3m, zhemm3m, chemm3m,
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
@ -85,6 +85,7 @@
|
||||||
@misc_no_underscore_objs = (
|
@misc_no_underscore_objs = (
|
||||||
goto_set_num_threads,
|
goto_set_num_threads,
|
||||||
openblas_get_config,
|
openblas_get_config,
|
||||||
|
openblas_get_corename,
|
||||||
);
|
);
|
||||||
|
|
||||||
@misc_underscore_objs = (
|
@misc_underscore_objs = (
|
||||||
|
|
|
@ -952,6 +952,15 @@ int main(int argc, char *argv[]){
|
||||||
#else
|
#else
|
||||||
get_cpuconfig();
|
get_cpuconfig();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef FORCE
|
||||||
|
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
|
||||||
|
#else
|
||||||
|
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
|
||||||
|
printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case '2' : /* SMP */
|
case '2' : /* SMP */
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
TOPDIR = ..
|
TOPDIR = ..
|
||||||
include $(TOPDIR)/Makefile.system
|
include $(TOPDIR)/Makefile.system
|
||||||
|
|
||||||
|
SUPPORT_GEMM3M = 0
|
||||||
|
|
||||||
ifeq ($(ARCH), x86)
|
ifeq ($(ARCH), x86)
|
||||||
SUPPORT_GEMM3M = 0
|
SUPPORT_GEMM3M = 0
|
||||||
endif
|
endif
|
||||||
|
@ -124,7 +126,7 @@ ZBLAS3OBJS = \
|
||||||
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
|
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
|
||||||
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)
|
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)
|
||||||
|
|
||||||
ifdef SUPPORT_GEMM3M
|
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||||
|
|
||||||
CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX)
|
CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX)
|
||||||
|
|
||||||
|
@ -182,7 +184,7 @@ XBLAS3OBJS = \
|
||||||
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
|
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
|
||||||
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX)
|
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX)
|
||||||
|
|
||||||
ifdef SUPPORT_GEMM3M
|
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||||
|
|
||||||
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
|
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
|
||||||
|
|
||||||
|
@ -238,7 +240,7 @@ XBLAS3OBJS = \
|
||||||
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
|
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
|
||||||
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX)
|
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX)
|
||||||
|
|
||||||
ifdef SUPPORT_GEMM3M
|
ifeq ($(SUPPORT_GEMM3M), 1)
|
||||||
|
|
||||||
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
|
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)
|
||||||
|
|
||||||
|
|
|
@ -42,6 +42,12 @@
|
||||||
#include "functable.h"
|
#include "functable.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef SMP
|
||||||
|
#ifdef __64BIT__
|
||||||
|
#define SMPTEST 1
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
#define ERROR_NAME "QGER "
|
#define ERROR_NAME "QGER "
|
||||||
#elif defined DOUBLE
|
#elif defined DOUBLE
|
||||||
|
@ -75,7 +81,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha,
|
||||||
blasint incy = *INCY;
|
blasint incy = *INCY;
|
||||||
blasint lda = *LDA;
|
blasint lda = *LDA;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -107,7 +113,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
blasint info, t;
|
blasint info, t;
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -167,7 +173,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
|
|
||||||
|
@ -176,7 +182,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer);
|
GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer);
|
||||||
|
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads);
|
GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads);
|
||||||
|
|
|
@ -43,6 +43,14 @@
|
||||||
#include "functable.h"
|
#include "functable.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
#ifdef SMP
|
||||||
|
#ifdef __64BIT__
|
||||||
|
#define SMPTEST 1
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
#define ERROR_NAME "QSBMV "
|
#define ERROR_NAME "QSBMV "
|
||||||
#elif defined(DOUBLE)
|
#elif defined(DOUBLE)
|
||||||
|
@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
qsbmv_thread_U, qsbmv_thread_L,
|
qsbmv_thread_U, qsbmv_thread_L,
|
||||||
|
@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||||
blasint info;
|
blasint info;
|
||||||
int uplo;
|
int uplo;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -130,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
int uplo;
|
int uplo;
|
||||||
blasint info;
|
blasint info;
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -189,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
|
@ -197,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
|
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);
|
||||||
|
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);
|
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);
|
||||||
|
|
|
@ -42,6 +42,13 @@
|
||||||
#include "functable.h"
|
#include "functable.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef SMP
|
||||||
|
#ifdef __64BIT__
|
||||||
|
#define SMPTEST 1
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
#ifndef CONJ
|
#ifndef CONJ
|
||||||
#define ERROR_NAME "XGERU "
|
#define ERROR_NAME "XGERU "
|
||||||
|
@ -109,7 +116,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha,
|
||||||
blasint incy = *INCY;
|
blasint incy = *INCY;
|
||||||
blasint lda = *LDA;
|
blasint lda = *LDA;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -144,7 +151,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
blasint info, t;
|
blasint info, t;
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -205,7 +212,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
|
@ -221,7 +228,7 @@ void CNAME(enum CBLAS_ORDER order,
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
|
|
|
@ -43,6 +43,14 @@
|
||||||
#include "functable.h"
|
#include "functable.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
#ifdef SMP
|
||||||
|
#ifdef __64BIT__
|
||||||
|
#define SMPTEST 1
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
*/
|
||||||
|
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
#define ERROR_NAME "XSBMV "
|
#define ERROR_NAME "XSBMV "
|
||||||
#elif defined(DOUBLE)
|
#elif defined(DOUBLE)
|
||||||
|
@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
|
||||||
#ifdef XDOUBLE
|
#ifdef XDOUBLE
|
||||||
xsbmv_thread_U, xsbmv_thread_L,
|
xsbmv_thread_U, xsbmv_thread_L,
|
||||||
|
@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||||
blasint info;
|
blasint info;
|
||||||
int uplo;
|
int uplo;
|
||||||
FLOAT *buffer;
|
FLOAT *buffer;
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
int nthreads;
|
int nthreads;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -131,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||||
|
|
||||||
buffer = (FLOAT *)blas_memory_alloc(1);
|
buffer = (FLOAT *)blas_memory_alloc(1);
|
||||||
|
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
nthreads = num_cpu_avail(2);
|
nthreads = num_cpu_avail(2);
|
||||||
|
|
||||||
if (nthreads == 1) {
|
if (nthreads == 1) {
|
||||||
|
@ -139,7 +147,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
|
||||||
|
|
||||||
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);
|
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);
|
||||||
|
|
||||||
#ifdef SMPBUG
|
#ifdef SMPTEST
|
||||||
} else {
|
} else {
|
||||||
|
|
||||||
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);
|
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);
|
||||||
|
|
|
@ -1,3 +1,5 @@
|
||||||
|
USE_GEMM3M = 0
|
||||||
|
|
||||||
ifeq ($(ARCH), x86)
|
ifeq ($(ARCH), x86)
|
||||||
USE_GEMM3M = 1
|
USE_GEMM3M = 1
|
||||||
endif
|
endif
|
||||||
|
@ -122,7 +124,7 @@ XBLASOBJS += \
|
||||||
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
|
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
|
||||||
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \
|
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \
|
||||||
|
|
||||||
ifdef USE_GEMM3M
|
ifeq ($(USE_GEMM3M), 1)
|
||||||
|
|
||||||
CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX)
|
CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX)
|
||||||
ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX)
|
ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX)
|
||||||
|
@ -256,7 +258,7 @@ XBLASOBJS += \
|
||||||
xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \
|
xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \
|
||||||
xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX)
|
xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ifdef USE_GEMM3M
|
ifeq ($(USE_GEMM3M), 1)
|
||||||
|
|
||||||
CBLASOBJS += \
|
CBLASOBJS += \
|
||||||
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
|
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
|
||||||
|
|
|
@ -0,0 +1,104 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2014, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#if defined(DSDOT)
|
||||||
|
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||||
|
#else
|
||||||
|
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||||
|
#endif
|
||||||
|
{
|
||||||
|
BLASLONG i=0;
|
||||||
|
BLASLONG ix=0,iy=0;
|
||||||
|
|
||||||
|
#if defined(DSDOT)
|
||||||
|
double dot = 0.0 ;
|
||||||
|
#else
|
||||||
|
FLOAT dot = 0.0 ;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if ( n < 0 ) return(dot);
|
||||||
|
|
||||||
|
if ( (inc_x == 1) && (inc_y == 1) )
|
||||||
|
{
|
||||||
|
|
||||||
|
int n1 = n & -4;
|
||||||
|
|
||||||
|
while(i < n1)
|
||||||
|
{
|
||||||
|
|
||||||
|
#if defined(DSDOT)
|
||||||
|
dot += (double) y[i] * (double) x[i]
|
||||||
|
+ (double) y[i+1] * (double) x[i+1]
|
||||||
|
+ (double) y[i+2] * (double) x[i+2]
|
||||||
|
+ (double) y[i+3] * (double) x[i+3] ;
|
||||||
|
#else
|
||||||
|
dot += y[i] * x[i]
|
||||||
|
+ y[i+1] * x[i+1]
|
||||||
|
+ y[i+2] * x[i+2]
|
||||||
|
+ y[i+3] * x[i+3] ;
|
||||||
|
#endif
|
||||||
|
i+=4 ;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
while(i < n)
|
||||||
|
{
|
||||||
|
|
||||||
|
#if defined(DSDOT)
|
||||||
|
dot += (double) y[i] * (double) x[i] ;
|
||||||
|
#else
|
||||||
|
dot += y[i] * x[i] ;
|
||||||
|
#endif
|
||||||
|
i++ ;
|
||||||
|
|
||||||
|
}
|
||||||
|
return(dot);
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
while(i < n)
|
||||||
|
{
|
||||||
|
|
||||||
|
#if defined(DSDOT)
|
||||||
|
dot += (double) y[iy] * (double) x[ix] ;
|
||||||
|
#else
|
||||||
|
dot += y[iy] * x[ix] ;
|
||||||
|
#endif
|
||||||
|
ix += inc_x ;
|
||||||
|
iy += inc_y ;
|
||||||
|
i++ ;
|
||||||
|
|
||||||
|
}
|
||||||
|
return(dot);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -714,13 +714,13 @@ static void init_parameter(void) {
|
||||||
fprintf(stderr, "Core2\n");
|
fprintf(stderr, "Core2\n");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
TABLE_NAME.sgemm_p = 92 * (l2 >> 9);
|
TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
|
||||||
TABLE_NAME.dgemm_p = 46 * (l2 >> 9);
|
TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
|
||||||
TABLE_NAME.cgemm_p = 46 * (l2 >> 9);
|
TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
|
||||||
TABLE_NAME.zgemm_p = 23 * (l2 >> 9);
|
TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
|
||||||
#ifdef EXPRECISION
|
#ifdef EXPRECISION
|
||||||
TABLE_NAME.qgemm_p = 92 * (l2 >> 9);
|
TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
|
||||||
TABLE_NAME.xgemm_p = 46 * (l2 >> 9);
|
TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -740,6 +740,23 @@ static void init_parameter(void) {
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef DUNNINGTON
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
fprintf(stderr, "Dunnington\n");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
|
||||||
|
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
|
||||||
|
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
|
||||||
|
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
|
||||||
|
#ifdef EXPRECISION
|
||||||
|
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
|
||||||
|
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#ifdef NEHALEM
|
#ifdef NEHALEM
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
|
|
@ -119,15 +119,13 @@ XCOPYKERNEL = zcopy.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifndef SDOTKERNEL
|
ifndef SDOTKERNEL
|
||||||
SDOTKERNEL = dot_sse.S
|
SDOTKERNEL = ../generic/dot.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
ifndef DSDOTKERNEL
|
ifndef DSDOTKERNEL
|
||||||
DSDOTKERNEL = ../arm/dot.c
|
DSDOTKERNEL = ../generic/dot.c
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
ifndef DDOTKERNEL
|
ifndef DDOTKERNEL
|
||||||
DDOTKERNEL = dot_sse2.S
|
DDOTKERNEL = dot_sse2.S
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S
|
||||||
|
|
||||||
DGEMVNKERNEL = dgemv_n_bulldozer.S
|
DGEMVNKERNEL = dgemv_n_bulldozer.S
|
||||||
DGEMVTKERNEL = dgemv_t_bulldozer.S
|
DGEMVTKERNEL = dgemv_t_bulldozer.S
|
||||||
DAXPYKERNEL = daxpy_bulldozer.S
|
|
||||||
DDOTKERNEL = ddot_bulldozer.S
|
DDOTKERNEL = ddot_bulldozer.S
|
||||||
DCOPYKERNEL = dcopy_bulldozer.S
|
DCOPYKERNEL = dcopy_bulldozer.S
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S
|
||||||
|
|
||||||
DGEMVNKERNEL = dgemv_n_bulldozer.S
|
DGEMVNKERNEL = dgemv_n_bulldozer.S
|
||||||
DGEMVTKERNEL = dgemv_t_bulldozer.S
|
DGEMVTKERNEL = dgemv_t_bulldozer.S
|
||||||
DAXPYKERNEL = daxpy_bulldozer.S
|
|
||||||
DDOTKERNEL = ddot_bulldozer.S
|
DDOTKERNEL = ddot_bulldozer.S
|
||||||
DCOPYKERNEL = dcopy_bulldozer.S
|
DCOPYKERNEL = dcopy_bulldozer.S
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,7 @@ DGEMMINCOPYOBJ =
|
||||||
DGEMMITCOPYOBJ =
|
DGEMMITCOPYOBJ =
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMKERNEL = zgemm_kernel_4x2_sse3.S
|
CGEMMKERNEL = zgemm_kernel_4x2_sse.S
|
||||||
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
|
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
|
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
|
||||||
CGEMMONCOPY = zgemm_ncopy_2.S
|
CGEMMONCOPY = zgemm_ncopy_2.S
|
||||||
|
|
1
make.inc
1
make.inc
|
@ -1,7 +1,6 @@
|
||||||
SHELL = /bin/sh
|
SHELL = /bin/sh
|
||||||
PLAT = _LINUX
|
PLAT = _LINUX
|
||||||
DRVOPTS = $(OPTS)
|
DRVOPTS = $(OPTS)
|
||||||
LOADER = $(FORTRAN)
|
|
||||||
ARCHFLAGS= -ru
|
ARCHFLAGS= -ru
|
||||||
#RANLIB = ranlib
|
#RANLIB = ranlib
|
||||||
|
|
||||||
|
|
|
@ -1,15 +1,19 @@
|
||||||
UTEST_CHECK = 1
|
UTEST_CHECK = 1
|
||||||
TOPDIR = ..
|
TOPDIR = ..
|
||||||
include $(TOPDIR)/Makefile.system
|
|
||||||
|
|
||||||
TARGET=openblas_utest
|
TARGET=openblas_utest
|
||||||
|
|
||||||
|
.PHONY : all
|
||||||
|
.NOTPARALLEL : all run_test $(TARGET)
|
||||||
|
|
||||||
CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2
|
CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2
|
||||||
CUNIT_DIR=$(CURDIR)/CUnit-2.1-2
|
CUNIT_DIR=$(CURDIR)/CUnit-2.1-2
|
||||||
|
|
||||||
CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a
|
CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a
|
||||||
|
|
||||||
CFLAGS+=-I$(CUNIT_DIR)/include
|
CFLAGS +=-I$(CUNIT_DIR)/include
|
||||||
|
|
||||||
|
include $(TOPDIR)/Makefile.system
|
||||||
|
|
||||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o
|
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue