Merge branch 'develop'
This commit is contained in:
commit
f78eb335d6
|
@ -8,6 +8,8 @@ lapack-3.1.1
|
|||
lapack-3.1.1.tgz
|
||||
lapack-3.4.1
|
||||
lapack-3.4.1.tgz
|
||||
lapack-3.4.2
|
||||
lapack-3.4.2.tgz
|
||||
*.so
|
||||
*.a
|
||||
.svn
|
||||
|
|
|
@ -1,4 +1,20 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.2.5
|
||||
26-Nov-2012
|
||||
common:
|
||||
* Added NO_SHARED flag to disable generating the shared library.
|
||||
* Compile LAPACKE with ILP64 model when INTERFACE64=1 (#158)
|
||||
* Export LAPACK 3.4.2 symbols in shared library. (#147)
|
||||
* Only detect the number of physical CPU cores on Mac OSX. (#157)
|
||||
* Fixed NetBSD build. (#155)
|
||||
* Fixed compilation with TARGET=GENERIC. (#160)
|
||||
x86/x86-64:
|
||||
* Restore the original CPU affinity when calling
|
||||
openblas_set_num_threads(1) (#153)
|
||||
* Fixed a SEGFAULT bug in dgemv_t when m is very large. (#154)
|
||||
MIPS64:
|
||||
|
||||
====================================================================
|
||||
Version 0.2.4
|
||||
8-Oct-2012
|
||||
|
|
6
Makefile
6
Makefile
|
@ -80,6 +80,7 @@ endif
|
|||
@echo
|
||||
|
||||
shared :
|
||||
ifndef NO_SHARED
|
||||
ifeq ($(OSNAME), Linux)
|
||||
$(MAKE) -C exports so
|
||||
-ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
|
@ -103,6 +104,7 @@ endif
|
|||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
$(MAKE) -C exports dll
|
||||
endif
|
||||
endif
|
||||
|
||||
tests :
|
||||
ifndef NOFORTRAN
|
||||
|
@ -222,7 +224,11 @@ ifndef NOFORTRAN
|
|||
-@echo "PNOOPT = $(FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifdef INTERFACE64
|
||||
-@echo "CFLAGS = $(CFLAGS) -DHAVE_LAPACK_CONFIG_H -DLAPACK_ILP64" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "CFLAGS = $(CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
|
|
|
@ -71,11 +71,9 @@ ifeq ($(OSNAME), Darwin)
|
|||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dll
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
|
||||
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dll
|
||||
endif
|
||||
|
||||
@echo Install OK!
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.2.4
|
||||
VERSION = 0.2.5
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
@ -24,10 +24,13 @@ VERSION = 0.2.4
|
|||
# Fortran compiler. Default is g77.
|
||||
# FC = gfortran
|
||||
|
||||
# Even you can specify cross compiler
|
||||
# You can even specify a cross compiler. In that case, please also set HOSTCC.
|
||||
# CC = x86_64-w64-mingw32-gcc
|
||||
# FC = x86_64-w64-mingw32-gfortran
|
||||
|
||||
# If you use a cross compiler, please set the host compiler here.
|
||||
# HOSTCC = gcc
|
||||
|
||||
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
|
||||
# BINARY=64
|
||||
|
||||
|
@ -45,6 +48,9 @@ VERSION = 0.2.4
|
|||
# automatically detected by the script.
|
||||
# NUM_THREADS = 24
|
||||
|
||||
# If you don't need to generate the shared library, please uncomment the following line.
|
||||
# NO_SHARED = 1
|
||||
|
||||
# If you don't need the CBLAS interface, please uncomment the following line.
|
||||
# NO_CBLAS = 1
|
||||
|
||||
|
|
|
@ -129,7 +129,7 @@ MD5SUM = md5 -r
|
|||
endif
|
||||
|
||||
ifeq ($(OSNAME), NetBSD)
|
||||
MD5SUM = md5 -r
|
||||
MD5SUM = md5 -n
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Linux)
|
||||
|
|
5
common.h
5
common.h
|
@ -351,7 +351,12 @@ typedef int blasint;
|
|||
#endif
|
||||
|
||||
#define MMAP_ACCESS (PROT_READ | PROT_WRITE)
|
||||
|
||||
#ifdef __NetBSD__
|
||||
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
|
||||
#else
|
||||
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
|
||||
#endif
|
||||
|
||||
#include "param.h"
|
||||
#include "common_param.h"
|
||||
|
|
|
@ -770,6 +770,19 @@ void goto_set_num_threads(int num_threads) {
|
|||
|
||||
if (num_threads < 1) num_threads = blas_num_threads;
|
||||
|
||||
#ifndef NO_AFFINITY
|
||||
if (num_threads == 1) {
|
||||
if (blas_cpu_number == 1){
|
||||
//OpenBLAS is already single thread.
|
||||
return;
|
||||
}else{
|
||||
//From multi-threads to single thread
|
||||
//Restore the original affinity mask
|
||||
gotoblas_set_affinity(-1);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
|
||||
|
||||
if (num_threads > blas_num_threads) {
|
||||
|
@ -800,6 +813,13 @@ void goto_set_num_threads(int num_threads) {
|
|||
UNLOCK_COMMAND(&server_lock);
|
||||
}
|
||||
|
||||
#ifndef NO_AFFINITY
|
||||
if(blas_cpu_number == 1 && num_threads > 1){
|
||||
//Restore the thread 0 affinity.
|
||||
gotoblas_set_affinity(0);
|
||||
}
|
||||
#endif
|
||||
|
||||
blas_cpu_number = num_threads;
|
||||
|
||||
#if defined(ARCH_MIPS64)
|
||||
|
|
|
@ -185,7 +185,7 @@ int get_num_procs(void) {
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
#if defined(OS_FREEBSD)
|
||||
|
||||
int get_num_procs(void) {
|
||||
|
||||
|
@ -206,6 +206,18 @@ int get_num_procs(void) {
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(OS_DARWIN)
|
||||
int get_num_procs(void) {
|
||||
static int nums = 0;
|
||||
size_t len;
|
||||
if (nums == 0){
|
||||
len = sizeof(int);
|
||||
sysctlbyname("hw.physicalcpu", &nums, &len, NULL, 0);
|
||||
}
|
||||
return nums;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
OpenBLAS uses the number of CPU cores in multithreading.
|
||||
It can be set by openblas_set_num_threads(int num_threads);
|
||||
|
|
|
@ -119,7 +119,8 @@ so : ../$(LIBSONAME)
|
|||
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), FreeBSD)
|
||||
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD))
|
||||
|
||||
so : ../$(LIBSONAME)
|
||||
|
||||
|
|
|
@ -2669,7 +2669,8 @@
|
|||
if ($ARGV[5] == 1) {
|
||||
#NO_LAPACK=1
|
||||
@underscore_objs = (@blasobjs, @misc_underscore_objs);
|
||||
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1") {
|
||||
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" ||
|
||||
-d "../lapack-3.4.2") {
|
||||
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs);
|
||||
} else {
|
||||
@underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs);
|
||||
|
|
|
@ -47,7 +47,7 @@
|
|||
|
||||
#ifndef WINDOWS_ABI
|
||||
|
||||
#define STACKSIZE 64
|
||||
#define STACKSIZE 128
|
||||
|
||||
#define OLD_M %rdi
|
||||
#define OLD_N %rsi
|
||||
|
@ -57,7 +57,10 @@
|
|||
#define STACK_Y 16 + STACKSIZE(%rsp)
|
||||
#define STACK_INCY 24 + STACKSIZE(%rsp)
|
||||
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
|
||||
|
||||
#define MMM 56(%rsp)
|
||||
#define NN 64(%rsp)
|
||||
#define AA 72(%rsp)
|
||||
#define LDAX 80(%rsp)
|
||||
#else
|
||||
|
||||
#define STACKSIZE 256
|
||||
|
@ -132,27 +135,11 @@
|
|||
movq OLD_LDA, LDA
|
||||
movq OLD_X, X
|
||||
#else
|
||||
movq OLD_M, M
|
||||
movq OLD_N, N
|
||||
movq OLD_A, A
|
||||
movq OLD_LDA, LDA
|
||||
movq OLD_M, MMM
|
||||
movq OLD_N, NN
|
||||
movq OLD_A, AA
|
||||
movq OLD_LDA, LDAX
|
||||
#endif
|
||||
|
||||
movq STACK_INCX, INCX
|
||||
movq STACK_Y, Y
|
||||
movq STACK_INCY, INCY
|
||||
movq STACK_BUFFER, BUFFER
|
||||
|
||||
leaq -1(INCX), %rax
|
||||
|
||||
leaq (,LDA, SIZE), LDA
|
||||
leaq (,INCX, SIZE), INCX
|
||||
leaq (,INCY, SIZE), INCY
|
||||
|
||||
leaq (LDA, LDA, 2), LDA3
|
||||
|
||||
subq $-16 * SIZE, A
|
||||
|
||||
#ifdef HAVE_SSE3
|
||||
#ifndef WINDOWS_ABI
|
||||
movddup %xmm0, ALPHA
|
||||
|
@ -168,6 +155,39 @@
|
|||
unpcklpd ALPHA, ALPHA
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
.L0x:
|
||||
xorq M,M
|
||||
addq $1,M
|
||||
salq $22,M
|
||||
subq M,MMM
|
||||
jge .L00
|
||||
|
||||
movq MMM,%rax
|
||||
addq M,%rax
|
||||
jle .L999x
|
||||
movq %rax,M
|
||||
|
||||
.L00:
|
||||
movq LDAX,LDA
|
||||
movq NN,N
|
||||
movq AA,A
|
||||
movq STACK_INCX, INCX
|
||||
movq STACK_Y, Y
|
||||
movq STACK_INCY, INCY
|
||||
movq STACK_BUFFER, BUFFER
|
||||
|
||||
leaq -1(INCX), %rax
|
||||
|
||||
leaq (,LDA, SIZE), LDA
|
||||
leaq (,INCX, SIZE), INCX
|
||||
leaq (,INCY, SIZE), INCY
|
||||
|
||||
leaq (LDA, LDA, 2), LDA3
|
||||
|
||||
subq $-16 * SIZE, A
|
||||
|
||||
testq M, M
|
||||
jle .L999
|
||||
testq N, N
|
||||
|
@ -854,7 +874,6 @@
|
|||
|
||||
.L21:
|
||||
#endif
|
||||
|
||||
subq $4, N
|
||||
|
||||
leaq 16 * SIZE(BUFFER), X1
|
||||
|
@ -2461,6 +2480,12 @@
|
|||
ALIGN_4
|
||||
|
||||
.L999:
|
||||
leaq (, M, SIZE), %rax
|
||||
addq %rax,AA
|
||||
jmp .L0x;
|
||||
ALIGN_4
|
||||
|
||||
.L999x:
|
||||
movq 0(%rsp), %rbx
|
||||
movq 8(%rsp), %rbp
|
||||
movq 16(%rsp), %r12
|
||||
|
|
36
param.h
36
param.h
|
@ -1664,26 +1664,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define XGEMM_DEFAULT_UNROLL_M 1
|
||||
#endif
|
||||
|
||||
#define SGEMM_P sgemm_p
|
||||
#define DGEMM_P dgemm_p
|
||||
#define QGEMM_P qgemm_p
|
||||
#define CGEMM_P cgemm_p
|
||||
#define ZGEMM_P zgemm_p
|
||||
#define XGEMM_P xgemm_p
|
||||
#define SGEMM_DEFAULT_P sgemm_p
|
||||
#define DGEMM_DEFAULT_P dgemm_p
|
||||
#define QGEMM_DEFAULT_P qgemm_p
|
||||
#define CGEMM_DEFAULT_P cgemm_p
|
||||
#define ZGEMM_DEFAULT_P zgemm_p
|
||||
#define XGEMM_DEFAULT_P xgemm_p
|
||||
|
||||
#define SGEMM_R sgemm_r
|
||||
#define DGEMM_R dgemm_r
|
||||
#define QGEMM_R qgemm_r
|
||||
#define CGEMM_R cgemm_r
|
||||
#define ZGEMM_R zgemm_r
|
||||
#define XGEMM_R xgemm_r
|
||||
#define SGEMM_DEFAULT_R sgemm_r
|
||||
#define DGEMM_DEFAULT_R dgemm_r
|
||||
#define QGEMM_DEFAULT_R qgemm_r
|
||||
#define CGEMM_DEFAULT_R cgemm_r
|
||||
#define ZGEMM_DEFAULT_R zgemm_r
|
||||
#define XGEMM_DEFAULT_R xgemm_r
|
||||
|
||||
#define SGEMM_Q 128
|
||||
#define DGEMM_Q 128
|
||||
#define QGEMM_Q 128
|
||||
#define CGEMM_Q 128
|
||||
#define ZGEMM_Q 128
|
||||
#define XGEMM_Q 128
|
||||
#define SGEMM_DEFAULT_Q 128
|
||||
#define DGEMM_DEFAULT_Q 128
|
||||
#define QGEMM_DEFAULT_Q 128
|
||||
#define CGEMM_DEFAULT_Q 128
|
||||
#define ZGEMM_DEFAULT_Q 128
|
||||
#define XGEMM_DEFAULT_Q 128
|
||||
|
||||
#define SYMV_P 16
|
||||
|
||||
|
|
|
@ -899,19 +899,64 @@ diff -ruN lapack-3.4.2.old/TESTING/LIN/Makefile lapack-3.4.2/TESTING/LIN/Makefil
|
|||
../xlintsts: xlintsts
|
||||
mv xlintsts $@
|
||||
diff -ruN lapack-3.4.2.old/lapacke/src/Makefile lapack-3.4.2/lapacke/src/Makefile
|
||||
--- lapack-3.4.2.old/lapacke/src/Makefile 2012-04-02 22:16:32 +0200
|
||||
+++ lapack-3.4.2/lapacke/src/Makefile 2012-04-22 21:38:38 +0200
|
||||
@@ -2041,19 +2041,21 @@
|
||||
--- lapack-3.4.2.old/lapacke/src/Makefile 2012-09-21 04:21:29 +0200
|
||||
+++ lapack-3.4.2/lapacke/src/Makefile 2012-10-15 22:04:56 +0200
|
||||
@@ -34,7 +34,7 @@
|
||||
#
|
||||
include ../../make.inc
|
||||
|
||||
-SRC_OBJ = \
|
||||
+CSRC_OBJ = \
|
||||
lapacke_cbbcsd.o \
|
||||
lapacke_cbbcsd_work.o \
|
||||
lapacke_cbdsqr.o \
|
||||
@@ -526,7 +526,9 @@
|
||||
lapacke_cupgtr.o \
|
||||
lapacke_cupgtr_work.o \
|
||||
lapacke_cupmtr.o \
|
||||
-lapacke_cupmtr_work.o \
|
||||
+lapacke_cupmtr_work.o
|
||||
+
|
||||
+DSRC_OBJ = \
|
||||
lapacke_dbbcsd.o \
|
||||
lapacke_dbbcsd_work.o \
|
||||
lapacke_dbdsdc.o \
|
||||
@@ -1012,7 +1014,9 @@
|
||||
lapacke_dtrttp.o \
|
||||
lapacke_dtrttp_work.o \
|
||||
lapacke_dtzrzf.o \
|
||||
-lapacke_dtzrzf_work.o \
|
||||
+lapacke_dtzrzf_work.o
|
||||
+
|
||||
+SSRC_OBJ = \
|
||||
lapacke_sbbcsd.o \
|
||||
lapacke_sbbcsd_work.o \
|
||||
lapacke_sbdsdc.o \
|
||||
@@ -1492,7 +1496,9 @@
|
||||
lapacke_strttp.o \
|
||||
lapacke_strttp_work.o \
|
||||
lapacke_stzrzf.o \
|
||||
-lapacke_stzrzf_work.o \
|
||||
+lapacke_stzrzf_work.o
|
||||
+
|
||||
+ZSRC_OBJ = \
|
||||
lapacke_zbbcsd.o \
|
||||
lapacke_zbbcsd_work.o \
|
||||
lapacke_zbdsqr.o \
|
||||
@@ -2041,19 +2047,29 @@
|
||||
lapacke_zlagsy.o \
|
||||
lapacke_zlagsy_work.o
|
||||
|
||||
-ALLOBJ = $(SRC_OBJ) $(MATGEN_OBJ)
|
||||
+OBJ_FILES := $(SRC_OBJ)
|
||||
+COBJ_FILES := $(CSRC_OBJ)
|
||||
+SOBJ_FILES := $(SSRC_OBJ)
|
||||
+DOBJ_FILES := $(DSRC_OBJ)
|
||||
+ZOBJ_FILES := $(ZSRC_OBJ)
|
||||
|
||||
-ifdef USEXBLAS
|
||||
-ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
|
||||
+ifdef LAPACKE_EXTENDED
|
||||
+OBJ_FILES += $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
|
||||
+OBJ_FILES += $(SRCX_OBJ)
|
||||
endif
|
||||
|
||||
-
|
||||
|
@ -924,9 +969,32 @@ diff -ruN lapack-3.4.2.old/lapacke/src/Makefile lapack-3.4.2/lapacke/src/Makefil
|
|||
|
||||
-../../$(LAPACKELIB): $(ALLOBJ) $(ALLXOBJ)
|
||||
- $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJ) $(ALLXOBJ)
|
||||
+../../$(LAPACKELIB): $(OBJ_FILES)
|
||||
+# http://hackage.haskell.org/trac/gtk2hs/ticket/1146
|
||||
+ echo $(OBJ_FILES) | xargs -n 100 $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB)
|
||||
+../../$(LAPACKELIB): $(COBJ_FILES) $(DOBJ_FILES) $(SOBJ_FILES) $(ZOBJ_FILES) $(OBJ_FILES)
|
||||
+ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(COBJ_FILES)
|
||||
+ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(DOBJ_FILES)
|
||||
+ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(SOBJ_FILES)
|
||||
+ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ZOBJ_FILES)
|
||||
+ifneq ($(strip $(OBJ_FILES)),)
|
||||
+ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(OBJ_FILES)
|
||||
+endif
|
||||
$(RANLIB) ../../$(LAPACKELIB)
|
||||
|
||||
.c.o:
|
||||
diff -ruN lapack-3.4.2.old/lapacke/example/Makefile lapack-3.4.2/lapacke/example/Makefile
|
||||
--- lapack-3.4.2.old/lapacke/example/Makefile 2012-03-23 06:55:22.000000000 +0800
|
||||
+++ lapack-3.4.2/lapacke/example/Makefile 2012-11-13 00:32:24.125449952 +0800
|
||||
@@ -4,12 +4,12 @@
|
||||
|
||||
xexample_DGESV_rowmajor: example_DGESV_rowmajor.o ../../$(LAPACKLIB) ../../$(LAPACKELIB)
|
||||
$(LOADER) $(LOADOPTS) example_DGESV_rowmajor.o \
|
||||
- ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(LAPACKELIB) $(CEXTRALIB) -o $@
|
||||
./$@
|
||||
|
||||
xexample_ZGESV_rowmajor: example_ZGESV_rowmajor.o ../../$(LAPACKLIB) ../../$(LAPACKELIB)
|
||||
$(LOADER) $(LOADOPTS) example_ZGESV_rowmajor.o \
|
||||
- ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
|
||||
+ ../../$(LAPACKELIB) $(CEXTRALIB) -o $@
|
||||
./$@
|
||||
|
||||
.c.o:
|
||||
|
|
Loading…
Reference in New Issue