Merge branch 'develop'

This commit is contained in:
Zhang Xianyi 2012-11-26 17:32:56 +08:00
commit f78eb335d6
14 changed files with 217 additions and 57 deletions

2
.gitignore vendored
View File

@ -8,6 +8,8 @@ lapack-3.1.1
lapack-3.1.1.tgz
lapack-3.4.1
lapack-3.4.1.tgz
lapack-3.4.2
lapack-3.4.2.tgz
*.so
*.a
.svn

View File

@ -1,4 +1,20 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.5
26-Nov-2012
common:
* Added NO_SHARED flag to disable generating the shared library.
* Compile LAPACKE with ILP64 model when INTERFACE64=1 (#158)
* Export LAPACK 3.4.2 symbols in shared library. (#147)
* Only detect the number of physical CPU cores on Mac OSX. (#157)
* Fixed NetBSD build. (#155)
* Fixed compilation with TARGET=GENERIC. (#160)
x86/x86-64:
* Restore the original CPU affinity when calling
openblas_set_num_threads(1) (#153)
* Fixed a SEGFAULT bug in dgemv_t when m is very large. (#154)
MIPS64:
====================================================================
Version 0.2.4
8-Oct-2012

View File

@ -80,6 +80,7 @@ endif
@echo
shared :
ifndef NO_SHARED
ifeq ($(OSNAME), Linux)
$(MAKE) -C exports so
-ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ -103,6 +104,7 @@ endif
ifeq ($(OSNAME), CYGWIN_NT)
$(MAKE) -C exports dll
endif
endif
tests :
ifndef NOFORTRAN
@ -222,7 +224,11 @@ ifndef NOFORTRAN
-@echo "PNOOPT = $(FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
ifdef INTERFACE64
-@echo "CFLAGS = $(CFLAGS) -DHAVE_LAPACK_CONFIG_H -DLAPACK_ILP64" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "CFLAGS = $(CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc

View File

@ -71,11 +71,9 @@ ifeq ($(OSNAME), Darwin)
endif
ifeq ($(OSNAME), WINNT)
-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dll
endif
ifeq ($(OSNAME), CYGWIN_NT)
-cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)
-ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dll
endif
@echo Install OK!

View File

@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.2.4
VERSION = 0.2.5
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
@ -24,10 +24,13 @@ VERSION = 0.2.4
# Fortran compiler. Default is g77.
# FC = gfortran
# Even you can specify cross compiler
# You can even specify a cross compiler. In that case, please also set HOSTCC.
# CC = x86_64-w64-mingw32-gcc
# FC = x86_64-w64-mingw32-gfortran
# If you use a cross compiler, please set the host compiler here.
# HOSTCC = gcc
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
# BINARY=64
@ -45,6 +48,9 @@ VERSION = 0.2.4
# automatically detected by the script.
# NUM_THREADS = 24
# If you don't need to generate the shared library, please uncomment the following line.
# NO_SHARED = 1
# If you don't need the CBLAS interface, please uncomment the following line.
# NO_CBLAS = 1

View File

@ -129,7 +129,7 @@ MD5SUM = md5 -r
endif
ifeq ($(OSNAME), NetBSD)
MD5SUM = md5 -r
MD5SUM = md5 -n
endif
ifeq ($(OSNAME), Linux)

View File

@ -351,7 +351,12 @@ typedef int blasint;
#endif
#define MMAP_ACCESS (PROT_READ | PROT_WRITE)
#ifdef __NetBSD__
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANON)
#else
#define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS)
#endif
#include "param.h"
#include "common_param.h"

View File

@ -770,6 +770,19 @@ void goto_set_num_threads(int num_threads) {
if (num_threads < 1) num_threads = blas_num_threads;
#ifndef NO_AFFINITY
if (num_threads == 1) {
if (blas_cpu_number == 1){
//OpenBLAS is already single thread.
return;
}else{
//From multi-threads to single thread
//Restore the original affinity mask
gotoblas_set_affinity(-1);
}
}
#endif
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
if (num_threads > blas_num_threads) {
@ -800,6 +813,13 @@ void goto_set_num_threads(int num_threads) {
UNLOCK_COMMAND(&server_lock);
}
#ifndef NO_AFFINITY
if(blas_cpu_number == 1 && num_threads > 1){
//Restore the thread 0 affinity.
gotoblas_set_affinity(0);
}
#endif
blas_cpu_number = num_threads;
#if defined(ARCH_MIPS64)

View File

@ -185,7 +185,7 @@ int get_num_procs(void) {
#endif
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
#if defined(OS_FREEBSD)
int get_num_procs(void) {
@ -206,6 +206,18 @@ int get_num_procs(void) {
#endif
#if defined(OS_DARWIN)
/*
 * Return the CPU count on Darwin.
 *
 * The count is read from the "hw.physicalcpu" sysctl the first time the
 * function is called and cached in a function-local static, so later calls
 * do not repeat the query.
 *
 * If that query fails or yields a non-positive value, fall back to the
 * "hw.ncpu" sysctl, and finally to 1, so that the function never reports
 * zero CPUs to its callers.
 */
int get_num_procs(void) {
  static int nums = 0;
  int count;
  size_t len;

  if (nums == 0) {
    count = 0;
    len = sizeof(count);
    if (sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0) != 0 || count < 1) {
      /* Physical core count unavailable: try the generic CPU count. */
      count = 0;
      len = sizeof(count);
      if (sysctlbyname("hw.ncpu", &count, &len, NULL, 0) != 0 || count < 1) {
        /* Last resort: there is always at least one CPU. */
        count = 1;
      }
    }
    nums = count;
  }
  return nums;
}
#endif
/*
OpenBLAS uses the number of CPU cores in multithreading.
It can be set by openblas_set_num_threads(int num_threads);

View File

@ -119,7 +119,8 @@ so : ../$(LIBSONAME)
endif
ifeq ($(OSNAME), FreeBSD)
#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or
ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD))
so : ../$(LIBSONAME)

View File

@ -2669,7 +2669,8 @@
if ($ARGV[5] == 1) {
#NO_LAPACK=1
@underscore_objs = (@blasobjs, @misc_underscore_objs);
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1") {
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" ||
-d "../lapack-3.4.2") {
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs);
} else {
@underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs);

View File

@ -47,7 +47,7 @@
#ifndef WINDOWS_ABI
#define STACKSIZE 64
#define STACKSIZE 128
#define OLD_M %rdi
#define OLD_N %rsi
@ -57,7 +57,10 @@
#define STACK_Y 16 + STACKSIZE(%rsp)
#define STACK_INCY 24 + STACKSIZE(%rsp)
#define STACK_BUFFER 32 + STACKSIZE(%rsp)
#define MMM 56(%rsp)
#define NN 64(%rsp)
#define AA 72(%rsp)
#define LDAX 80(%rsp)
#else
#define STACKSIZE 256
@ -132,27 +135,11 @@
movq OLD_LDA, LDA
movq OLD_X, X
#else
movq OLD_M, M
movq OLD_N, N
movq OLD_A, A
movq OLD_LDA, LDA
movq OLD_M, MMM
movq OLD_N, NN
movq OLD_A, AA
movq OLD_LDA, LDAX
#endif
movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER
leaq -1(INCX), %rax
leaq (,LDA, SIZE), LDA
leaq (,INCX, SIZE), INCX
leaq (,INCY, SIZE), INCY
leaq (LDA, LDA, 2), LDA3
subq $-16 * SIZE, A
#ifdef HAVE_SSE3
#ifndef WINDOWS_ABI
movddup %xmm0, ALPHA
@ -168,6 +155,39 @@
unpcklpd ALPHA, ALPHA
#endif
.L0x:
xorq M,M
addq $1,M
salq $22,M
subq M,MMM
jge .L00
movq MMM,%rax
addq M,%rax
jle .L999x
movq %rax,M
.L00:
movq LDAX,LDA
movq NN,N
movq AA,A
movq STACK_INCX, INCX
movq STACK_Y, Y
movq STACK_INCY, INCY
movq STACK_BUFFER, BUFFER
leaq -1(INCX), %rax
leaq (,LDA, SIZE), LDA
leaq (,INCX, SIZE), INCX
leaq (,INCY, SIZE), INCY
leaq (LDA, LDA, 2), LDA3
subq $-16 * SIZE, A
testq M, M
jle .L999
testq N, N
@ -854,7 +874,6 @@
.L21:
#endif
subq $4, N
leaq 16 * SIZE(BUFFER), X1
@ -2461,6 +2480,12 @@
ALIGN_4
.L999:
leaq (, M, SIZE), %rax
addq %rax,AA
jmp .L0x;
ALIGN_4
.L999x:
movq 0(%rsp), %rbx
movq 8(%rsp), %rbp
movq 16(%rsp), %r12

36
param.h
View File

@ -1664,26 +1664,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define XGEMM_DEFAULT_UNROLL_M 1
#endif
#define SGEMM_P sgemm_p
#define DGEMM_P dgemm_p
#define QGEMM_P qgemm_p
#define CGEMM_P cgemm_p
#define ZGEMM_P zgemm_p
#define XGEMM_P xgemm_p
#define SGEMM_DEFAULT_P sgemm_p
#define DGEMM_DEFAULT_P dgemm_p
#define QGEMM_DEFAULT_P qgemm_p
#define CGEMM_DEFAULT_P cgemm_p
#define ZGEMM_DEFAULT_P zgemm_p
#define XGEMM_DEFAULT_P xgemm_p
#define SGEMM_R sgemm_r
#define DGEMM_R dgemm_r
#define QGEMM_R qgemm_r
#define CGEMM_R cgemm_r
#define ZGEMM_R zgemm_r
#define XGEMM_R xgemm_r
#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define QGEMM_DEFAULT_R qgemm_r
#define CGEMM_DEFAULT_R cgemm_r
#define ZGEMM_DEFAULT_R zgemm_r
#define XGEMM_DEFAULT_R xgemm_r
#define SGEMM_Q 128
#define DGEMM_Q 128
#define QGEMM_Q 128
#define CGEMM_Q 128
#define ZGEMM_Q 128
#define XGEMM_Q 128
#define SGEMM_DEFAULT_Q 128
#define DGEMM_DEFAULT_Q 128
#define QGEMM_DEFAULT_Q 128
#define CGEMM_DEFAULT_Q 128
#define ZGEMM_DEFAULT_Q 128
#define XGEMM_DEFAULT_Q 128
#define SYMV_P 16

View File

@ -899,19 +899,64 @@ diff -ruN lapack-3.4.2.old/TESTING/LIN/Makefile lapack-3.4.2/TESTING/LIN/Makefil
../xlintsts: xlintsts
mv xlintsts $@
diff -ruN lapack-3.4.2.old/lapacke/src/Makefile lapack-3.4.2/lapacke/src/Makefile
--- lapack-3.4.2.old/lapacke/src/Makefile 2012-04-02 22:16:32 +0200
+++ lapack-3.4.2/lapacke/src/Makefile 2012-04-22 21:38:38 +0200
@@ -2041,19 +2041,21 @@
--- lapack-3.4.2.old/lapacke/src/Makefile 2012-09-21 04:21:29 +0200
+++ lapack-3.4.2/lapacke/src/Makefile 2012-10-15 22:04:56 +0200
@@ -34,7 +34,7 @@
#
include ../../make.inc
-SRC_OBJ = \
+CSRC_OBJ = \
lapacke_cbbcsd.o \
lapacke_cbbcsd_work.o \
lapacke_cbdsqr.o \
@@ -526,7 +526,9 @@
lapacke_cupgtr.o \
lapacke_cupgtr_work.o \
lapacke_cupmtr.o \
-lapacke_cupmtr_work.o \
+lapacke_cupmtr_work.o
+
+DSRC_OBJ = \
lapacke_dbbcsd.o \
lapacke_dbbcsd_work.o \
lapacke_dbdsdc.o \
@@ -1012,7 +1014,9 @@
lapacke_dtrttp.o \
lapacke_dtrttp_work.o \
lapacke_dtzrzf.o \
-lapacke_dtzrzf_work.o \
+lapacke_dtzrzf_work.o
+
+SSRC_OBJ = \
lapacke_sbbcsd.o \
lapacke_sbbcsd_work.o \
lapacke_sbdsdc.o \
@@ -1492,7 +1496,9 @@
lapacke_strttp.o \
lapacke_strttp_work.o \
lapacke_stzrzf.o \
-lapacke_stzrzf_work.o \
+lapacke_stzrzf_work.o
+
+ZSRC_OBJ = \
lapacke_zbbcsd.o \
lapacke_zbbcsd_work.o \
lapacke_zbdsqr.o \
@@ -2041,19 +2047,29 @@
lapacke_zlagsy.o \
lapacke_zlagsy_work.o
-ALLOBJ = $(SRC_OBJ) $(MATGEN_OBJ)
+OBJ_FILES := $(SRC_OBJ)
+COBJ_FILES := $(CSRC_OBJ)
+SOBJ_FILES := $(SSRC_OBJ)
+DOBJ_FILES := $(DSRC_OBJ)
+ZOBJ_FILES := $(ZSRC_OBJ)
-ifdef USEXBLAS
-ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
+ifdef LAPACKE_EXTENDED
+OBJ_FILES += $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
+OBJ_FILES += $(SRCX_OBJ)
endif
-
@ -924,9 +969,32 @@ diff -ruN lapack-3.4.2.old/lapacke/src/Makefile lapack-3.4.2/lapacke/src/Makefil
-../../$(LAPACKELIB): $(ALLOBJ) $(ALLXOBJ)
- $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJ) $(ALLXOBJ)
+../../$(LAPACKELIB): $(OBJ_FILES)
+# http://hackage.haskell.org/trac/gtk2hs/ticket/1146
+ echo $(OBJ_FILES) | xargs -n 100 $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB)
+../../$(LAPACKELIB): $(COBJ_FILES) $(DOBJ_FILES) $(SOBJ_FILES) $(ZOBJ_FILES) $(OBJ_FILES)
+ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(COBJ_FILES)
+ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(DOBJ_FILES)
+ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(SOBJ_FILES)
+ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ZOBJ_FILES)
+ifneq ($(strip $(OBJ_FILES)),)
+ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(OBJ_FILES)
+endif
$(RANLIB) ../../$(LAPACKELIB)
.c.o:
diff -ruN lapack-3.4.2.old/lapacke/example/Makefile lapack-3.4.2/lapacke/example/Makefile
--- lapack-3.4.2.old/lapacke/example/Makefile 2012-03-23 06:55:22.000000000 +0800
+++ lapack-3.4.2/lapacke/example/Makefile 2012-11-13 00:32:24.125449952 +0800
@@ -4,12 +4,12 @@
xexample_DGESV_rowmajor: example_DGESV_rowmajor.o ../../$(LAPACKLIB) ../../$(LAPACKELIB)
$(LOADER) $(LOADOPTS) example_DGESV_rowmajor.o \
- ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(LAPACKELIB) $(CEXTRALIB) -o $@
./$@
xexample_ZGESV_rowmajor: example_ZGESV_rowmajor.o ../../$(LAPACKLIB) ../../$(LAPACKELIB)
$(LOADER) $(LOADOPTS) example_ZGESV_rowmajor.o \
- ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@
+ ../../$(LAPACKELIB) $(CEXTRALIB) -o $@
./$@
.c.o: