From ca4136cf416157b8e8f5852c348b9bc8f361af4c Mon Sep 17 00:00:00 2001 From: Zaheer Chothia Date: Fri, 12 Oct 2012 23:44:23 +0200 Subject: [PATCH 01/13] Fixed #147: LAPACK symbols were not being exported for version 3.4.2 --- .gitignore | 2 ++ exports/gensymbol | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 118205ca2..aaa1b31ad 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ lapack-3.1.1 lapack-3.1.1.tgz lapack-3.4.1 lapack-3.4.1.tgz +lapack-3.4.2 +lapack-3.4.2.tgz *.so *.a .svn diff --git a/exports/gensymbol b/exports/gensymbol index 64c92d396..c492eefb5 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -2669,7 +2669,8 @@ if ($ARGV[5] == 1) { #NO_LAPACK=1 @underscore_objs = (@blasobjs, @misc_underscore_objs); -} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1") { +} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" || + -d "../lapack-3.4.2") { @underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); } else { @underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs); From 5c1efa1149c0b18dbb455cd5659d7f655d5a6cf2 Mon Sep 17 00:00:00 2001 From: Zaheer Chothia Date: Mon, 15 Oct 2012 22:13:37 +0200 Subject: [PATCH 02/13] Fix installation step on Windows (regression from e8306f623a) Since the DLL now has a fixed name there is no need to install a versioned alias too. 
--- Makefile.install | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile.install b/Makefile.install index 7f30d6b7c..87730a10c 100644 --- a/Makefile.install +++ b/Makefile.install @@ -71,11 +71,9 @@ ifeq ($(OSNAME), Darwin) endif ifeq ($(OSNAME), WINNT) -cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR) - -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dll endif ifeq ($(OSNAME), CYGWIN_NT) -cp $(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR) - -ln -fs $(OPENBLAS_LIBRARY_DIR)/$(LIBDLLNAME) $(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).dll endif @echo Install OK! From 0f26a21624ac6d5b3ee2bc913f74edc6bd611ad6 Mon Sep 17 00:00:00 2001 From: Zaheer Chothia Date: Mon, 15 Oct 2012 22:26:18 +0200 Subject: [PATCH 03/13] Alternative approach to avoid command-line length while archiving lapacke -- Thanks Michel! --- patch.for_lapack-3.4.2 | 66 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 8 deletions(-) diff --git a/patch.for_lapack-3.4.2 b/patch.for_lapack-3.4.2 index f6c85c74b..b1edcb84d 100644 --- a/patch.for_lapack-3.4.2 +++ b/patch.for_lapack-3.4.2 @@ -899,19 +899,64 @@ diff -ruN lapack-3.4.2.old/TESTING/LIN/Makefile lapack-3.4.2/TESTING/LIN/Makefil ../xlintsts: xlintsts mv xlintsts $@ diff -ruN lapack-3.4.2.old/lapacke/src/Makefile lapack-3.4.2/lapacke/src/Makefile ---- lapack-3.4.2.old/lapacke/src/Makefile 2012-04-02 22:16:32 +0200 -+++ lapack-3.4.2/lapacke/src/Makefile 2012-04-22 21:38:38 +0200 -@@ -2041,19 +2041,21 @@ +--- lapack-3.4.2.old/lapacke/src/Makefile 2012-09-21 04:21:29 +0200 ++++ lapack-3.4.2/lapacke/src/Makefile 2012-10-15 22:04:56 +0200 +@@ -34,7 +34,7 @@ + # + include ../../make.inc + +-SRC_OBJ = \ ++CSRC_OBJ = \ + lapacke_cbbcsd.o \ + lapacke_cbbcsd_work.o \ + lapacke_cbdsqr.o \ +@@ -526,7 +526,9 @@ + lapacke_cupgtr.o \ + lapacke_cupgtr_work.o \ + lapacke_cupmtr.o \ +-lapacke_cupmtr_work.o \ ++lapacke_cupmtr_work.o ++ ++DSRC_OBJ = \ + lapacke_dbbcsd.o \ + lapacke_dbbcsd_work.o \ + lapacke_dbdsdc.o \ +@@ 
-1012,7 +1014,9 @@ + lapacke_dtrttp.o \ + lapacke_dtrttp_work.o \ + lapacke_dtzrzf.o \ +-lapacke_dtzrzf_work.o \ ++lapacke_dtzrzf_work.o ++ ++SSRC_OBJ = \ + lapacke_sbbcsd.o \ + lapacke_sbbcsd_work.o \ + lapacke_sbdsdc.o \ +@@ -1492,7 +1496,9 @@ + lapacke_strttp.o \ + lapacke_strttp_work.o \ + lapacke_stzrzf.o \ +-lapacke_stzrzf_work.o \ ++lapacke_stzrzf_work.o ++ ++ZSRC_OBJ = \ + lapacke_zbbcsd.o \ + lapacke_zbbcsd_work.o \ + lapacke_zbdsqr.o \ +@@ -2041,19 +2047,29 @@ lapacke_zlagsy.o \ lapacke_zlagsy_work.o -ALLOBJ = $(SRC_OBJ) $(MATGEN_OBJ) -+OBJ_FILES := $(SRC_OBJ) ++COBJ_FILES := $(CSRC_OBJ) ++SOBJ_FILES := $(SSRC_OBJ) ++DOBJ_FILES := $(DSRC_OBJ) ++ZOBJ_FILES := $(ZSRC_OBJ) -ifdef USEXBLAS -ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC) +ifdef LAPACKE_EXTENDED -+OBJ_FILES += $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC) ++OBJ_FILES += $(SRCX_OBJ) endif - @@ -924,9 +969,14 @@ diff -ruN lapack-3.4.2.old/lapacke/src/Makefile lapack-3.4.2/lapacke/src/Makefil -../../$(LAPACKELIB): $(ALLOBJ) $(ALLXOBJ) - $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ALLOBJ) $(ALLXOBJ) -+../../$(LAPACKELIB): $(OBJ_FILES) -+# http://hackage.haskell.org/trac/gtk2hs/ticket/1146 -+ echo $(OBJ_FILES) | xargs -n 100 $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) ++../../$(LAPACKELIB): $(COBJ_FILES) $(DOBJ_FILES) $(SOBJ_FILES) $(ZOBJ_FILES) $(OBJ_FILES) ++ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(COBJ_FILES) ++ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(DOBJ_FILES) ++ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(SOBJ_FILES) ++ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(ZOBJ_FILES) ++ifneq ($(strip $(OBJ_FILES)),) ++ $(ARCH) $(ARCHFLAGS) ../../$(LAPACKELIB) $(OBJ_FILES) ++endif $(RANLIB) ../../$(LAPACKELIB) .c.o: From 538c764d2b4b030503c4dd86b786f90dd6e12c4c Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 6 Nov 2012 18:21:46 +0800 Subject: [PATCH 04/13] Refs #153. Restore the original CPU affinity when calling openblas_set_num_threads(1). 
Please read the issue on github.com for the detail. --- driver/others/blas_server.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index f16b827d3..c51e681a5 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -770,6 +770,19 @@ void goto_set_num_threads(int num_threads) { if (num_threads < 1) num_threads = blas_num_threads; +#ifndef NO_AFFINITY + if (num_threads == 1) { + if (blas_cpu_number == 1){ + //OpenBLAS is already single thread. + return; + }else{ + //From multi-threads to single thread + //Restore the original affinity mask + gotoblas_set_affinity(-1); + } + } +#endif + if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; if (num_threads > blas_num_threads) { @@ -800,6 +813,13 @@ void goto_set_num_threads(int num_threads) { UNLOCK_COMMAND(&server_lock); } +#ifndef NO_AFFINITY + if(blas_cpu_number == 1 && num_threads > 1){ + //Restore the thread 0 affinity. + gotoblas_set_affinity(0); + } +#endif + blas_cpu_number = num_threads; #if defined(ARCH_MIPS64) From 378acfe826e2759b1464bd9f9d7c02e22e14ea05 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Thu, 8 Nov 2012 22:08:01 +0800 Subject: [PATCH 05/13] Added NO_SHARED flag to disable generating the shared library. --- Makefile | 2 ++ Makefile.rule | 3 +++ 2 files changed, 5 insertions(+) diff --git a/Makefile b/Makefile index 9a2a8b765..c49a328d9 100644 --- a/Makefile +++ b/Makefile @@ -80,6 +80,7 @@ endif @echo shared : +ifndef NO_SHARED ifeq ($(OSNAME), Linux) $(MAKE) -C exports so -ln -fs $(LIBSONAME) $(LIBPREFIX).so @@ -103,6 +104,7 @@ endif ifeq ($(OSNAME), CYGWIN_NT) $(MAKE) -C exports dll endif +endif tests : ifndef NOFORTRAN diff --git a/Makefile.rule b/Makefile.rule index 077bf9267..f667c1661 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -45,6 +45,9 @@ VERSION = 0.2.4 # automatically detected by the the script. 
# NUM_THREADS = 24 +# If you don't need to generate the shared library, please comment it in. +# NO_SHARED = 1 + # If you don't need CBLAS interface, please comment it in. # NO_CBLAS = 1 From 789f205177b6beee41e56f007bdb2a25cc1a0cb7 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Thu, 8 Nov 2012 22:15:04 +0800 Subject: [PATCH 06/13] Improved Makefile.rule for cross compiler. --- Makefile.rule | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index f667c1661..debc91dcf 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -24,10 +24,13 @@ VERSION = 0.2.4 # Fortran compiler. Default is g77. # FC = gfortran -# Even you can specify cross compiler +# Even you can specify cross compiler. Meanwhile, please set HOSTCC. # CC = x86_64-w64-mingw32-gcc # FC = x86_64-w64-mingw32-gfortran +# If you use the cross compiler, please set this host compiler. +# HOSTCC = gcc + # If you need 32bit binary, define BINARY=32, otherwise define BINARY=64 # BINARY=64 From e85549ee1171e9a4fc28a0a09dec49f87741fa51 Mon Sep 17 00:00:00 2001 From: Alexander Nasonov Date: Sat, 10 Nov 2012 23:20:44 +0000 Subject: [PATCH 07/13] Fix NetBSD build. 
--- Makefile.system | 2 +- common.h | 5 +++++ exports/Makefile | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Makefile.system b/Makefile.system index 5aaf84609..27f30fa61 100644 --- a/Makefile.system +++ b/Makefile.system @@ -129,7 +129,7 @@ MD5SUM = md5 -r endif ifeq ($(OSNAME), NetBSD) -MD5SUM = md5 -r +MD5SUM = md5 -n endif ifeq ($(OSNAME), Linux) diff --git a/common.h b/common.h index b4dc5deba..003fde77f 100644 --- a/common.h +++ b/common.h @@ -351,7 +351,12 @@ typedef int blasint; #endif #define MMAP_ACCESS (PROT_READ | PROT_WRITE) + +#ifdef __NetBSD__ +#define MMAP_POLICY (MAP_PRIVATE | MAP_ANON) +#else #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS) +#endif #include "param.h" #include "common_param.h" diff --git a/exports/Makefile b/exports/Makefile index c507032e9..5219560ee 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -119,7 +119,8 @@ so : ../$(LIBSONAME) endif -ifeq ($(OSNAME), FreeBSD) +#http://stackoverflow.com/questions/7656425/makefile-ifeq-logical-or +ifeq ($(OSNAME), $(filter $(OSNAME),FreeBSD NetBSD)) so : ../$(LIBSONAME) From b45d43d29526591372d0c3eaa05bc7b257c47bd9 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 13 Nov 2012 00:53:26 +0800 Subject: [PATCH 08/13] Added the patch for lapacke example. 
--- patch.for_lapack-3.4.2 | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/patch.for_lapack-3.4.2 b/patch.for_lapack-3.4.2 index b1edcb84d..3f7d72ed3 100644 --- a/patch.for_lapack-3.4.2 +++ b/patch.for_lapack-3.4.2 @@ -980,3 +980,21 @@ diff -ruN lapack-3.4.2.old/lapacke/src/Makefile lapack-3.4.2/lapacke/src/Makefil $(RANLIB) ../../$(LAPACKELIB) .c.o: +diff -ruN lapack-3.4.2.old/lapacke/example/Makefile lapack-3.4.2/lapacke/example/Makefile +--- lapack-3.4.2.old/lapacke/example/Makefile 2012-03-23 06:55:22.000000000 +0800 ++++ lapack-3.4.2/lapacke/example/Makefile 2012-11-13 00:32:24.125449952 +0800 +@@ -4,12 +4,12 @@ + + xexample_DGESV_rowmajor: example_DGESV_rowmajor.o ../../$(LAPACKLIB) ../../$(LAPACKELIB) + $(LOADER) $(LOADOPTS) example_DGESV_rowmajor.o \ +- ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ ++ ../../$(LAPACKELIB) $(CEXTRALIB) -o $@ + ./$@ + + xexample_ZGESV_rowmajor: example_ZGESV_rowmajor.o ../../$(LAPACKLIB) ../../$(LAPACKELIB) + $(LOADER) $(LOADOPTS) example_ZGESV_rowmajor.o \ +- ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB) -o $@ ++ ../../$(LAPACKELIB) $(CEXTRALIB) -o $@ + ./$@ + + .c.o: From d5717a97eadd4b1bcba99e3a895aba2a8583d4ac Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 13 Nov 2012 00:54:20 +0800 Subject: [PATCH 09/13] Compile lapacke with ILP64 model when INTERFACE64=1 --- Makefile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile b/Makefile index c49a328d9..39e3bbd65 100644 --- a/Makefile +++ b/Makefile @@ -224,7 +224,11 @@ ifndef NOFORTRAN -@echo "PNOOPT = $(FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc +ifdef INTERFACE64 + -@echo "CFLAGS = $(CFLAGS) -DHAVE_LAPACK_CONFIG_H -DLAPACK_ILP64" >> $(NETLIB_LAPACK_DIR)/make.inc +else -@echo "CFLAGS = $(CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc +endif -@echo "ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc 
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc From 6751f7b9a793e9080396b1fec1739953017e3b8c Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Tue, 13 Nov 2012 15:48:57 +0800 Subject: [PATCH 10/13] Fixed #157. Only detect the number of physical CPU cores on Mac OSX. --- driver/others/memory.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index d897fe7e0..2070adf5d 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -185,7 +185,7 @@ int get_num_procs(void) { #endif -#if defined(OS_FREEBSD) || defined(OS_DARWIN) +#if defined(OS_FREEBSD) int get_num_procs(void) { @@ -206,6 +206,18 @@ int get_num_procs(void) { #endif +#if defined(OS_DARWIN) +int get_num_procs(void) { + static int nums = 0; + size_t len; + if (nums == 0){ + len = sizeof(int); + sysctlbyname("hw.physicalcpu", &nums, &len, NULL, 0); + } + return nums; +} +#endif + /* OpenBLAS uses the numbers of CPU cores in multithreading. It can be set by openblas_set_num_threads(int num_threads); From 01e3c984cef12d881e4cb535f0d4f0045b8b2ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?S=C3=A9bastien=20Villemot?= Date: Wed, 14 Nov 2012 21:04:05 +0100 Subject: [PATCH 11/13] Fix compilation with TARGET=GENERIC Patch applied to Debian package --- param.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/param.h b/param.h index c6cd354be..11c1a269e 100644 --- a/param.h +++ b/param.h @@ -1664,26 +1664,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#define XGEMM_DEFAULT_UNROLL_M 1 #endif -#define SGEMM_P sgemm_p -#define DGEMM_P dgemm_p -#define QGEMM_P qgemm_p -#define CGEMM_P cgemm_p -#define ZGEMM_P zgemm_p -#define XGEMM_P xgemm_p +#define SGEMM_DEFAULT_P sgemm_p +#define DGEMM_DEFAULT_P dgemm_p +#define QGEMM_DEFAULT_P qgemm_p +#define CGEMM_DEFAULT_P cgemm_p +#define ZGEMM_DEFAULT_P zgemm_p +#define XGEMM_DEFAULT_P xgemm_p -#define SGEMM_R sgemm_r -#define DGEMM_R dgemm_r -#define QGEMM_R qgemm_r -#define CGEMM_R cgemm_r -#define ZGEMM_R zgemm_r -#define XGEMM_R xgemm_r +#define SGEMM_DEFAULT_R sgemm_r +#define DGEMM_DEFAULT_R dgemm_r +#define QGEMM_DEFAULT_R qgemm_r +#define CGEMM_DEFAULT_R cgemm_r +#define ZGEMM_DEFAULT_R zgemm_r +#define XGEMM_DEFAULT_R xgemm_r -#define SGEMM_Q 128 -#define DGEMM_Q 128 -#define QGEMM_Q 128 -#define CGEMM_Q 128 -#define ZGEMM_Q 128 -#define XGEMM_Q 128 +#define SGEMM_DEFAULT_Q 128 +#define DGEMM_DEFAULT_Q 128 +#define QGEMM_DEFAULT_Q 128 +#define CGEMM_DEFAULT_Q 128 +#define ZGEMM_DEFAULT_Q 128 +#define XGEMM_DEFAULT_Q 128 #define SYMV_P 16 From 5f0117385e1d4f986ad75fa66b873b014a7792c2 Mon Sep 17 00:00:00 2001 From: Zhang Xianyi Date: Mon, 19 Nov 2012 22:32:27 +0800 Subject: [PATCH 12/13] Refs #154. Fixed a SEGFAULT bug of dgemv_t when m is very large. It overflowed the internal buffer. Thus, we split vector x into blocks when m is very large. Thank @wangqian for this patch. 
--- kernel/x86_64/dgemv_t.S | 71 ++++++++++++++++++++++++++++------------- 1 file changed, 48 insertions(+), 23 deletions(-) diff --git a/kernel/x86_64/dgemv_t.S b/kernel/x86_64/dgemv_t.S index 071920723..02601be0a 100644 --- a/kernel/x86_64/dgemv_t.S +++ b/kernel/x86_64/dgemv_t.S @@ -47,7 +47,7 @@ #ifndef WINDOWS_ABI -#define STACKSIZE 64 +#define STACKSIZE 128 #define OLD_M %rdi #define OLD_N %rsi @@ -57,7 +57,10 @@ #define STACK_Y 16 + STACKSIZE(%rsp) #define STACK_INCY 24 + STACKSIZE(%rsp) #define STACK_BUFFER 32 + STACKSIZE(%rsp) - +#define MMM 56(%rsp) +#define NN 64(%rsp) +#define AA 72(%rsp) +#define LDAX 80(%rsp) #else #define STACKSIZE 256 @@ -132,27 +135,11 @@ movq OLD_LDA, LDA movq OLD_X, X #else - movq OLD_M, M - movq OLD_N, N - movq OLD_A, A - movq OLD_LDA, LDA + movq OLD_M, MMM + movq OLD_N, NN + movq OLD_A, AA + movq OLD_LDA, LDAX #endif - - movq STACK_INCX, INCX - movq STACK_Y, Y - movq STACK_INCY, INCY - movq STACK_BUFFER, BUFFER - - leaq -1(INCX), %rax - - leaq (,LDA, SIZE), LDA - leaq (,INCX, SIZE), INCX - leaq (,INCY, SIZE), INCY - - leaq (LDA, LDA, 2), LDA3 - - subq $-16 * SIZE, A - #ifdef HAVE_SSE3 #ifndef WINDOWS_ABI movddup %xmm0, ALPHA @@ -168,6 +155,39 @@ unpcklpd ALPHA, ALPHA #endif + + +.L0x: + xorq M,M + addq $1,M + salq $22,M + subq M,MMM + jge .L00 + + movq MMM,%rax + addq M,%rax + jle .L999x + movq %rax,M + +.L00: + movq LDAX,LDA + movq NN,N + movq AA,A + movq STACK_INCX, INCX + movq STACK_Y, Y + movq STACK_INCY, INCY + movq STACK_BUFFER, BUFFER + + leaq -1(INCX), %rax + + leaq (,LDA, SIZE), LDA + leaq (,INCX, SIZE), INCX + leaq (,INCY, SIZE), INCY + + leaq (LDA, LDA, 2), LDA3 + + subq $-16 * SIZE, A + testq M, M jle .L999 testq N, N @@ -854,7 +874,6 @@ .L21: #endif - subq $4, N leaq 16 * SIZE(BUFFER), X1 @@ -2461,6 +2480,12 @@ ALIGN_4 .L999: + leaq (, M, SIZE), %rax + addq %rax,AA + jmp .L0x; + ALIGN_4 + +.L999x: movq 0(%rsp), %rbx movq 8(%rsp), %rbp movq 16(%rsp), %r12 From 2345bdec6850677dc363d2e1edbc3aa818375d1c Mon Sep 17 
00:00:00 2001 From: Zhang Xianyi Date: Mon, 26 Nov 2012 17:32:25 +0800 Subject: [PATCH 13/13] Update the doc for 0.2.5 version. --- Changelog.txt | 16 ++++++++++++++++ Makefile.rule | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/Changelog.txt b/Changelog.txt index 1b0a9b01e..db0732c4f 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,20 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.2.5 +26-Nov-2012 +common: + * Added NO_SHARED flag to disable generating the shared library. + * Compile LAPACKE with ILP64 model when INTERFACE64=1 (#158) + * Export LAPACK 3.4.2 symbols in shared library. (#147) + * Only detect the number of physical CPU cores on Mac OSX. (#157) + * Fixed NetBSD build. (#155) + * Fixed compilation with TARGET=GENERIC. (#160) +x86/x86-64: + * Restore the original CPU affinity when calling + openblas_set_num_threads(1) (#153) + * Fixed a SEGFAULT bug in dgemv_t when m is very large.(#154) +MIPS64: + ==================================================================== Version 0.2.4 8-Oct-2012 diff --git a/Makefile.rule b/Makefile.rule index debc91dcf..1240ab0ad 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.2.4 +VERSION = 0.2.5 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library