From 3197f86762f14753517dfebd7f8665cb6bf6c344 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 30 Aug 2018 23:43:14 +0200 Subject: [PATCH 01/24] Version 0.3.3 --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 20ce02e87..0f985455b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) -set(OpenBLAS_PATCH_VERSION 3.dev) +set(OpenBLAS_PATCH_VERSION 3) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # Adhere to GNU filesystem layout conventions From f0563f14bab6afcb3263a4710087c704bddfbb98 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 30 Aug 2018 23:43:57 +0200 Subject: [PATCH 02/24] Version 0.3.3 --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index 4b815d7a8..6457532c8 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.3.dev +VERSION = 0.3.3 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library From fd8d1868a126bb9f12bbc43b36ee30d1ba943fbb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 31 Aug 2018 00:07:48 +0200 Subject: [PATCH 03/24] Updates for 0.3.3 --- Changelog.txt | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index 33dcacc51..faecd82e3 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,31 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.3.3 +31-Aug-2018 + +common: + * thread memory allocation has been switched back to the method + used before version 0.3.1 due to unexpected problems caused by + the new code under some circumstances. 
A new compile-time option + USE_TLS has been added to enable the new code, and it is hoped + that this can become the default again in the next version. + * LAPACK PR272 has been integrated, which fixes spurious errors + in DSYEVR and related functions caused by missing conversion + from ILAENV to ILAENV_2STAGE in several _2stage routines. + * the cmake-generated OpenBLASConfig.cmake now uses correct case + for the name of the library + * added support for Haiku OS + +x86_64: + * added AVX512 implementations of SDOT, DDOT, SAXPY, DAXPY, + DSCAL, DGEMVN and DSYMVL + * added a workaround for a cygwin issue that prevented compilation + of AVX512 code + +IBM Z: + * added autodetection of Z14 + * fixed TRMM errors in the generic target + ==================================================================== Version 0.3.2 30-Jul-2018 From 2d8064174c444bb377cc2e3879a9c8e76e45b314 Mon Sep 17 00:00:00 2001 From: fengrl <42458138+fengrl@users.noreply.github.com> Date: Fri, 26 Oct 2018 17:55:15 +0800 Subject: [PATCH 04/24] register push/pop command change 64bit push/pop register command should be used. Otherwise, data will be lost. 
--- kernel/mips64/sgemm_kernel_8x4_ps.S | 36 ++++++++++++++--------------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/kernel/mips64/sgemm_kernel_8x4_ps.S b/kernel/mips64/sgemm_kernel_8x4_ps.S index 37b20a880..82703ff5d 100644 --- a/kernel/mips64/sgemm_kernel_8x4_ps.S +++ b/kernel/mips64/sgemm_kernel_8x4_ps.S @@ -146,11 +146,11 @@ sd $21, 40($sp) sd $22, 48($sp) - ST $f24, 56($sp) - ST $f25, 64($sp) - ST $f26, 72($sp) - ST $f27, 80($sp) - ST $f28, 88($sp) + sdc1 $f24, 56($sp) + sdc1 $f25, 64($sp) + sdc1 $f26, 72($sp) + sdc1 $f27, 80($sp) + sdc1 $f28, 88($sp) #if defined(TRMMKERNEL) sd $23, 96($sp) @@ -161,10 +161,10 @@ #endif #ifndef __64BIT__ - ST $f20,120($sp) - ST $f21,128($sp) - ST $f22,136($sp) - ST $f23,144($sp) + sdc1 $f20,120($sp) + sdc1 $f21,128($sp) + sdc1 $f22,136($sp) + sdc1 $f23,144($sp) #endif .align 4 @@ -7766,11 +7766,11 @@ ld $21, 40($sp) ld $22, 48($sp) - LD $f24, 56($sp) - LD $f25, 64($sp) - LD $f26, 72($sp) - LD $f27, 80($sp) - LD $f28, 88($sp) + ldc1 $f24, 56($sp) + ldc1 $f25, 64($sp) + ldc1 $f26, 72($sp) + ldc1 $f27, 80($sp) + ldc1 $f28, 88($sp) #if defined(TRMMKERNEL) ld $23, 96($sp) @@ -7779,10 +7779,10 @@ #endif #ifndef __64BIT__ - LD $f20,120($sp) - LD $f21,128($sp) - LD $f22,136($sp) - LD $f23,144($sp) + ldc1 $f20,120($sp) + ldc1 $f21,128($sp) + ldc1 $f22,136($sp) + ldc1 $f23,144($sp) #endif daddiu $sp,$sp,STACKSIZE From 43bb386b10d94b341d5c8a27b5634081bb87de7f Mon Sep 17 00:00:00 2001 From: fengruilin Date: Thu, 15 Nov 2018 11:11:59 +0800 Subject: [PATCH 05/24] fix dot problem on 64bit mips --- kernel/mips64/KERNEL | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/mips64/KERNEL b/kernel/mips64/KERNEL index e257dcfc9..3804b245d 100644 --- a/kernel/mips64/KERNEL +++ b/kernel/mips64/KERNEL @@ -6,6 +6,11 @@ CROTKERNEL = ../mips/zrot.c ZROTKERNEL = ../mips/zrot.c CSWAPKERNEL = ../mips/zswap.c ZSWAPKERNEL = ../mips/zswap.c +SDOTKERNEL = ../mips/dot.c +DDOTKERNEL = ../mips/dot.c +CDOTKERNEL = ../mips/zdot.c 
+ZDOTKERNEL = ../mips/zdot.c + ifndef SNRM2KERNEL SNRM2KERNEL = snrm2.S From f049a4c84f5854d72ac3c06d9867c1b46d7e8bff Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Sun, 25 Nov 2018 21:34:09 +0100 Subject: [PATCH 06/24] init From 19c4bdd8b3f3fc5a97a5b756f6590bdb6d2a3ee9 Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Sun, 25 Nov 2018 21:35:01 +0100 Subject: [PATCH 07/24] Add return value so that freebsd system clang does not err out --- kernel/x86_64/sgemm_beta_skylakex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/x86_64/sgemm_beta_skylakex.c b/kernel/x86_64/sgemm_beta_skylakex.c index 4e40acadf..498c46f0d 100644 --- a/kernel/x86_64/sgemm_beta_skylakex.c +++ b/kernel/x86_64/sgemm_beta_skylakex.c @@ -56,7 +56,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta, } if (n == 0 || m == 0) - return; + return 0; c_offset = c; From 816775e3099cba07b4ad2636090c1f752d9f8b3e Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 29 Nov 2018 00:06:44 +0100 Subject: [PATCH 08/24] Add version information to openblas_get_config output --- driver/others/openblas_get_config.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c index 3e87f2cc2..471be21bc 100644 --- a/driver/others/openblas_get_config.c +++ b/driver/others/openblas_get_config.c @@ -42,8 +42,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#endif static char* openblas_config_str="" +"VERSION " + VERSION #ifdef USE64BITINT - "USE64BITINT " + " USE64BITINT " #endif #ifdef NO_CBLAS "NO_CBLAS " From a29ec458c238a9b1183baaf6d5c99d14d206987a Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 29 Nov 2018 00:10:49 +0100 Subject: [PATCH 09/24] propagate version number for openblas_config_version --- Makefile.system | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.system b/Makefile.system index 1427d09fb..22fe24337 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1036,6 +1036,8 @@ ifdef USE_TLS CCOMMON_OPT += -DUSE_TLS endif +CCOMMON_OPT += -DVERSION=\"$(VERSION)\" + ifndef SYMBOLPREFIX SYMBOLPREFIX = endif From 081ceb3e029e04b3a2773915cc67dc848bab3ef2 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 29 Nov 2018 00:12:04 +0100 Subject: [PATCH 10/24] Propagate version number for openblas_get_config --- cmake/system.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/system.cmake b/cmake/system.cmake index 61f96edb0..d803bb9eb 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -310,6 +310,8 @@ if (MIXED_MEMORY_ALLOCATION) set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION") endif () +set(CCOMMON_OPT "${CCOMMON_OPT} -DVERSION=\"\\\"${OpenBLAS_VERSION}\\\"\"") + set(REVISION "-r${OpenBLAS_VERSION}") set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION}) From de0d0ed52f314a6b370fab03bc21ebbb3d943bbc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 29 Nov 2018 11:28:19 +0100 Subject: [PATCH 11/24] Improve formatting of config output --- driver/others/openblas_get_config.c | 1 + 1 file changed, 1 insertion(+) diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c index 471be21bc..4f22325b6 100644 --- a/driver/others/openblas_get_config.c +++ b/driver/others/openblas_get_config.c @@ -44,6 +44,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
static char* openblas_config_str="" "VERSION " VERSION +" " #ifdef USE64BITINT " USE64BITINT " #endif From 97d72989739163171930046dba8d7a3214f49b9c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Thu, 29 Nov 2018 11:52:08 +0100 Subject: [PATCH 12/24] call it OpenBLAS not just version --- driver/others/openblas_get_config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/driver/others/openblas_get_config.c b/driver/others/openblas_get_config.c index 4f22325b6..eca494dca 100644 --- a/driver/others/openblas_get_config.c +++ b/driver/others/openblas_get_config.c @@ -42,7 +42,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #endif static char* openblas_config_str="" -"VERSION " +"OpenBLAS " VERSION " " #ifdef USE64BITINT From 7a2e1bc8041a898cadea475a0562e5b40ec49750 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 30 Nov 2018 10:57:09 +0100 Subject: [PATCH 13/24] Use generic kernel for DSDOT/SDSDOT as discussed in #1834 --- kernel/mips64/KERNEL.LOONGSON3A | 1 + 1 file changed, 1 insertion(+) diff --git a/kernel/mips64/KERNEL.LOONGSON3A b/kernel/mips64/KERNEL.LOONGSON3A index 2d03ad7fa..0298faaad 100644 --- a/kernel/mips64/KERNEL.LOONGSON3A +++ b/kernel/mips64/KERNEL.LOONGSON3A @@ -63,6 +63,7 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c +DSDOTKERNEL = ../mips/dot.c From 95a5542e3c21def6e63e9de8b5c1850830fc0289 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 30 Nov 2018 11:16:24 +0100 Subject: [PATCH 14/24] Revert DOT kernel changes from #1834 as the failures seen on Loongson3A appear to be limited to DSDOT/SDSDOT (i.e. 
my hackish "fix" from #1684) --- kernel/mips64/KERNEL | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/mips64/KERNEL b/kernel/mips64/KERNEL index e257dcfc9..f77ca19ed 100644 --- a/kernel/mips64/KERNEL +++ b/kernel/mips64/KERNEL @@ -1,9 +1,9 @@ CAXPYKERNEL = ../mips/zaxpy.c ZAXPYKERNEL = ../mips/zaxpy.c -SROTKERNEL = ../mips/rot.c -DROTKERNEL = ../mips/rot.c -CROTKERNEL = ../mips/zrot.c -ZROTKERNEL = ../mips/zrot.c +SROTKERNEL = ../mips/rot.c +DROTKERNEL = ../mips/rot.c +CROTKERNEL = ../mips/zrot.c +ZROTKERNEL = ../mips/zrot.c CSWAPKERNEL = ../mips/zswap.c ZSWAPKERNEL = ../mips/zswap.c From 2601cd58ab55d0b76c305bde1d320b8ab0da25ed Mon Sep 17 00:00:00 2001 From: Andrew <16061801+brada4@users.noreply.github.com> Date: Thu, 11 Oct 2018 23:29:34 +0300 Subject: [PATCH 15/24] remove surplus locking code , only enabled w x86, disabled or never enabled on all others --- driver/others/memory.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/driver/others/memory.c b/driver/others/memory.c index 25f198623..36815a39c 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -2586,7 +2586,7 @@ void *blas_memory_alloc(int procpos){ printf("Alloc Start ...\n"); #endif -#if defined(WHEREAMI) && !defined(USE_OPENMP) +/* #if defined(WHEREAMI) && !defined(USE_OPENMP) mypos = WhereAmI(); @@ -2596,12 +2596,12 @@ void *blas_memory_alloc(int procpos){ do { if (!memory[position].used && (memory[position].pos == mypos)) { LOCK_COMMAND(&alloc_lock); -/* blas_lock(&memory[position].lock);*/ +// blas_lock(&memory[position].lock); if (!memory[position].used) goto allocation; UNLOCK_COMMAND(&alloc_lock); -/* blas_unlock(&memory[position].lock);*/ +// blas_unlock(&memory[position].lock); } position ++; @@ -2609,7 +2609,7 @@ void *blas_memory_alloc(int procpos){ } while (position < NUM_BUFFERS); -#endif +#endif */ position = 0; From f85ce54d4a2c23b27d80ec454e150b5388d5d38c Mon Sep 17 00:00:00 2001 From: pkubaj Date: Fri, 30 Nov 
2018 16:05:49 +0000 Subject: [PATCH 16/24] Use correct Makefile on powerpc64 FreeBSD uses powerpc64 name for POWER architecture. Use correct Makefile for this platform. --- Makefile.system | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile.system b/Makefile.system index 22fe24337..bf2b76fae 100644 --- a/Makefile.system +++ b/Makefile.system @@ -1060,7 +1060,11 @@ endif KERNELDIR = $(TOPDIR)/kernel/$(ARCH) +ifneq ($(ARCH), powerpc64) include $(TOPDIR)/Makefile.$(ARCH) +else +include $(TOPDIR)/Makefile.power +endif CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\" From 731b2722ba4ba25d982682e47cbad0b780bd24d3 Mon Sep 17 00:00:00 2001 From: pkubaj Date: Fri, 30 Nov 2018 16:04:07 +0000 Subject: [PATCH 17/24] Fix build on POWER, remove DragonFly, add NetBSD __asm is complete on its own DBSD developers state they will only support amd64, but NetBSD supports POWER. --- cpuid_power.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpuid_power.c b/cpuid_power.c index fc36f8e2c..23e98ebb0 100644 --- a/cpuid_power.c +++ b/cpuid_power.c @@ -175,9 +175,9 @@ int detect(void){ return CPUTYPE_PPC970; #endif -#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) +#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__) int id; -id = __asm __volatile("mfpvr %0" : "=r"(id)); +__asm __volatile("mfpvr %0" : "=r"(id)); switch ( id >> 16 ) { case 0x4e: // POWER9 return CPUTYPE_POWER8; From 6c7b69108300511f4b4bece422c62a7e4ff89d87 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 30 Nov 2018 21:32:01 +0100 Subject: [PATCH 18/24] Really revert xDOT changes from 1832 neglected to rebase #1892 on merging --- kernel/mips64/KERNEL | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/kernel/mips64/KERNEL b/kernel/mips64/KERNEL index 3a26b820c..61da7445f 100644 --- a/kernel/mips64/KERNEL +++ 
b/kernel/mips64/KERNEL @@ -6,12 +6,8 @@ CROTKERNEL = ../mips/zrot.c ZROTKERNEL = ../mips/zrot.c CSWAPKERNEL = ../mips/zswap.c ZSWAPKERNEL = ../mips/zswap.c -SDOTKERNEL = ../mips/dot.c -DDOTKERNEL = ../mips/dot.c -CDOTKERNEL = ../mips/zdot.c -ZDOTKERNEL = ../mips/zdot.c - - + + ifndef SNRM2KERNEL SNRM2KERNEL = snrm2.S endif From dceff5542ce5aaf9b0a7198612c7fdf36228f3bb Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 1 Dec 2018 20:56:11 +0100 Subject: [PATCH 19/24] Handle Android environments that identify as Linux (#1898) * Handle Android environments that identify as Linux termux terminal emulator does this, causing build failures through missed defines in common.h --- cmake/system_check.cmake | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cmake/system_check.cmake b/cmake/system_check.cmake index fe30c7600..6b602c1b0 100644 --- a/cmake/system_check.cmake +++ b/cmake/system_check.cmake @@ -10,6 +10,16 @@ if (${HOST_OS} STREQUAL "WINDOWS") set(HOST_OS WINNT) endif () +if (${HOST_OS} STREQUAL "LINUX") +# check if we're building natively on Android (TERMUX) + EXECUTE_PROCESS( COMMAND uname -o COMMAND tr -d '\n' OUTPUT_VARIABLE OPERATING_SYSTEM) + if(${OPERATING_SYSTEM} MATCHES "Android") + set(HOST_OS ANDROID) + endif(${OPERATING_SYSTEM} MATCHES "Android") +endif() + + + if(CMAKE_COMPILER_IS_GNUCC AND WIN32) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE From 3c9e3faedb1d861dc094ebff0c508c679c4a3cb8 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 2 Dec 2018 23:24:53 +0100 Subject: [PATCH 20/24] fixup BSD naming of powerpc arch --- Makefile.system | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Makefile.system b/Makefile.system index bf2b76fae..6919c0114 100644 --- a/Makefile.system +++ b/Makefile.system @@ -11,7 +11,11 @@ endif # Catch conflicting usage of ARCH in some BSD environments ifeq ($(ARCH), amd64) -override ARCH=x86_64 +override ARCH=x86_64 
+else ifeq ($(ARCH), powerpc64) +override ARCH=power +endif + endif NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib @@ -1060,11 +1064,7 @@ endif KERNELDIR = $(TOPDIR)/kernel/$(ARCH) -ifneq ($(ARCH), powerpc64) include $(TOPDIR)/Makefile.$(ARCH) -else -include $(TOPDIR)/Makefile.power -endif CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\" From 93fa6b7b76ffbd56ffce54ac11467d580f53537c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 2 Dec 2018 23:42:33 +0100 Subject: [PATCH 21/24] Increment version to 0.3.5.dev --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 296113941..24c169afe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) -set(OpenBLAS_PATCH_VERSION 4) +set(OpenBLAS_PATCH_VERSION 5.dev) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # Adhere to GNU filesystem layout conventions From f5acaad8f0590502e26539917a0704e572e17abc Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 2 Dec 2018 23:43:15 +0100 Subject: [PATCH 22/24] Increment version to 0.3.5.dev --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index f3086a01b..0d5b83b39 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.4 +VERSION = 0.3.5.dev # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. 
Meanwhile, the soname in shared library From 360374be62cab8f5be8baecfa675da59a571608d Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 2 Dec 2018 23:44:13 +0100 Subject: [PATCH 23/24] Update with the changes from 0.3.4 --- Changelog.txt | 73 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index faecd82e3..0dd17a558 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,77 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.3.4 +02-Dec-2018 + +common: + * the new, experimental thread-local memory allocation had + inadvertently been left enabled for gmake builds in 0.3.3 + despite the announcement. It is now disabled by default, and + single-threaded builds will keep using the old allocator even + if the USE_TLS option is turned on. + * OpenBLAS will now provide enough buffer space for at least 50 + threads by default. + * The output of openblas_get_config() now contains the version + number. + * A serious thread safety bug in GEMV operation with small M and + large N size has been fixed. + * The code will now automatically call blas_thread_init after a + fork if needed before handling a call to openblas_set_num_threads + * Accesses to parallelized level3 functions from multiple callers + are now serialized to avoid thread races (unless using OpenMP). + This should provide better performance than the known-threadsafe + (but non-default) USE_SIMPLE_THREADED_LEVEL3 option. + * When building LAPACK with gfortran, -frecursive is now (again) + enabled by default to ensure correct behaviour. + * The OpenBLAS version of cblas.h now supports both CBLAS_ORDER and + CBLAS_LAYOUT as the name of the matrix row/column order option. + * Externally set LDFLAGS are now passed through to the final compile/link + steps to facilitate setting platform-specific linker flags. 
+ * A potential race condition during the build of LAPACK (that would + usually manifest itself as a failure to build TESTING/MATGEN) has been + fixed. + * xHEMV has been changed to stay single-threaded for small input sizes + where the overhead of multithreading exceeds any possible gains + * CSWAP and ZSWAP have been limited to a single thread except on ARMV8 or + ThunderX hardware with sizable input. + * Linker flags for the PGI compiler have been updated + * Behaviour of AXPY with zero increments is now handled in the C interface, + correcting the result on at least Intel Atom. + * The result matrix from calling SGELSS with an all-zero input matrix is + now zeroed completely. + +x86_64: + * Autodetection of AMD Ryzen2 has been fixed (again). + * CMAKE builds now support labeling of an INTERFACE64=1 build of + the library with the _64 suffix. + * AVX512 version of DGEMM has been added and the AVX512 SGEMM kernel + has been sped up by rewriting with C intrinsics + * Fixed compilation on RHEL5/CENTOS5 (issue with typename __WAIT_STATUS) + +POWER: + * added support for building on AIX (with gcc and GNU tools from AIX Toolbox). + * CPU type detection has been implemented for AIX. + * CPU type detection has been fixed for NETBSD. + +MIPS64: + * AXPY on LOONGSON3A has been corrected to pass "zero increment" utest. + * DSDOT on LOONGSON3A has been fixed. + * the SGEMM microkernel has been hardened against potential data loss. + +ARMV8: + * DYNAMIC_ARCH support is now available for 64bit ARM + * cross-compiling for ARMV8 under iOS now works. + * cpu-specific code has been rearranged to make better use of both + hardware commonalities and model-specific compiler optimizations. + * XGENE1 has been removed as a TARGET, superseded by the improved generic + ARMV8 support. 
+ +ARMV7: + * Older assembly mnemonics have been converted to UAL form to allow + building with clang 7.0 + * Cross compiling LAPACKE for Android has been fixed again (broken by + update to LAPACK 3.7.0 some while ago). + ==================================================================== Version 0.3.3 31-Aug-2018 From ea6d1b96bd3fdaf8e8b4d912bdd906cbcb9b1bbf Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Mon, 3 Dec 2018 08:59:10 +0100 Subject: [PATCH 24/24] Update Makefile.system --- Makefile.system | 2 -- 1 file changed, 2 deletions(-) diff --git a/Makefile.system b/Makefile.system index 6919c0114..3cf5a16b2 100644 --- a/Makefile.system +++ b/Makefile.system @@ -16,8 +16,6 @@ else ifeq ($(ARCH), powerpc64) override ARCH=power endif -endif - NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib # Default C compiler