diff --git a/CMakeLists.txt b/CMakeLists.txt index 74db77135..951271717 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) -set(OpenBLAS_PATCH_VERSION 8.dev) +set(OpenBLAS_PATCH_VERSION 9.dev) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # Adhere to GNU filesystem layout conventions diff --git a/Changelog.txt b/Changelog.txt index f160a4e13..d66b2719a 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,45 +1,100 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.3.8 + 9-Feb-2020 + +common: + * LAPACK has been updated to 3.9.0 (plus patches up to + January 2nd, 2020) + * CMAKE support has been improved in several areas including + cross-compilation + * a thread race condition in the GEMM3M kernels was resolved + * the "generic" (plain C) gemm beta kernel used by many targets + has been sped up + * an optimized version of the LAPACK trtrs functions has been added + * an incompatibility between the LAPACK tests and the OpenBLAS + implementation of XERBLA was resolved, removing the numerous + warnings about wrong error exits in the former + * support for NetBSD has been added + * support for compilation with g95 and non-GNU versions of ld + has been improved + * support for compilation with (upcoming) gcc 10 has been added + +POWER: + * worked around miscompilation of several POWER8 and POWER9 + kernels by older versions of gcc + * added support for big-endian POWER8 and for compilation on AIX + * corrected bugs in the big-endian support for PPC440 and PPC970 + * DYNAMIC_ARCH support is now available in CMAKE builds as well + +ARMV8: + * performance of DGEMM_BETA and SGEMM_NCOPY has been improved + * compilation for 32bit works again + * performance of the RPCC function has been improved + * improved performance on small
systems + * DYNAMIC_ARCH support is now available in CMAKE builds as well + * cross-compilation from OSX to IOS was simplified + +x86_64: + * a new AVX512 DGEMM kernel was added and the AVX512 SGEMM kernel + was significantly improved + * optimized AVX512 kernels for CGEMM and ZGEMM have been added + * AVX2 kernels for STRMM, SGEMM, and CGEMM have been significantly + sped up and optimized CGEMM3M and ZGEMM3M kernels have been added + * added support for QEMU virtual cpus + * a compilation problem with PGI and SUN compilers was fixed + * Intel "Goldmont plus" is now autodetected + * a potential crash on program exit on MS Windows has been fixed + +x86: + * an unwanted case sensitivity in the implementation of LSAME + on older 32bit AMD cpus was fixed + +zarch: + * Z15 is now supported as Z14 + * DYNAMIC_ARCH is now available on ZARCH as well + ==================================================================== Version 0.3.7 11-Aug 2019 common: - * having the gmake special variables TARGET_ARCH or TARGET_MACH - defined no longer causes build failures in ctest or utest - * defining NO_AFFINITY or USE_TLS to 0 in gmake builds no longer - has the same effect as setting them to 1 - * a new test program was added to allow checking the library for - thread safety - * a new option USE_LOCKING was added to ensure thread safety when - OpenBLAS itself is built without multithreading but will be - called from multiple threads. 
- * a build failure on Linux with glibc versions earlier than 2.5 - was fixed - * a runtime error with CPU enumeration (and NO_AFFINITY not set) - on glibc 2.6 was fixed - * NO_AFFINITY was added to the CMAKE options (and defaults to being - active on Linux, as in the gmake builds) + * having the gmake special variables TARGET_ARCH or TARGET_MACH + defined no longer causes build failures in ctest or utest + * defining NO_AFFINITY or USE_TLS to 0 in gmake builds no longer + has the same effect as setting them to 1 + * a new test program was added to allow checking the library for + thread safety + * a new option USE_LOCKING was added to ensure thread safety when + OpenBLAS itself is built without multithreading but will be + called from multiple threads. + * a build failure on Linux with glibc versions earlier than 2.5 + was fixed + * a runtime error with CPU enumeration (and NO_AFFINITY not set) + on glibc 2.6 was fixed + * NO_AFFINITY was added to the CMAKE options (and defaults to being + active on Linux, as in the gmake builds) x86_64: - * the build-time logic for detection of AVX512 availability in - the processor and compiler was fixed - * gmake builds on OSX now set the internal name of the library to - libopenblas.0.dylib (consistent with CMAKE) - * the Haswell DGEMM kernel received a significant speedup through - improved prefetch and load instructions - * performance of DGEMM, DTRMM, DTRSM and ZDOT on Zen/Zen2 was markedly - increased by avoiding vpermpd instructions - * the SKYLAKEX (AVX512) DGEMM helper functions have now been disabled - to fix remaining errors in DGEMM, DSYMM and DTRMM + * the build-time logic for detection of AVX512 availability in + the processor and compiler was fixed + * gmake builds on OSX now set the internal name of the library to + libopenblas.0.dylib (consistent with CMAKE) + * the Haswell DGEMM kernel received a significant speedup through + improved prefetch and load instructions + * performance of DGEMM, DTRMM, DTRSM and 
ZDOT on Zen/Zen2 was markedly + increased by avoiding vpermpd instructions + * the SKYLAKEX (AVX512) DGEMM helper functions have now been disabled + to fix remaining errors in DGEMM, DSYMM and DTRMM -## POWER: - * added support for building on FreeBSD/powerpc64 and FreeBSD/ppc970 - * added optimized kernels for POWER9 SGEMM and STRMM +POWER: + * added support for building on FreeBSD/powerpc64 and FreeBSD/ppc970 + * added optimized kernels for POWER9 SGEMM and STRMM -## ARMV7: - * fixed the softfp implementations of xAMAX and IxAMAX - * removed the predefined -march= flags on both ARMV5 and ARMV6 as - they were appropriate for only a subset of platforms +ARMV7: + * fixed the softfp implementations of xAMAX and IxAMAX + * removed the predefined -march= flags on both ARMV5 and ARMV6 as + they were appropriate for only a subset of platforms ==================================================================== Version 0.3.6 diff --git a/Makefile.rule b/Makefile.rule index c0941e488..21b7e138a 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.8.dev +VERSION = 0.3.9.dev # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library