diff --git a/CMakeLists.txt b/CMakeLists.txt index a6cf2ef83..21f0c9571 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) -set(OpenBLAS_PATCH_VERSION 10.dev) +set(OpenBLAS_PATCH_VERSION 11.dev) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # Adhere to GNU filesystem layout conventions diff --git a/Changelog.txt b/Changelog.txt index cbf0b50f5..bd0e60992 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,76 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.3.11 + 17-Oct-2020 + + common: + * API change: + the newly added BFLOAT16 functions were renamed to use the + letter "B" instead of "H" to avoid potential confusion with + the IEEE "half precision float" type, i.e. the 0.3.10 + SHGEMM is now SBGEMM and the corresponding build option + was changed from "BUILD_HALF" to "BUILD_BFLOAT16". + * Reduced the default BLAS3_MEM_ALLOC_THRESHOLD (used as an upper + limit for placing temporary arrays on the stack) to be compatible + with a stack size of 1mb (as imposed by the JAVA runtime library) + * Added mixed-precision dot function SBDOT and utility functions + shstobf16, shdtobf16, sbf16tos and dbf16tod to convert between + single or double precision float arrays and bfloat16 arrays + * Fixed prototypes of LAPACK_?ggsvp and LAPACK_?ggsvd functions + in lapack.h + * Fixed underflow and rounding errors in LAPACK SLANV2 and DLANV2 + (causing miscalculations in e.g. SHSEQR/DHSEQR, LAPACK issue #263) + * Fixed workspace calculation in LAPACK ?GELQ (LAPACK issue #415) + * Fixed several bugs in the LAPACK testsuite + * Improved performance of TRMM and TRSM for certain problem sizes + * Fixed infinite recursions and workspace miscalculations in ReLAPACK + * CMAKE builds no longer require pkg-config for creating the .pc file + * Makefile builds no longer misread NO_CBLAS=0 or NO_LAPACK=0 as + enabling these options + * Fixed detection of gfortran when invoked through an mpi wrapper + * Improve thread reinitialization performance with OpenMP xafter a fork + * Added support for building only the subset of the library required + for a particular precision by specifying BUILD_SINGLE, BUILD_DOUBLE + * Optional function name prefixes and suffixes are now correctly + reflected in the generated cblas.h + * Added CMAKE build support for the LAPACK and multithreading tests + +POWER: + * Added optimized support for POWER10 + * Added support for compiling for POWER8 in 32bit mode + * Added support for compilation with LLVM/clang + * Added support for compilation with NVIDIA/PGI compilers + * Fixed building on big-endian POWER8 + * Fixed miscompilation of ZDOTC by gcc10 + * Fixed alignment errors in the POWER8 SAXPY kernel + * Improved CPU detection on AIX + * Supported building with older compilers on POWER9 + +x86_64: + * Added support for Intel Cooperlake + * Added autodetection of AMD Renoir/Matisse/Zen3 cpus + * Added autodetection of Intel Comet Lake cpus + * Reimplemented ?sum, ?dot and daxpy using universal intrinsics + * Reset the fpu state before using the fpu on Windows as a workaround + for a problem introduced in Windows 10 build 19041 (a.k.a. SDK 2004) + * Fixed potentially undefined behaviour in the dot and gemv_t kernels + * Fixed a potential segmentation fault in DYNAMIC_ARCH builds + * Fixed building for ZEN with PGI/NVIDIA and AMD AOCC compilers + +ARMV7: + * Fixed cpu detection on BSD-like systems + +ARMV8: + * Added preliminary support for Apple Vortex cpus + * Added support for the Cavium ThunderX3T110 cpu + * Fixed cpu detection on BSD-like systems + * Fixed compilation in -std=C18 mode + + +IBM Z: + * Added support for compiling with the clang compiler + * Improved GEMM performance on Z14 + ==================================================================== Version 0.3.10 14-Jun-2020 diff --git a/Makefile.rule b/Makefile.rule index 67d183936..e8f8c2951 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.10.dev +VERSION = 0.3.11.dev # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library diff --git a/Makefile.x86_64 b/Makefile.x86_64 index 27eb571ee..3a42e19e4 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -78,6 +78,10 @@ GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11) CCOMMON_OPT += -mavx2 endif +else +ifeq ($(C_COMPILER), CLANG) +CCOMMON_OPT += -mavx2 +endif endif ifeq ($(F_COMPILER), GFORTRAN) # AVX2 support was added in 4.7.0