From f1bb85d378ef4ebcfd4f4c7bbb14b074bfdc945f Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Fri, 16 Oct 2020 20:52:15 +0200 Subject: [PATCH 1/6] Add AVX flags for clang/aocc as well --- Makefile.x86_64 | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Makefile.x86_64 b/Makefile.x86_64 index 27eb571ee..3a42e19e4 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -78,6 +78,10 @@ GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11) CCOMMON_OPT += -mavx2 endif +else +ifeq ($(C_COMPILER), CLANG) +CCOMMON_OPT += -mavx2 +endif endif ifeq ($(F_COMPILER), GFORTRAN) # AVX2 support was added in 4.7.0 From 5381a18056c1ad6fe171eef275f4b0095e22ee57 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 17 Oct 2020 22:05:36 +0200 Subject: [PATCH 2/6] Update Changelog.txt with the 0.3.11 changes --- Changelog.txt | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/Changelog.txt b/Changelog.txt index cbf0b50f5..bd0e60992 100644 --- a/Changelog.txt +++ b/Changelog.txt @@ -1,4 +1,76 @@ OpenBLAS ChangeLog +==================================================================== +Version 0.3.11 + 17-Oct-2020 + + common: + * API change: + the newly added BFLOAT16 functions were renamed to use the + letter "B" instead of "H" to avoid potential confusion with + the IEEE "half precision float" type, i.e. the 0.3.10 + SHGEMM is now SBGEMM and the corresponding build option + was changed from "BUILD_HALF" to "BUILD_BFLOAT16". 
+ * Reduced the default BLAS3_MEM_ALLOC_THRESHOLD (used as an upper + limit for placing temporary arrays on the stack) to be compatible + with a stack size of 1mb (as imposed by the JAVA runtime library) + * Added mixed-precision dot function SBDOT and utility functions + shstobf16, shdtobf16, sbf16tos and dbf16tod to convert between + single or double precision float arrays and bfloat16 arrays + * Fixed prototypes of LAPACK_?ggsvp and LAPACK_?ggsvd functions + in lapack.h + * Fixed underflow and rounding errors in LAPACK SLANV2 and DLANV2 + (causing miscalculations in e.g. SHSEQR/DHSEQR, LAPACK issue #263) + * Fixed workspace calculation in LAPACK ?GELQ (LAPACK issue #415) + * Fixed several bugs in the LAPACK testsuite + * Improved performance of TRMM and TRSM for certain problem sizes + * Fixed infinite recursions and workspace miscalculations in ReLAPACK + * CMAKE builds no longer require pkg-config for creating the .pc file + * Makefile builds no longer misread NO_CBLAS=0 or NO_LAPACK=0 as + enabling these options + * Fixed detection of gfortran when invoked through an mpi wrapper + * Improve thread reinitialization performance with OpenMP after a fork + * Added support for building only the subset of the library required + for a particular precision by specifying BUILD_SINGLE, BUILD_DOUBLE + * Optional function name prefixes and suffixes are now correctly + reflected in the generated cblas.h + * Added CMAKE build support for the LAPACK and multithreading tests + +POWER: + * Added optimized support for POWER10 + * Added support for compiling for POWER8 in 32bit mode + * Added support for compilation with LLVM/clang + * Added support for compilation with NVIDIA/PGI compilers + * Fixed building on big-endian POWER8 + * Fixed miscompilation of ZDOTC by gcc10 + * Fixed alignment errors in the POWER8 SAXPY kernel + * Improved CPU detection on AIX + * Supported building with older compilers on POWER9 + +x86_64: + * Added support for Intel Cooperlake + * Added 
autodetection of AMD Renoir/Matisse/Zen3 cpus + * Added autodetection of Intel Comet Lake cpus + * Reimplemented ?sum, ?dot and daxpy using universal intrinsics + * Reset the fpu state before using the fpu on Windows as a workaround + for a problem introduced in Windows 10 build 19041 (a.k.a. SDK 2004) + * Fixed potentially undefined behaviour in the dot and gemv_t kernels + * Fixed a potential segmentation fault in DYNAMIC_ARCH builds + * Fixed building for ZEN with PGI/NVIDIA and AMD AOCC compilers + +ARMV7: + * Fixed cpu detection on BSD-like systems + +ARMV8: + * Added preliminary support for Apple Vortex cpus + * Added support for the Cavium ThunderX3T110 cpu + * Fixed cpu detection on BSD-like systems + * Fixed compilation in -std=C18 mode + + +IBM Z: + * Added support for compiling with the clang compiler + * Improved GEMM performance on Z14 + ==================================================================== Version 0.3.10 14-Jun-2020 From fe9015b619037fdbd04b8ffe4d58ab4f22ea21fd Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 17 Oct 2020 22:10:50 +0200 Subject: [PATCH 3/6] Update version for 0.3.11 release --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a6cf2ef83..e77aec030 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) -set(OpenBLAS_PATCH_VERSION 10.dev) +set(OpenBLAS_PATCH_VERSION 11) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # Adhere to GNU filesystem layout conventions From b8f689200eccb3802aaa1188a98d3b5578fce295 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 17 Oct 2020 22:11:34 +0200 Subject: [PATCH 4/6] Update version number to 0.3.11 --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index 
67d183936..acfe568d6 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.10.dev +VERSION = 0.3.11 # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library From 26a701f4ad35372c449fd74875fa7f6ff35aeb10 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 17 Oct 2020 22:40:06 +0200 Subject: [PATCH 5/6] Update version string to 0.3.11.dev --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e77aec030..21f0c9571 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) -set(OpenBLAS_PATCH_VERSION 11) +set(OpenBLAS_PATCH_VERSION 11.dev) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # Adhere to GNU filesystem layout conventions From 0ac610270809cb6dee8f5587784ceab8df356495 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sat, 17 Oct 2020 22:40:47 +0200 Subject: [PATCH 6/6] Update version string to 0.3.11.dev --- Makefile.rule | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile.rule b/Makefile.rule index acfe568d6..e8f8c2951 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.11 +VERSION = 0.3.11.dev # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library