commit
89eea6b455
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
|
||||||
project(OpenBLAS C ASM)
|
project(OpenBLAS C ASM)
|
||||||
set(OpenBLAS_MAJOR_VERSION 0)
|
set(OpenBLAS_MAJOR_VERSION 0)
|
||||||
set(OpenBLAS_MINOR_VERSION 3)
|
set(OpenBLAS_MINOR_VERSION 3)
|
||||||
set(OpenBLAS_PATCH_VERSION 10.dev)
|
set(OpenBLAS_PATCH_VERSION 11.dev)
|
||||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||||
|
|
||||||
# Adhere to GNU filesystem layout conventions
|
# Adhere to GNU filesystem layout conventions
|
||||||
|
|
|
@ -1,4 +1,76 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.11
|
||||||
|
17-Oct-2020
|
||||||
|
|
||||||
|
common:
|
||||||
|
* API change:
|
||||||
|
the newly added BFLOAT16 functions were renamed to use the
|
||||||
|
letter "B" instead of "H" to avoid potential confusion with
|
||||||
|
the IEEE "half precision float" type, i.e. the 0.3.10
|
||||||
|
SHGEMM is now SBGEMM and the corresponding build option
|
||||||
|
was changed from "BUILD_HALF" to "BUILD_BFLOAT16".
|
||||||
|
* Reduced the default BLAS3_MEM_ALLOC_THRESHOLD (used as an upper
|
||||||
|
limit for placing temporary arrays on the stack) to be compatible
|
||||||
|
with a stack size of 1MB (as imposed by the Java runtime library)
|
||||||
|
* Added mixed-precision dot function SBDOT and utility functions
|
||||||
|
sbstobf16, sbdtobf16, sbf16tos and dbf16tod to convert between
|
||||||
|
single or double precision float arrays and bfloat16 arrays
|
||||||
|
* Fixed prototypes of LAPACK_?ggsvp and LAPACK_?ggsvd functions
|
||||||
|
in lapack.h
|
||||||
|
* Fixed underflow and rounding errors in LAPACK SLANV2 and DLANV2
|
||||||
|
(causing miscalculations in e.g. SHSEQR/DHSEQR, LAPACK issue #263)
|
||||||
|
* Fixed workspace calculation in LAPACK ?GELQ (LAPACK issue #415)
|
||||||
|
* Fixed several bugs in the LAPACK testsuite
|
||||||
|
* Improved performance of TRMM and TRSM for certain problem sizes
|
||||||
|
* Fixed infinite recursions and workspace miscalculations in ReLAPACK
|
||||||
|
* CMAKE builds no longer require pkg-config for creating the .pc file
|
||||||
|
* Makefile builds no longer misread NO_CBLAS=0 or NO_LAPACK=0 as
|
||||||
|
enabling these options
|
||||||
|
* Fixed detection of gfortran when invoked through an mpi wrapper
|
||||||
|
* Improve thread reinitialization performance with OpenMP after a fork
|
||||||
|
* Added support for building only the subset of the library required
|
||||||
|
for a particular precision by specifying BUILD_SINGLE, BUILD_DOUBLE
|
||||||
|
* Optional function name prefixes and suffixes are now correctly
|
||||||
|
reflected in the generated cblas.h
|
||||||
|
* Added CMAKE build support for the LAPACK and multithreading tests
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Added optimized support for POWER10
|
||||||
|
* Added support for compiling for POWER8 in 32bit mode
|
||||||
|
* Added support for compilation with LLVM/clang
|
||||||
|
* Added support for compilation with NVIDIA/PGI compilers
|
||||||
|
* Fixed building on big-endian POWER8
|
||||||
|
* Fixed miscompilation of ZDOTC by gcc10
|
||||||
|
* Fixed alignment errors in the POWER8 SAXPY kernel
|
||||||
|
* Improved CPU detection on AIX
|
||||||
|
* Supported building with older compilers on POWER9
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* Added support for Intel Cooperlake
|
||||||
|
* Added autodetection of AMD Renoir/Matisse/Zen3 cpus
|
||||||
|
* Added autodetection of Intel Comet Lake cpus
|
||||||
|
* Reimplemented ?sum, ?dot and daxpy using universal intrinsics
|
||||||
|
* Reset the fpu state before using the fpu on Windows as a workaround
|
||||||
|
for a problem introduced in Windows 10 build 19041 (a.k.a. SDK 2004)
|
||||||
|
* Fixed potentially undefined behaviour in the dot and gemv_t kernels
|
||||||
|
* Fixed a potential segmentation fault in DYNAMIC_ARCH builds
|
||||||
|
* Fixed building for ZEN with PGI/NVIDIA and AMD AOCC compilers
|
||||||
|
|
||||||
|
ARMV7:
|
||||||
|
* Fixed cpu detection on BSD-like systems
|
||||||
|
|
||||||
|
ARMV8:
|
||||||
|
* Added preliminary support for Apple Vortex cpus
|
||||||
|
* Added support for the Cavium ThunderX3T110 cpu
|
||||||
|
* Fixed cpu detection on BSD-like systems
|
||||||
|
* Fixed compilation in -std=c18 mode
|
||||||
|
|
||||||
|
|
||||||
|
IBM Z:
|
||||||
|
* Added support for compiling with the clang compiler
|
||||||
|
* Improved GEMM performance on Z14
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.3.10
|
Version 0.3.10
|
||||||
14-Jun-2020
|
14-Jun-2020
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# This library's version
|
# This library's version
|
||||||
VERSION = 0.3.10.dev
|
VERSION = 0.3.11.dev
|
||||||
|
|
||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||||
|
|
|
@ -78,6 +78,10 @@ GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||||
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
|
||||||
CCOMMON_OPT += -mavx2
|
CCOMMON_OPT += -mavx2
|
||||||
endif
|
endif
|
||||||
|
else
|
||||||
|
ifeq ($(C_COMPILER), CLANG)
|
||||||
|
CCOMMON_OPT += -mavx2
|
||||||
|
endif
|
||||||
endif
|
endif
|
||||||
ifeq ($(F_COMPILER), GFORTRAN)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
# AVX2 support was added in 4.7.0
|
# AVX2 support was added in 4.7.0
|
||||||
|
|
Loading…
Reference in New Issue