commit
449e8ea443
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
|
||||||
project(OpenBLAS C ASM)
|
project(OpenBLAS C ASM)
|
||||||
set(OpenBLAS_MAJOR_VERSION 0)
|
set(OpenBLAS_MAJOR_VERSION 0)
|
||||||
set(OpenBLAS_MINOR_VERSION 3)
|
set(OpenBLAS_MINOR_VERSION 3)
|
||||||
set(OpenBLAS_PATCH_VERSION 8.dev)
|
set(OpenBLAS_PATCH_VERSION 9.dev)
|
||||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||||
|
|
||||||
# Adhere to GNU filesystem layout conventions
|
# Adhere to GNU filesystem layout conventions
|
||||||
|
|
119
Changelog.txt
119
Changelog.txt
|
@ -1,45 +1,100 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.8
|
||||||
|
9-Feb-2020
|
||||||
|
|
||||||
|
common:
|
||||||
|
* LAPACK has been updated to 3.9.0 (plus patches up to
|
||||||
|
January 2nd, 2020)
|
||||||
|
* CMAKE support has been improved in several areas including
|
||||||
|
cross-compilation
|
||||||
|
* a thread race condition in the GEMM3M kernels was resolved
|
||||||
|
* the "generic" (plain C) gemm beta kernel used by many targets
|
||||||
|
has been sped up
|
||||||
|
* an optimized version of the LAPACK trtrs functions has been added
|
||||||
|
* an incompatibility between the LAPACK tests and the OpenBLAS
|
||||||
|
implementation of XERBLA was resolved, removing the numerous
|
||||||
|
warnings about wrong error exits in the former
|
||||||
|
* support for NetBSD has been added
|
||||||
|
* support for compilation with g95 and non-GNU versions of ld
|
||||||
|
has been improved
|
||||||
|
* support for compilation with (upcoming) gcc 10 has been added
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* worked around miscompilation of several POWER8 and POWER9
|
||||||
|
kernels by older versions of gcc
|
||||||
|
* added support for big-endian POWER8 and for compilation on AIX
|
||||||
|
* corrected bugs in the big-endian support for PPC440 and PPC970
|
||||||
|
* DYNAMIC_ARCH support is now available in CMAKE builds as well
|
||||||
|
|
||||||
|
ARMV8:
|
||||||
|
* performance of DGEMM_BETA and SGEMM_NCOPY has been improved
|
||||||
|
* compilation for 32bit works again
|
||||||
|
* performance of the RPCC function has been improved
|
||||||
|
* improved performance on small systems
|
||||||
|
* DYNAMIC_ARCH support is now available in CMAKE builds as well
|
||||||
|
* cross-compilation from OSX to IOS was simplified
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* a new AVX512 DGEMM kernel was added and the AVX512 SGEMM kernel
|
||||||
|
was significantly improved
|
||||||
|
* optimized AVX512 kernels for CGEMM and ZGEMM have been added
|
||||||
|
* AVX2 kernels for STRMM, SGEMM, and CGEMM have been significantly
|
||||||
|
sped up and optimized CGEMM3M and ZGEMM3M kernels have been added
|
||||||
|
* added support for QEMU virtual cpus
|
||||||
|
* a compilation problem with PGI and SUN compilers was fixed
|
||||||
|
* Intel "Goldmont plus" is now autodetected
|
||||||
|
* a potential crash on program exit on MS Windows has been fixed
|
||||||
|
|
||||||
|
x86:
|
||||||
|
* an unwanted case sensitivity in the implementation of LSAME
|
||||||
|
on older 32bit AMD cpus was fixed
|
||||||
|
|
||||||
|
zarch:
|
||||||
|
* Z15 is now supported as Z14
|
||||||
|
* DYNAMIC_ARCH is now available on ZARCH as well
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.3.7
|
Version 0.3.7
|
||||||
11-Aug 2019
|
11-Aug 2019
|
||||||
|
|
||||||
common:
|
common:
|
||||||
* having the gmake special variables TARGET_ARCH or TARGET_MACH
|
* having the gmake special variables TARGET_ARCH or TARGET_MACH
|
||||||
defined no longer causes build failures in ctest or utest
|
defined no longer causes build failures in ctest or utest
|
||||||
* defining NO_AFFINITY or USE_TLS to 0 in gmake builds no longer
|
* defining NO_AFFINITY or USE_TLS to 0 in gmake builds no longer
|
||||||
has the same effect as setting them to 1
|
has the same effect as setting them to 1
|
||||||
* a new test program was added to allow checking the library for
|
* a new test program was added to allow checking the library for
|
||||||
thread safety
|
thread safety
|
||||||
* a new option USE_LOCKING was added to ensure thread safety when
|
* a new option USE_LOCKING was added to ensure thread safety when
|
||||||
OpenBLAS itself is built without multithreading but will be
|
OpenBLAS itself is built without multithreading but will be
|
||||||
called from multiple threads.
|
called from multiple threads.
|
||||||
* a build failure on Linux with glibc versions earlier than 2.5
|
* a build failure on Linux with glibc versions earlier than 2.5
|
||||||
was fixed
|
was fixed
|
||||||
* a runtime error with CPU enumeration (and NO_AFFINITY not set)
|
* a runtime error with CPU enumeration (and NO_AFFINITY not set)
|
||||||
on glibc 2.6 was fixed
|
on glibc 2.6 was fixed
|
||||||
* NO_AFFINITY was added to the CMAKE options (and defaults to being
|
* NO_AFFINITY was added to the CMAKE options (and defaults to being
|
||||||
active on Linux, as in the gmake builds)
|
active on Linux, as in the gmake builds)
|
||||||
|
|
||||||
x86_64:
|
x86_64:
|
||||||
* the build-time logic for detection of AVX512 availability in
|
* the build-time logic for detection of AVX512 availability in
|
||||||
the processor and compiler was fixed
|
the processor and compiler was fixed
|
||||||
* gmake builds on OSX now set the internal name of the library to
|
* gmake builds on OSX now set the internal name of the library to
|
||||||
libopenblas.0.dylib (consistent with CMAKE)
|
libopenblas.0.dylib (consistent with CMAKE)
|
||||||
* the Haswell DGEMM kernel received a significant speedup through
|
* the Haswell DGEMM kernel received a significant speedup through
|
||||||
improved prefetch and load instructions
|
improved prefetch and load instructions
|
||||||
* performance of DGEMM, DTRMM, DTRSM and ZDOT on Zen/Zen2 was markedly
|
* performance of DGEMM, DTRMM, DTRSM and ZDOT on Zen/Zen2 was markedly
|
||||||
increased by avoiding vpermpd instructions
|
increased by avoiding vpermpd instructions
|
||||||
* the SKYLAKEX (AVX512) DGEMM helper functions have now been disabled
|
* the SKYLAKEX (AVX512) DGEMM helper functions have now been disabled
|
||||||
to fix remaining errors in DGEMM, DSYMM and DTRMM
|
to fix remaining errors in DGEMM, DSYMM and DTRMM
|
||||||
|
|
||||||
## POWER:
|
POWER:
|
||||||
* added support for building on FreeBSD/powerpc64 and FreeBSD/ppc970
|
* added support for building on FreeBSD/powerpc64 and FreeBSD/ppc970
|
||||||
* added optimized kernels for POWER9 SGEMM and STRMM
|
* added optimized kernels for POWER9 SGEMM and STRMM
|
||||||
|
|
||||||
## ARMV7:
|
ARMV7:
|
||||||
* fixed the softfp implementations of xAMAX and IxAMAX
|
* fixed the softfp implementations of xAMAX and IxAMAX
|
||||||
* removed the predefined -march= flags on both ARMV5 and ARMV6 as
|
* removed the predefined -march= flags on both ARMV5 and ARMV6 as
|
||||||
they were appropriate for only a subset of platforms
|
they were appropriate for only a subset of platforms
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.3.6
|
Version 0.3.6
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# This library's version
|
# This library's version
|
||||||
VERSION = 0.3.8.dev
|
VERSION = 0.3.9.dev
|
||||||
|
|
||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||||
|
|
Loading…
Reference in New Issue