commit
fa9a30b491
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
|
||||||
project(OpenBLAS C ASM)
|
project(OpenBLAS C ASM)
|
||||||
set(OpenBLAS_MAJOR_VERSION 0)
|
set(OpenBLAS_MAJOR_VERSION 0)
|
||||||
set(OpenBLAS_MINOR_VERSION 3)
|
set(OpenBLAS_MINOR_VERSION 3)
|
||||||
set(OpenBLAS_PATCH_VERSION 13.dev)
|
set(OpenBLAS_PATCH_VERSION 14.dev)
|
||||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||||
|
|
||||||
# Adhere to GNU filesystem layout conventions
|
# Adhere to GNU filesystem layout conventions
|
||||||
|
|
|
@ -1,4 +1,52 @@
|
||||||
OpenBLAS ChangeLog
|
OpenBLAS ChangeLog
|
||||||
|
====================================================================
|
||||||
|
Version 0.3.14
|
||||||
|
17-Mar-2021
|
||||||
|
|
||||||
|
common:
|
||||||
|
* Fixed a race condition on thread shutdown in non-OpenMP builds
|
||||||
|
* Fixed custom BUFFERSIZE option getting ignored in gmake builds
|
||||||
|
* Fixed CMake compilation of the TRMM kernels for GENERIC platforms
|
||||||
|
* Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT
|
||||||
|
* Improved performance of OMATCOPY_RT across all platforms
|
||||||
|
* Changed Perl scripts to use env instead of a hardcoded /usr/bin/perl
|
||||||
|
* Fixed potential misreading of the GCC compiler version in the build scripts
|
||||||
|
* Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477)
|
||||||
|
* Reduced the stack size requirements for running the LAPACK testsuite (Reference-LAPACK #335)
|
||||||
|
|
||||||
|
RISCV:
|
||||||
|
* Fixed compilation on RISCV (missing entry in getarch)
|
||||||
|
|
||||||
|
POWER:
|
||||||
|
* Fixed compilation for DYNAMIC_ARCH with clang and with old gcc versions
|
||||||
|
* Added support for compilation on FreeBSD/ppc64le
|
||||||
|
* Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL
|
||||||
|
* Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM
|
||||||
|
* Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10
|
||||||
|
* Improved SCOPY and CCOPY performance on POWER10
|
||||||
|
* Improved SGEMM and DGEMM performance on POWER10
|
||||||
|
* Added support for compilation with the NVIDIA HPC compiler
|
||||||
|
|
||||||
|
x86_64:
|
||||||
|
* Added an optimized bfloat16 GEMM kernel for Cooperlake
|
||||||
|
* Added CPUID autodetection for Intel Rocket Lake and Tiger Lake CPUs
|
||||||
|
* Improved the performance of SASUM, DASUM, SROT, DROT on AMD Ryzen CPUs
|
||||||
|
* Added support for compilation with the NAG Fortran compiler
|
||||||
|
* Fixed recognition of the AMD AOCC compiler
|
||||||
|
* Fixed compilation for DYNAMIC_ARCH with clang on Windows
|
||||||
|
* Added support for running the BLAS/CBLAS tests on Windows
|
||||||
|
* Fixed signatures of the TLS callback functions for Windows x64
|
||||||
|
* Fixed various issues with FMA intrinsics support handling
|
||||||
|
|
||||||
|
ARM:
|
||||||
|
* Added support for embedded Cortex M targets via a new option EMBEDDED
|
||||||
|
|
||||||
|
ARMV8:
|
||||||
|
* Fixed the THUNDERX2T99 and NEOVERSEN1 DNRM2/ZNRM2 kernels for inputs with Inf
|
||||||
|
* Added support for the DYNAMIC_LIST option
|
||||||
|
* Added support for compilation with the NVIDIA HPC compiler
|
||||||
|
* Added support for compiling with the NAG Fortran compiler
|
||||||
|
|
||||||
====================================================================
|
====================================================================
|
||||||
Version 0.3.13
|
Version 0.3.13
|
||||||
12-Dec-2020
|
12-Dec-2020
|
||||||
|
|
|
@ -3,7 +3,7 @@
|
||||||
#
|
#
|
||||||
|
|
||||||
# This library's version
|
# This library's version
|
||||||
VERSION = 0.3.13.dev
|
VERSION = 0.3.14.dev
|
||||||
|
|
||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||||
|
|
26
param.h
26
param.h
|
@ -72,6 +72,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#ifndef PARAM_H
|
#ifndef PARAM_H
|
||||||
#define PARAM_H
|
#define PARAM_H
|
||||||
|
|
||||||
|
#define LONGCAST (BLASLONG)
|
||||||
|
#if defined(__BYTE_ORDER__)
|
||||||
|
#if __GNUC__ < 9
|
||||||
|
#undef LONGCAST
|
||||||
|
#define LONGCAST
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#define SBGEMM_DEFAULT_UNROLL_N 4
|
#define SBGEMM_DEFAULT_UNROLL_N 4
|
||||||
#define SBGEMM_DEFAULT_UNROLL_M 8
|
#define SBGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define SBGEMM_DEFAULT_UNROLL_MN 32
|
#define SBGEMM_DEFAULT_UNROLL_MN 32
|
||||||
|
@ -2088,7 +2096,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#ifdef PPCG4
|
#ifdef PPCG4
|
||||||
#define GEMM_DEFAULT_OFFSET_A 0
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
#define GEMM_DEFAULT_OFFSET_B 1024
|
#define GEMM_DEFAULT_OFFSET_B 1024
|
||||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
|
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
@ -2119,7 +2127,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define GEMM_DEFAULT_OFFSET_A 2688
|
#define GEMM_DEFAULT_OFFSET_A 2688
|
||||||
#define GEMM_DEFAULT_OFFSET_B 3072
|
#define GEMM_DEFAULT_OFFSET_B 3072
|
||||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
|
#define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL
|
||||||
|
|
||||||
#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
|
#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
|
@ -2168,7 +2176,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
|
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
|
||||||
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
|
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
|
||||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
|
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
@ -2204,7 +2212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
|
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
|
||||||
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
|
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
|
||||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
|
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 8
|
#define SGEMM_DEFAULT_UNROLL_M 8
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
@ -2239,7 +2247,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#if defined(POWER3) || defined(POWER4) || defined(POWER5)
|
#if defined(POWER3) || defined(POWER4) || defined(POWER5)
|
||||||
#define GEMM_DEFAULT_OFFSET_A 0
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
#define GEMM_DEFAULT_OFFSET_B 2048
|
#define GEMM_DEFAULT_OFFSET_B 2048
|
||||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
|
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
@ -2312,7 +2320,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define GEMM_DEFAULT_OFFSET_A 384
|
#define GEMM_DEFAULT_OFFSET_A 384
|
||||||
#define GEMM_DEFAULT_OFFSET_B 1024
|
#define GEMM_DEFAULT_OFFSET_B 1024
|
||||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
|
#define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
@ -2345,7 +2353,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define GEMM_DEFAULT_OFFSET_A 0
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
#define GEMM_DEFAULT_OFFSET_B 65536
|
#define GEMM_DEFAULT_OFFSET_B 65536
|
||||||
|
|
||||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
|
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||||
#if defined(__32BIT__)
|
#if defined(__32BIT__)
|
||||||
#warning using BINARY32==POWER6
|
#warning using BINARY32==POWER6
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
|
@ -2398,7 +2406,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define GEMM_DEFAULT_OFFSET_A 0
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
#define GEMM_DEFAULT_OFFSET_B 65536
|
#define GEMM_DEFAULT_OFFSET_B 65536
|
||||||
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||||
|
|
||||||
#define SWITCH_RATIO 16
|
#define SWITCH_RATIO 16
|
||||||
#define GEMM_PREFERED_SIZE 16
|
#define GEMM_PREFERED_SIZE 16
|
||||||
|
@ -2437,7 +2445,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define GEMM_DEFAULT_OFFSET_A 0
|
#define GEMM_DEFAULT_OFFSET_A 0
|
||||||
#define GEMM_DEFAULT_OFFSET_B 65536
|
#define GEMM_DEFAULT_OFFSET_B 65536
|
||||||
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||||
|
|
||||||
#define SWITCH_RATIO 16
|
#define SWITCH_RATIO 16
|
||||||
#define GEMM_PREFERED_SIZE 16
|
#define GEMM_PREFERED_SIZE 16
|
||||||
|
|
Loading…
Reference in New Issue