commit
fa9a30b491
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
|
|||
project(OpenBLAS C ASM)
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 13.dev)
|
||||
set(OpenBLAS_PATCH_VERSION 14.dev)
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
# Adhere to GNU filesystem layout conventions
|
||||
|
|
|
@ -1,4 +1,52 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.3.14
|
||||
17-Mar-2021
|
||||
|
||||
common:
|
||||
* Fixed a race condition on thread shutdown in non-OpenMP builds
|
||||
* Fixed custom BUFFERSIZE option getting ignored in gmake builds
|
||||
* Fixed CMAKE compilation of the TRMM kernels for GENERIC platforms
|
||||
* Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT
|
||||
* Improved performance of OMATCOPY_RT across all platforms
|
||||
* Changed perl scripts to use env instead of a hardcoded /usr/bin/perl
|
||||
* Fixed potential misreading of the GCC compiler version in the build scripts
|
||||
* Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477)
|
||||
* Reduced the stacksize requirements for running the LAPACK testsuite (Reference-LAPACK #335)
|
||||
|
||||
RISCV:
|
||||
* Fixed compilation on RISCV (missing entry in getarch)
|
||||
|
||||
POWER:
|
||||
* Fixed compilation for DYNAMIC_ARCH with clang and with old gcc versions
|
||||
* Added support for compilation on FreeBSD/ppc64le
|
||||
* Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL
|
||||
* Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM
|
||||
* Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10
|
||||
* Improved SCOPY and CCOPY performance on POWER10
|
||||
* Improved SGEMM and DGEMM performance on POWER10
|
||||
* Added support for compilation with the NVIDIA HPC compiler
|
||||
|
||||
x86_64:
|
||||
* Added an optimized bfloat16 GEMM kernel for Cooperlake
|
||||
* Added CPUID autodetection for Intel Rocket Lake and Tiger Lake cpus
|
||||
* Improved the performance of SASUM,DASUM,SROT,DROT on AMD Ryzen cpus
|
||||
* Added support for compilation with the NAG Fortran compiler
|
||||
* Fixed recognition of the AMD AOCC compiler
|
||||
* Fixed compilation for DYNAMIC_ARCH with clang on Windows
|
||||
* Added support for running the BLAS/CBLAS tests on Windows
|
||||
* Fixed signatures of the tls callback functions for Windows x64
|
||||
* Fixed various issues with fma intrinsics support handling
|
||||
|
||||
ARM:
|
||||
* Added support for embedded Cortex M targets via a new option EMBEDDED
|
||||
|
||||
ARMV8:
|
||||
* Fixed the THUNDERX2T99 and NEOVERSEN1 DNRM2/ZNRM2 kernels for inputs with Inf
|
||||
* Added support for the DYNAMIC_LIST option
|
||||
* Added support for compilation with the NVIDIA HPC compiler
|
||||
* Added support for compiling with the NAG Fortran compiler
|
||||
|
||||
====================================================================
|
||||
Version 0.3.13
|
||||
12-Dec-2020
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.13.dev
|
||||
VERSION = 0.3.14.dev
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
|
26
param.h
26
param.h
|
@ -72,6 +72,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifndef PARAM_H
|
||||
#define PARAM_H
|
||||
|
||||
#define LONGCAST (BLASLONG)
|
||||
#if defined(__BYTE_ORDER__)
|
||||
#if __GNUC__ < 9
|
||||
#undef LONGCAST
|
||||
#define LONGCAST
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define SBGEMM_DEFAULT_UNROLL_N 4
|
||||
#define SBGEMM_DEFAULT_UNROLL_M 8
|
||||
#define SBGEMM_DEFAULT_UNROLL_MN 32
|
||||
|
@ -2088,7 +2096,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifdef PPCG4
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 1024
|
||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
|
||||
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 16
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
@ -2119,7 +2127,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define GEMM_DEFAULT_OFFSET_A 2688
|
||||
#define GEMM_DEFAULT_OFFSET_B 3072
|
||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
|
||||
#define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL
|
||||
|
||||
#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
|
@ -2168,7 +2176,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
|
||||
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
|
||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
|
||||
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
@ -2204,7 +2212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
|
||||
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
|
||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
|
||||
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 8
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
@ -2239,7 +2247,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(POWER3) || defined(POWER4) || defined(POWER5)
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 2048
|
||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
|
||||
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
@ -2312,7 +2320,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define GEMM_DEFAULT_OFFSET_A 384
|
||||
#define GEMM_DEFAULT_OFFSET_B 1024
|
||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
|
||||
#define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL
|
||||
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||
|
@ -2345,7 +2353,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 65536
|
||||
|
||||
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
|
||||
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||
#if defined(__32BIT__)
|
||||
#warning using BINARY32==POWER6
|
||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||
|
@ -2398,7 +2406,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 65536
|
||||
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
||||
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||
|
||||
#define SWITCH_RATIO 16
|
||||
#define GEMM_PREFERED_SIZE 16
|
||||
|
@ -2437,7 +2445,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define GEMM_DEFAULT_OFFSET_A 0
|
||||
#define GEMM_DEFAULT_OFFSET_B 65536
|
||||
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
|
||||
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
|
||||
|
||||
#define SWITCH_RATIO 16
|
||||
#define GEMM_PREFERED_SIZE 16
|
||||
|
|
Loading…
Reference in New Issue