Merge pull request #19 from xianyi/develop

rebase
This commit is contained in:
Martin Kroeker 2021-03-18 07:47:03 +01:00 committed by GitHub
commit fa9a30b491
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 67 additions and 11 deletions

View File

@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 13.dev)
set(OpenBLAS_PATCH_VERSION 14.dev)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
# Adhere to GNU filesystem layout conventions

View File

@ -1,4 +1,52 @@
OpenBLAS ChangeLog
====================================================================
Version 0.3.14
17-Mar-2021
common:
* Fixed a race condition on thread shutdown in non-OpenMP builds
* Fixed custom BUFFERSIZE option getting ignored in gmake builds
* Fixed CMake compilation of the TRMM kernels for GENERIC platforms
* Added CBLAS interfaces for CROTG, ZROTG, CSROT and ZDROT
* Improved performance of OMATCOPY_RT across all platforms
* Changed Perl scripts to use env instead of a hardcoded /usr/bin/perl
* Fixed potential misreading of the GCC compiler version in the build scripts
* Fixed convergence problems in LAPACK complex GGEV/GGES (Reference-LAPACK #477)
* Reduced the stack size requirements for running the LAPACK testsuite (Reference-LAPACK #335)
RISCV:
* Fixed compilation on RISCV (missing entry in getarch)
POWER:
* Fixed compilation for DYNAMIC_ARCH with clang and with old gcc versions
* Added support for compilation on FreeBSD/ppc64le
* Added optimized POWER10 kernels for SSCAL, DSCAL, CSCAL, ZSCAL
* Added optimized POWER10 kernels for SROT, DROT, CDOT, SASUM, DASUM
* Improved SSWAP, DSWAP, CSWAP, ZSWAP performance on POWER10
* Improved SCOPY and CCOPY performance on POWER10
* Improved SGEMM and DGEMM performance on POWER10
* Added support for compilation with the NVIDIA HPC compiler
x86_64:
* Added an optimized bfloat16 GEMM kernel for Cooperlake
* Added CPUID autodetection for Intel Rocket Lake and Tiger Lake CPUs
* Improved the performance of SASUM, DASUM, SROT, DROT on AMD Ryzen CPUs
* Added support for compilation with the NAG Fortran compiler
* Fixed recognition of the AMD AOCC compiler
* Fixed compilation for DYNAMIC_ARCH with clang on Windows
* Added support for running the BLAS/CBLAS tests on Windows
* Fixed signatures of the TLS callback functions for Windows x64
* Fixed various issues with FMA intrinsics support handling
ARM:
* Added support for embedded Cortex M targets via a new option EMBEDDED
ARMV8:
* Fixed the THUNDERX2T99 and NEOVERSEN1 DNRM2/ZNRM2 kernels for inputs with Inf
* Added support for the DYNAMIC_LIST option
* Added support for compilation with the NVIDIA HPC compiler
* Added support for compiling with the NAG Fortran compiler
====================================================================
Version 0.3.13
12-Dec-2020

View File

@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.3.13.dev
VERSION = 0.3.14.dev
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

26
param.h
View File

@ -72,6 +72,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef PARAM_H
#define PARAM_H
/* LONGCAST is the cast prefix written in front of the GEMM_DEFAULT_ALIGN
 * constants defined further down in this header. By default it expands
 * to (BLASLONG). */
#define LONGCAST (BLASLONG)
#if defined(__BYTE_ORDER__)
/* When the compiler predefines __BYTE_ORDER__ and __GNUC__ compares below 9,
 * LONGCAST is redefined to expand to nothing, so the alignment constants are
 * emitted without any cast. Note that an undefined __GNUC__ evaluates as 0
 * inside #if, so such compilers take this branch as well.
 * NOTE(review): presumably a workaround for older GCC releases (and other
 * compilers reporting a low __GNUC__ value) that could not handle the cast
 * form - confirm against the build failures that motivated it. */
#if __GNUC__ < 9
#undef LONGCAST
#define LONGCAST
#endif
#endif
#define SBGEMM_DEFAULT_UNROLL_N 4
#define SBGEMM_DEFAULT_UNROLL_M 8
#define SBGEMM_DEFAULT_UNROLL_MN 32
@ -2088,7 +2096,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifdef PPCG4
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 1024
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4
@ -2119,7 +2127,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A 2688
#define GEMM_DEFAULT_OFFSET_B 3072
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
#define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL
#if defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define SGEMM_DEFAULT_UNROLL_M 4
@ -2168,7 +2176,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
@ -2204,7 +2212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A (32 * 0)
#define GEMM_DEFAULT_OFFSET_B (32 * 0)
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
#define SGEMM_DEFAULT_UNROLL_M 8
#define SGEMM_DEFAULT_UNROLL_N 4
@ -2239,7 +2247,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(POWER3) || defined(POWER4) || defined(POWER5)
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 2048
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
@ -2312,7 +2320,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A 384
#define GEMM_DEFAULT_OFFSET_B 1024
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x03fffUL
#define GEMM_DEFAULT_ALIGN LONGCAST 0x03fffUL
#define SGEMM_DEFAULT_UNROLL_M 4
#define SGEMM_DEFAULT_UNROLL_N 4
@ -2345,7 +2353,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 65536
#define GEMM_DEFAULT_ALIGN (BLASLONG)0x0ffffUL
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
#if defined(__32BIT__)
#warning using BINARY32==POWER6
#define SGEMM_DEFAULT_UNROLL_M 4
@ -2398,7 +2406,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 65536
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
#define SWITCH_RATIO 16
#define GEMM_PREFERED_SIZE 16
@ -2437,7 +2445,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define GEMM_DEFAULT_OFFSET_A 0
#define GEMM_DEFAULT_OFFSET_B 65536
#define GEMM_DEFAULT_ALIGN 0x0ffffUL
#define GEMM_DEFAULT_ALIGN LONGCAST 0x0ffffUL
#define SWITCH_RATIO 16
#define GEMM_PREFERED_SIZE 16