Merge branch 'develop' into fbsd12
This commit is contained in:
commit
c5f8aeff2d
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
|
|||
project(OpenBLAS C ASM)
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 4.dev)
|
||||
set(OpenBLAS_PATCH_VERSION 5.dev)
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
# Adhere to GNU filesystem layout conventions
|
||||
|
|
|
@ -1,4 +1,77 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.3.4
|
||||
02-Dec-2018
|
||||
|
||||
common:
|
||||
* the new, experimental thread-local memory allocation had
|
||||
inadvertently been left enabled for gmake builds in 0.3.3
|
||||
despite the announcement. It is now disabled by default, and
|
||||
single-threaded builds will keep using the old allocator even
|
||||
if the USE_TLS option is turned on.
|
||||
* OpenBLAS will now provide enough buffer space for at least 50
|
||||
threads by default.
|
||||
* The output of openblas_get_config() now contains the version
|
||||
number.
|
||||
* A serious thread safety bug in GEMV operation with small M and
|
||||
large N size has been fixed.
|
||||
* The code will now automatically call blas_thread_init after a
|
||||
fork if needed before handling a call to openblas_set_num_threads.
|
||||
* Accesses to parallelized level3 functions from multiple callers
|
||||
are now serialized to avoid thread races (unless using OpenMP).
|
||||
This should provide better performance than the known-threadsafe
|
||||
(but non-default) USE_SIMPLE_THREADED_LEVEL3 option.
|
||||
* When building LAPACK with gfortran, -frecursive is now (again)
|
||||
enabled by default to ensure correct behaviour.
|
||||
* The OpenBLAS version of cblas.h now supports both CBLAS_ORDER and
|
||||
CBLAS_LAYOUT as the name of the matrix row/column order option.
|
||||
* Externally set LDFLAGS are now passed through to the final compile/link
|
||||
steps to facilitate setting platform-specific linker flags.
|
||||
* A potential race condition during the build of LAPACK (that would
|
||||
usually manifest itself as a failure to build TESTING/MATGEN) has been
|
||||
fixed.
|
||||
* xHEMV has been changed to stay single-threaded for small input sizes
|
||||
where the overhead of multithreading exceeds any possible gains
|
||||
* CSWAP and ZSWAP have been limited to a single thread except on ARMV8 or
|
||||
ThunderX hardware with sizable input.
|
||||
* Linker flags for the PGI compiler have been updated
|
||||
* Behaviour of AXPY with zero increments is now handled in the C interface,
|
||||
correcting the result on at least Intel Atom.
|
||||
* The result matrix from calling SGELSS with an all-zero input matrix is
|
||||
now zeroed completely.
|
||||
|
||||
x86_64:
|
||||
* Autodetection of AMD Ryzen2 has been fixed (again).
|
||||
* CMAKE builds now support labeling of an INTERFACE64=1 build of
|
||||
the library with the _64 suffix.
|
||||
* AVX512 version of DGEMM has been added and the AVX512 SGEMM kernel
|
||||
has been sped up by rewriting with C intrinsics
|
||||
* Fixed compilation on RHEL5/CENTOS5 (issue with typename __WAIT_STATUS)
|
||||
|
||||
POWER:
|
||||
* added support for building on AIX (with gcc and GNU tools from AIX Toolbox).
|
||||
* CPU type detection has been implemented for AIX.
|
||||
* CPU type detection has been fixed for NETBSD.
|
||||
|
||||
MIPS64:
|
||||
* AXPY on LOONGSON3A has been corrected to pass "zero increment" utest.
|
||||
* DSDOT on LOONGSON3A has been fixed.
|
||||
* the SGEMM microkernel has been hardened against potential data loss.
|
||||
|
||||
ARMV8:
|
||||
* DYNAMIC_ARCH support is now available for 64bit ARM
|
||||
* cross-compiling for ARMV8 under iOS now works.
|
||||
* cpu-specific code has been rearranged to make better use of both
|
||||
hardware commonalities and model-specific compiler optimizations.
|
||||
* XGENE1 has been removed as a TARGET, superseded by the improved generic
|
||||
ARMV8 support.
|
||||
|
||||
ARMV7:
|
||||
* Older assembly mnemonics have been converted to UAL form to allow
|
||||
building with clang 7.0
|
||||
* Cross compiling LAPACKE for Android has been fixed again (broken by
|
||||
update to LAPACK 3.7.0 some while ago).
|
||||
|
||||
====================================================================
|
||||
Version 0.3.3
|
||||
31-Aug-2018
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.4.dev
|
||||
VERSION = 0.3.5.dev
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
|
|
@ -1042,6 +1042,8 @@ ifdef USE_TLS
|
|||
CCOMMON_OPT += -DUSE_TLS
|
||||
endif
|
||||
|
||||
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
|
||||
|
||||
ifndef SYMBOLPREFIX
|
||||
SYMBOLPREFIX =
|
||||
endif
|
||||
|
|
|
@ -310,6 +310,8 @@ if (MIXED_MEMORY_ALLOCATION)
|
|||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
|
||||
endif ()
|
||||
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DVERSION=\"\\\"${OpenBLAS_VERSION}\\\"\"")
|
||||
|
||||
set(REVISION "-r${OpenBLAS_VERSION}")
|
||||
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
|
||||
|
||||
|
|
|
@ -10,6 +10,16 @@ if (${HOST_OS} STREQUAL "WINDOWS")
|
|||
set(HOST_OS WINNT)
|
||||
endif ()
|
||||
|
||||
if ("${HOST_OS}" STREQUAL "LINUX")
  # Check whether we are building natively on Android (Termux): HOST_OS is
  # LINUX at this point, and `uname -o` is queried for the operating system
  # name to tell the two apart.
  # execute_process() does not run a shell, so trailing newlines are removed
  # with OUTPUT_STRIP_TRAILING_WHITESPACE rather than by piping through `tr`.
  execute_process(
    COMMAND uname -o
    OUTPUT_VARIABLE OPERATING_SYSTEM
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Quoted so that an empty result (uname missing or failing) is compared as
  # an empty string instead of yielding a malformed if() expression.
  if ("${OPERATING_SYSTEM}" MATCHES "Android")
    set(HOST_OS ANDROID)
  endif ()
endif ()
|
||||
|
||||
|
||||
|
||||
if(CMAKE_COMPILER_IS_GNUCC AND WIN32)
|
||||
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine
|
||||
OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE
|
||||
|
|
|
@ -175,9 +175,9 @@ int detect(void){
|
|||
return CPUTYPE_PPC970;
|
||||
#endif
|
||||
|
||||
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
|
||||
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
|
||||
int id;
|
||||
id = __asm __volatile("mfpvr %0" : "=r"(id));
|
||||
__asm __volatile("mfpvr %0" : "=r"(id));
|
||||
switch ( id >> 16 ) {
|
||||
case 0x4e: // POWER9
|
||||
return CPUTYPE_POWER8;
|
||||
|
|
|
@ -2586,7 +2586,7 @@ void *blas_memory_alloc(int procpos){
|
|||
printf("Alloc Start ...\n");
|
||||
#endif
|
||||
|
||||
#if defined(WHEREAMI) && !defined(USE_OPENMP)
|
||||
/* #if defined(WHEREAMI) && !defined(USE_OPENMP)
|
||||
|
||||
mypos = WhereAmI();
|
||||
|
||||
|
@ -2596,12 +2596,12 @@ void *blas_memory_alloc(int procpos){
|
|||
do {
|
||||
if (!memory[position].used && (memory[position].pos == mypos)) {
|
||||
LOCK_COMMAND(&alloc_lock);
|
||||
/* blas_lock(&memory[position].lock);*/
|
||||
// blas_lock(&memory[position].lock);
|
||||
|
||||
if (!memory[position].used) goto allocation;
|
||||
|
||||
UNLOCK_COMMAND(&alloc_lock);
|
||||
/* blas_unlock(&memory[position].lock);*/
|
||||
// blas_unlock(&memory[position].lock);
|
||||
}
|
||||
|
||||
position ++;
|
||||
|
@ -2609,7 +2609,7 @@ void *blas_memory_alloc(int procpos){
|
|||
} while (position < NUM_BUFFERS);
|
||||
|
||||
|
||||
#endif
|
||||
#endif */
|
||||
|
||||
position = 0;
|
||||
|
||||
|
|
|
@ -42,6 +42,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
static char* openblas_config_str=""
|
||||
"OpenBLAS "
|
||||
VERSION
|
||||
" "
|
||||
#ifdef USE64BITINT
|
||||
" USE64BITINT "
|
||||
#endif
|
||||
|
|
|
@ -7,6 +7,7 @@ ZROTKERNEL = ../mips/zrot.c
|
|||
CSWAPKERNEL = ../mips/zswap.c
|
||||
ZSWAPKERNEL = ../mips/zswap.c
|
||||
|
||||
|
||||
ifndef SNRM2KERNEL
|
||||
SNRM2KERNEL = snrm2.S
|
||||
endif
|
||||
|
|
|
@ -63,6 +63,7 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
|||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||
|
||||
DSDOTKERNEL = ../mips/dot.c
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -146,11 +146,11 @@
|
|||
sd $21, 40($sp)
|
||||
sd $22, 48($sp)
|
||||
|
||||
ST $f24, 56($sp)
|
||||
ST $f25, 64($sp)
|
||||
ST $f26, 72($sp)
|
||||
ST $f27, 80($sp)
|
||||
ST $f28, 88($sp)
|
||||
sdc1 $f24, 56($sp)
|
||||
sdc1 $f25, 64($sp)
|
||||
sdc1 $f26, 72($sp)
|
||||
sdc1 $f27, 80($sp)
|
||||
sdc1 $f28, 88($sp)
|
||||
|
||||
#if defined(TRMMKERNEL)
|
||||
sd $23, 96($sp)
|
||||
|
@ -161,10 +161,10 @@
|
|||
#endif
|
||||
|
||||
#ifndef __64BIT__
|
||||
ST $f20,120($sp)
|
||||
ST $f21,128($sp)
|
||||
ST $f22,136($sp)
|
||||
ST $f23,144($sp)
|
||||
sdc1 $f20,120($sp)
|
||||
sdc1 $f21,128($sp)
|
||||
sdc1 $f22,136($sp)
|
||||
sdc1 $f23,144($sp)
|
||||
#endif
|
||||
|
||||
.align 4
|
||||
|
@ -7766,11 +7766,11 @@
|
|||
ld $21, 40($sp)
|
||||
ld $22, 48($sp)
|
||||
|
||||
LD $f24, 56($sp)
|
||||
LD $f25, 64($sp)
|
||||
LD $f26, 72($sp)
|
||||
LD $f27, 80($sp)
|
||||
LD $f28, 88($sp)
|
||||
ldc1 $f24, 56($sp)
|
||||
ldc1 $f25, 64($sp)
|
||||
ldc1 $f26, 72($sp)
|
||||
ldc1 $f27, 80($sp)
|
||||
ldc1 $f28, 88($sp)
|
||||
|
||||
#if defined(TRMMKERNEL)
|
||||
ld $23, 96($sp)
|
||||
|
@ -7779,10 +7779,10 @@
|
|||
#endif
|
||||
|
||||
#ifndef __64BIT__
|
||||
LD $f20,120($sp)
|
||||
LD $f21,128($sp)
|
||||
LD $f22,136($sp)
|
||||
LD $f23,144($sp)
|
||||
ldc1 $f20,120($sp)
|
||||
ldc1 $f21,128($sp)
|
||||
ldc1 $f22,136($sp)
|
||||
ldc1 $f23,144($sp)
|
||||
#endif
|
||||
|
||||
daddiu $sp,$sp,STACKSIZE
|
||||
|
|
|
@ -56,7 +56,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
|
|||
}
|
||||
|
||||
if (n == 0 || m == 0)
|
||||
return;
|
||||
return 0;
|
||||
|
||||
c_offset = c;
|
||||
|
||||
|
|
Loading…
Reference in New Issue