Merge branch 'develop' into fbsd12
This commit is contained in:
		
						commit
						c5f8aeff2d
					
				| 
						 | 
					@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
 | 
				
			||||||
project(OpenBLAS C ASM)
 | 
					project(OpenBLAS C ASM)
 | 
				
			||||||
set(OpenBLAS_MAJOR_VERSION 0)
 | 
					set(OpenBLAS_MAJOR_VERSION 0)
 | 
				
			||||||
set(OpenBLAS_MINOR_VERSION 3)
 | 
					set(OpenBLAS_MINOR_VERSION 3)
 | 
				
			||||||
set(OpenBLAS_PATCH_VERSION 4.dev)
 | 
					set(OpenBLAS_PATCH_VERSION 5.dev)
 | 
				
			||||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
 | 
					set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Adhere to GNU filesystem layout conventions
 | 
					# Adhere to GNU filesystem layout conventions
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,77 @@
 | 
				
			||||||
OpenBLAS ChangeLog
 | 
					OpenBLAS ChangeLog
 | 
				
			||||||
 | 
					====================================================================
 | 
				
			||||||
 | 
					Version 0.3.4
 | 
				
			||||||
 | 
					02-Dec-2018
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					common:
 | 
				
			||||||
 | 
						* the new, experimental thread-local memory allocation had 
 | 
				
			||||||
 | 
						  inadvertently been left enabled for gmake builds in 0.3.3
 | 
				
			||||||
 | 
						  despite the announcement. It is now disabled by default, and
 | 
				
			||||||
 | 
						  single-threaded builds will keep using the old allocator even
 | 
				
			||||||
 | 
						  if the USE_TLS option is turned on.
 | 
				
			||||||
 | 
						* OpenBLAS will now provide enough buffer space for at least 50
 | 
				
			||||||
 | 
						  threads by default.
 | 
				
			||||||
 | 
						* The output of openblas_get_config() now contains the version
 | 
				
			||||||
 | 
						  number.
 | 
				
			||||||
 | 
						* A serious thread safety bug in GEMV operation with small M and
 | 
				
			||||||
 | 
						  large N size has been fixed.
 | 
				
			||||||
 | 
						* The code will now automatically call blas_thread_init after a
 | 
				
			||||||
 | 
						  fork if needed before handling a call to openblas_set_num_threads.
 | 
				
			||||||
 | 
						* Accesses to parallelized level3 functions from multiple callers
 | 
				
			||||||
 | 
						  are now serialized to avoid thread races (unless using OpenMP).
 | 
				
			||||||
 | 
						  This should provide better performance than the known-threadsafe
 | 
				
			||||||
 | 
						  (but non-default) USE_SIMPLE_THREADED_LEVEL3 option.
 | 
				
			||||||
 | 
						* When building LAPACK with gfortran, -frecursive is now (again)
 | 
				
			||||||
 | 
						  enabled by default to ensure correct behaviour.
 | 
				
			||||||
 | 
					        * The OpenBLAS version of cblas.h now supports both CBLAS_ORDER and
 | 
				
			||||||
 | 
						  CBLAS_LAYOUT as the name of the matrix row/column order option.
 | 
				
			||||||
 | 
						* Externally set LDFLAGS are now passed through to the final compile/link
 | 
				
			||||||
 | 
						  steps to facilitate setting platform-specific linker flags.
 | 
				
			||||||
 | 
						* A potential race condition during the build of LAPACK (that would 
 | 
				
			||||||
 | 
						  usually manifest itself as a failure to build TESTING/MATGEN) has been 
 | 
				
			||||||
 | 
						  fixed.
 | 
				
			||||||
 | 
						* xHEMV has been changed to stay single-threaded for small input sizes
 | 
				
			||||||
 | 
						  where the overhead of multithreading exceeds any possible gains
 | 
				
			||||||
 | 
						* CSWAP and ZSWAP have been limited to a single thread except on ARMV8 or
 | 
				
			||||||
 | 
						  ThunderX hardware with sizable input.
 | 
				
			||||||
 | 
						* Linker flags for the PGI compiler have been updated
 | 
				
			||||||
 | 
						* Behaviour of AXPY with zero increments is now handled in the C interface,
 | 
				
			||||||
 | 
						  correcting the result on at least Intel Atom.
 | 
				
			||||||
 | 
						* The result matrix from calling SGELSS with an all-zero input matrix is 
 | 
				
			||||||
 | 
						  now zeroed completely.
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
 | 
					x86_64:
 | 
				
			||||||
 | 
						* Autodetection of AMD Ryzen2 has been fixed (again).
 | 
				
			||||||
 | 
					        * CMAKE builds now support labeling of an INTERFACE64=1 build of
 | 
				
			||||||
 | 
						  the library with the _64 suffix.
 | 
				
			||||||
 | 
						* AVX512 version of DGEMM has been added and the AVX512 SGEMM kernel
 | 
				
			||||||
 | 
						  has been sped up by rewriting with C intrinsics
 | 
				
			||||||
 | 
						* Fixed compilation on RHEL5/CENTOS5 (issue with typename __WAIT_STATUS)
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					POWER:
 | 
				
			||||||
 | 
						* added support for building on AIX (with gcc and GNU tools from AIX Toolbox).
 | 
				
			||||||
 | 
						* CPU type detection has been implemented for AIX.
 | 
				
			||||||
 | 
						* CPU type detection has been fixed for NETBSD.
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					MIPS64:
 | 
				
			||||||
 | 
						* AXPY on LOONGSON3A has been corrected to pass "zero increment" utest.
 | 
				
			||||||
 | 
						* DSDOT on LOONGSON3A has been fixed.
 | 
				
			||||||
 | 
						* the SGEMM microkernel has been hardened against potential data loss.
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					ARMV8:
 | 
				
			||||||
 | 
						* DYNAMIC_ARCH support is now available for 64bit ARM
 | 
				
			||||||
 | 
						* cross-compiling for ARMV8 under iOS now works.
 | 
				
			||||||
 | 
						* cpu-specific code has been rearranged to make better use of both
 | 
				
			||||||
 | 
						  hardware commonalities and model-specific compiler optimizations.
 | 
				
			||||||
 | 
						* XGENE1 has been removed as a TARGET, superseded by the improved generic
 | 
				
			||||||
 | 
						  ARMV8 support.
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					ARMV7:
 | 
				
			||||||
 | 
						* Older assembly mnemonics have been converted to UAL form to allow
 | 
				
			||||||
 | 
						  building with clang 7.0
 | 
				
			||||||
 | 
						* Cross compiling LAPACKE for Android has been fixed again (broken by
 | 
				
			||||||
 | 
						  update to LAPACK 3.7.0 some time ago).
 | 
				
			||||||
 | 
						  
 | 
				
			||||||
====================================================================
 | 
					====================================================================
 | 
				
			||||||
Version 0.3.3
 | 
					Version 0.3.3
 | 
				
			||||||
31-Aug-2018
 | 
					31-Aug-2018
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,7 +3,7 @@
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# This library's version
 | 
					# This library's version
 | 
				
			||||||
VERSION = 0.3.4.dev
 | 
					VERSION = 0.3.5.dev
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
 | 
					# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
 | 
				
			||||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
 | 
					# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1042,6 +1042,8 @@ ifdef USE_TLS
 | 
				
			||||||
CCOMMON_OPT += -DUSE_TLS
 | 
					CCOMMON_OPT += -DUSE_TLS
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
ifndef SYMBOLPREFIX
 | 
					ifndef SYMBOLPREFIX
 | 
				
			||||||
SYMBOLPREFIX =
 | 
					SYMBOLPREFIX =
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -310,6 +310,8 @@ if (MIXED_MEMORY_ALLOCATION)
 | 
				
			||||||
  set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
 | 
					  set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION")
 | 
				
			||||||
endif ()
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					set(CCOMMON_OPT "${CCOMMON_OPT} -DVERSION=\"\\\"${OpenBLAS_VERSION}\\\"\"")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
set(REVISION "-r${OpenBLAS_VERSION}")
 | 
					set(REVISION "-r${OpenBLAS_VERSION}")
 | 
				
			||||||
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
 | 
					set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION})
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,6 +10,16 @@ if (${HOST_OS} STREQUAL "WINDOWS")
 | 
				
			||||||
  set(HOST_OS WINNT)
 | 
					  set(HOST_OS WINNT)
 | 
				
			||||||
endif ()
 | 
					endif ()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					if (${HOST_OS} STREQUAL "LINUX")
 | 
				
			||||||
 | 
					# check if we're building natively on Android (TERMUX)
 | 
				
			||||||
 | 
					    EXECUTE_PROCESS( COMMAND uname -o COMMAND tr -d '\n' OUTPUT_VARIABLE OPERATING_SYSTEM)
 | 
				
			||||||
 | 
					      if(${OPERATING_SYSTEM} MATCHES "Android")
 | 
				
			||||||
 | 
					        set(HOST_OS ANDROID)
 | 
				
			||||||
 | 
					      endif(${OPERATING_SYSTEM} MATCHES "Android")
 | 
				
			||||||
 | 
					endif()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if(CMAKE_COMPILER_IS_GNUCC AND WIN32)
 | 
					if(CMAKE_COMPILER_IS_GNUCC AND WIN32)
 | 
				
			||||||
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine
 | 
					    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpmachine
 | 
				
			||||||
              OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE
 | 
					              OUTPUT_VARIABLE OPENBLAS_GCC_TARGET_MACHINE
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -175,9 +175,9 @@ int detect(void){
 | 
				
			||||||
  return  CPUTYPE_PPC970;
 | 
					  return  CPUTYPE_PPC970;
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__DragonFly__)
 | 
					#if defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__NetBSD__)
 | 
				
			||||||
int id;
 | 
					int id;
 | 
				
			||||||
id = __asm __volatile("mfpvr %0" : "=r"(id));
 | 
					__asm __volatile("mfpvr %0" : "=r"(id));
 | 
				
			||||||
switch ( id >> 16 ) {
 | 
					switch ( id >> 16 ) {
 | 
				
			||||||
  case 0x4e: // POWER9
 | 
					  case 0x4e: // POWER9
 | 
				
			||||||
    return CPUTYPE_POWER8;
 | 
					    return CPUTYPE_POWER8;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2586,7 +2586,7 @@ void *blas_memory_alloc(int procpos){
 | 
				
			||||||
  printf("Alloc Start ...\n");
 | 
					  printf("Alloc Start ...\n");
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(WHEREAMI) && !defined(USE_OPENMP)
 | 
					/* #if defined(WHEREAMI) && !defined(USE_OPENMP)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  mypos = WhereAmI();
 | 
					  mypos = WhereAmI();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2596,12 +2596,12 @@ void *blas_memory_alloc(int procpos){
 | 
				
			||||||
  do {
 | 
					  do {
 | 
				
			||||||
    if (!memory[position].used && (memory[position].pos == mypos)) {
 | 
					    if (!memory[position].used && (memory[position].pos == mypos)) {
 | 
				
			||||||
      LOCK_COMMAND(&alloc_lock);
 | 
					      LOCK_COMMAND(&alloc_lock);
 | 
				
			||||||
/*      blas_lock(&memory[position].lock);*/
 | 
					//      blas_lock(&memory[position].lock);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      if (!memory[position].used) goto allocation;
 | 
					      if (!memory[position].used) goto allocation;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      UNLOCK_COMMAND(&alloc_lock);
 | 
					      UNLOCK_COMMAND(&alloc_lock);
 | 
				
			||||||
/*      blas_unlock(&memory[position].lock);*/
 | 
					//      blas_unlock(&memory[position].lock);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    position ++;
 | 
					    position ++;
 | 
				
			||||||
| 
						 | 
					@ -2609,7 +2609,7 @@ void *blas_memory_alloc(int procpos){
 | 
				
			||||||
  } while (position < NUM_BUFFERS);
 | 
					  } while (position < NUM_BUFFERS);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  position = 0;
 | 
					  position = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -42,6 +42,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static char* openblas_config_str=""
 | 
					static char* openblas_config_str=""
 | 
				
			||||||
 | 
					"OpenBLAS "
 | 
				
			||||||
 | 
					 VERSION
 | 
				
			||||||
 | 
					" "
 | 
				
			||||||
#ifdef USE64BITINT
 | 
					#ifdef USE64BITINT
 | 
				
			||||||
  " USE64BITINT "
 | 
					  " USE64BITINT "
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -7,6 +7,7 @@ ZROTKERNEL = ../mips/zrot.c
 | 
				
			||||||
CSWAPKERNEL = ../mips/zswap.c
 | 
					CSWAPKERNEL = ../mips/zswap.c
 | 
				
			||||||
ZSWAPKERNEL = ../mips/zswap.c
 | 
					ZSWAPKERNEL = ../mips/zswap.c
 | 
				
			||||||
                                                                                        
 | 
					                                                                                        
 | 
				
			||||||
 | 
					                                                                                                                                          
 | 
				
			||||||
ifndef SNRM2KERNEL
 | 
					ifndef SNRM2KERNEL
 | 
				
			||||||
SNRM2KERNEL = snrm2.S
 | 
					SNRM2KERNEL = snrm2.S
 | 
				
			||||||
endif
 | 
					endif
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -63,6 +63,7 @@ ZTRSMKERNEL_LT	= ../generic/trsm_kernel_LT.c
 | 
				
			||||||
ZTRSMKERNEL_RN	= ../generic/trsm_kernel_RN.c
 | 
					ZTRSMKERNEL_RN	= ../generic/trsm_kernel_RN.c
 | 
				
			||||||
ZTRSMKERNEL_RT	= ../generic/trsm_kernel_RT.c
 | 
					ZTRSMKERNEL_RT	= ../generic/trsm_kernel_RT.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DSDOTKERNEL     = ../mips/dot.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -146,11 +146,11 @@
 | 
				
			||||||
	sd	$21,  40($sp)
 | 
						sd	$21,  40($sp)
 | 
				
			||||||
	sd	$22,  48($sp)
 | 
						sd	$22,  48($sp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	ST	$f24, 56($sp)
 | 
						sdc1	$f24, 56($sp)
 | 
				
			||||||
	ST	$f25, 64($sp)
 | 
						sdc1	$f25, 64($sp)
 | 
				
			||||||
	ST	$f26, 72($sp)
 | 
						sdc1	$f26, 72($sp)
 | 
				
			||||||
	ST	$f27, 80($sp)
 | 
						sdc1	$f27, 80($sp)
 | 
				
			||||||
	ST	$f28, 88($sp)
 | 
						sdc1	$f28, 88($sp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(TRMMKERNEL)
 | 
					#if defined(TRMMKERNEL)
 | 
				
			||||||
	sd	$23,  96($sp)
 | 
						sd	$23,  96($sp)
 | 
				
			||||||
| 
						 | 
					@ -161,10 +161,10 @@
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef __64BIT__
 | 
					#ifndef __64BIT__
 | 
				
			||||||
	ST	$f20,120($sp)
 | 
						sdc1	$f20,120($sp)
 | 
				
			||||||
	ST	$f21,128($sp)
 | 
						sdc1	$f21,128($sp)
 | 
				
			||||||
	ST	$f22,136($sp)
 | 
						sdc1	$f22,136($sp)
 | 
				
			||||||
	ST	$f23,144($sp)
 | 
						sdc1	$f23,144($sp)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	.align	4
 | 
						.align	4
 | 
				
			||||||
| 
						 | 
					@ -7766,11 +7766,11 @@
 | 
				
			||||||
	ld	$21,  40($sp)
 | 
						ld	$21,  40($sp)
 | 
				
			||||||
	ld	$22,  48($sp)
 | 
						ld	$22,  48($sp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	LD	$f24, 56($sp)
 | 
						ldc1	$f24, 56($sp)
 | 
				
			||||||
	LD	$f25, 64($sp)
 | 
						ldc1	$f25, 64($sp)
 | 
				
			||||||
	LD	$f26, 72($sp)
 | 
						ldc1	$f26, 72($sp)
 | 
				
			||||||
	LD	$f27, 80($sp)
 | 
						ldc1	$f27, 80($sp)
 | 
				
			||||||
	LD	$f28, 88($sp)
 | 
						ldc1	$f28, 88($sp)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#if defined(TRMMKERNEL)
 | 
					#if defined(TRMMKERNEL)
 | 
				
			||||||
	ld	$23,  96($sp)
 | 
						ld	$23,  96($sp)
 | 
				
			||||||
| 
						 | 
					@ -7779,10 +7779,10 @@
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#ifndef __64BIT__
 | 
					#ifndef __64BIT__
 | 
				
			||||||
	LD	$f20,120($sp)
 | 
						ldc1	$f20,120($sp)
 | 
				
			||||||
	LD	$f21,128($sp)
 | 
						ldc1	$f21,128($sp)
 | 
				
			||||||
	LD	$f22,136($sp)
 | 
						ldc1	$f22,136($sp)
 | 
				
			||||||
	LD	$f23,144($sp)
 | 
						ldc1	$f23,144($sp)
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	daddiu	$sp,$sp,STACKSIZE
 | 
						daddiu	$sp,$sp,STACKSIZE
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -56,7 +56,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT beta,
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (n == 0 || m == 0)
 | 
					  if (n == 0 || m == 0)
 | 
				
			||||||
	return;
 | 
						return 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  c_offset = c;
 | 
					  c_offset = c;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue