diff --git a/CMakeLists.txt b/CMakeLists.txt index b5789119a..f49f20513 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) project(OpenBLAS C ASM) set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MINOR_VERSION 3) -set(OpenBLAS_PATCH_VERSION 0.dev) +set(OpenBLAS_PATCH_VERSION 1.dev) set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") # Adhere to GNU filesystem layout conventions diff --git a/Makefile b/Makefile index c0e5fbcf8..380ba1ce8 100644 --- a/Makefile +++ b/Makefile @@ -294,9 +294,10 @@ endif lapack-test : (cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) - $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc + $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz + $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc ifneq ($(CROSS), 1) - ( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ + ( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \ ./testsecond; ./testdsecnd; ./testieee; ./testversion ) (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) endif @@ -308,9 +309,9 @@ lapack-runtest: blas-test: - (cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out) + (cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out) $(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing - (cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out) + (cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out) dummy : diff --git a/Makefile.rule b/Makefile.rule index 12734464b..1b4b8eb63 100644 --- a/Makefile.rule +++ b/Makefile.rule @@ -3,7 +3,7 @@ # # This library's version -VERSION = 0.3.0.dev +VERSION = 0.3.1.dev # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library diff --git a/c_check b/c_check index a3b337602..dfe99350a 100644 --- a/c_check +++ b/c_check @@ -201,6 +201,21 @@ $architecture = zarch if ($data =~ /ARCH_ZARCH/); $binformat = bin32; $binformat = bin64 if ($data =~ /BINARY_64/); +$no_avx512= 0; +if (($architecture eq "x86") || ($architecture eq "x86_64")) { + $code = '"vaddps %zmm1, %zmm0, %zmm0"'; + print $tmpf "void main(void){ __asm__ volatile($code); }\n"; + $args = " -o $tmpf.o -x c $tmpf"; + my @cmd = ("$compiler_name $args"); + system(@cmd) == 0; + if ($? != 0) { + $no_avx512 = 1; + } else { + $no_avx512 = 0; + } + unlink("tmpf.o"); +} + $data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; $data =~ /globl\s([_\.]*)(.*)/; @@ -288,6 +303,7 @@ print MAKEFILE "CROSS=1\n" if $cross != 0; print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1; print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1; +print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1; $os =~ tr/[a-z]/[A-Z]/; $architecture =~ tr/[a-z]/[A-Z]/; diff --git a/cmake/system_check.cmake b/cmake/system_check.cmake index d47c38cdd..f054852bf 100644 --- a/cmake/system_check.cmake +++ b/cmake/system_check.cmake @@ -66,3 +66,12 @@ else() set(BINARY32 1) endif() +if (X86_64 OR X86) + file(WRITE ${PROJECT_BINARY_DIR}/avx512.tmp "void main(void){ __asm__ volatile(\"vaddps %zmm1, %zmm0, %zmm0\"); }") +execute_process(COMMAND ${CMAKE_C_COMPILER} -v -o ${PROJECT_BINARY_DIR}/avx512.o -x c ${PROJECT_BINARY_DIR}/avx512.tmp RESULT_VARIABLE NO_AVX512) +if (NO_AVX512 EQUAL 1) +set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512") +endif() + file(REMOVE "avx512.tmp" "avx512.o") +endif() + diff --git a/common.h b/common.h index ecf07316d..cd1c4c0d1 100644 --- a/common.h +++ b/common.h @@ -642,6 +642,7 @@ void gotoblas_profile_init(void); void gotoblas_profile_quit(void); #ifdef USE_OPENMP + #ifndef C_MSVC int omp_in_parallel(void); int omp_get_num_procs(void); @@ -663,7 +664,6 @@ __declspec(dllimport) int __cdecl omp_get_num_procs(void); #define _Atomic volatile #endif - #else #ifdef __ELF__ int omp_in_parallel (void) __attribute__ ((weak)); diff --git a/driver/level3/Makefile b/driver/level3/Makefile index 352225206..e320092e3 100644 --- a/driver/level3/Makefile +++ b/driver/level3/Makefile @@ -362,7 +362,7 @@ cgemm_ct.$(SUFFIX) : gemm.c level3.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) cgemm_cr.$(SUFFIX) : gemm.c level3.c ../../param.h - $(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) cgemm_cc.$(SUFFIX) : gemm.c level3.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -410,7 +410,7 @@ zgemm_ct.$(SUFFIX) : gemm.c level3.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) zgemm_cr.$(SUFFIX) : gemm.c level3.c ../../param.h - $(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) zgemm_cc.$(SUFFIX) : gemm.c level3.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -458,7 +458,7 @@ xgemm_ct.$(SUFFIX) : gemm.c level3.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) xgemm_cr.$(SUFFIX) : gemm.c level3.c ../../param.h - $(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) xgemm_cc.$(SUFFIX) : gemm.c level3.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -558,7 +558,7 @@ cgemm_thread_ct.$(SUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) cgemm_thread_cr.$(SUFFIX) : gemm.c level3_thread.c ../../param.h - $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) cgemm_thread_cc.$(SUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -606,7 +606,7 @@ zgemm_thread_ct.$(SUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) zgemm_thread_cr.$(SUFFIX) : gemm.c level3_thread.c ../../param.h - $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) zgemm_thread_cc.$(SUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -654,7 +654,7 @@ xgemm_thread_ct.$(SUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) xgemm_thread_cr.$(SUFFIX) : gemm.c level3_thread.c ../../param.h - $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) xgemm_thread_cc.$(SUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -1821,7 +1821,7 @@ cgemm3m_ct.$(SUFFIX) : gemm3m.c gemm3m_level3.c $(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) cgemm3m_cr.$(SUFFIX) : gemm3m.c gemm3m_level3.c - $(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) cgemm3m_cc.$(SUFFIX) : gemm3m.c gemm3m_level3.c $(CC) $(CFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -1869,7 +1869,7 @@ zgemm3m_ct.$(SUFFIX) : gemm3m.c gemm3m_level3.c $(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) zgemm3m_cr.$(SUFFIX) : gemm3m.c gemm3m_level3.c - $(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) zgemm3m_cc.$(SUFFIX) : gemm3m.c gemm3m_level3.c $(CC) $(CFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -1917,7 +1917,7 @@ xgemm3m_ct.$(SUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) xgemm3m_cr.$(SUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h - $(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) xgemm3m_cc.$(SUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -1974,7 +1974,7 @@ cgemm3m_thread_ct.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) cgemm3m_thread_cr.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h - $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) cgemm3m_thread_cc.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -2022,7 +2022,7 @@ zgemm3m_thread_ct.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) zgemm3m_thread_cr.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h - $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) zgemm3m_thread_cc.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -2070,7 +2070,7 @@ xgemm3m_thread_ct.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) xgemm3m_thread_cr.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h - $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) xgemm3m_thread_cc.$(SUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(CFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -2731,7 +2731,7 @@ cgemm_ct.$(PSUFFIX) : gemm.c level3.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) cgemm_cr.$(PSUFFIX) : gemm.c level3.c ../../param.h - $(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) cgemm_cc.$(PSUFFIX) : gemm.c level3.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -2779,7 +2779,7 @@ zgemm_ct.$(PSUFFIX) : gemm.c level3.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) zgemm_cr.$(PSUFFIX) : gemm.c level3.c ../../param.h - $(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) zgemm_cc.$(PSUFFIX) : gemm.c level3.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -2827,7 +2827,7 @@ xgemm_ct.$(PSUFFIX) : gemm.c level3.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) xgemm_cr.$(PSUFFIX) : gemm.c level3.c ../../param.h - $(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) xgemm_cc.$(PSUFFIX) : gemm.c level3.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -2927,7 +2927,7 @@ cgemm_thread_ct.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) cgemm_thread_cr.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h - $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) cgemm_thread_cc.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -2975,7 +2975,7 @@ zgemm_thread_ct.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) zgemm_thread_cr.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h - $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) zgemm_thread_cc.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -3023,7 +3023,7 @@ xgemm_thread_ct.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) xgemm_thread_cr.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h - $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) xgemm_thread_cc.$(PSUFFIX) : gemm.c level3_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -4190,7 +4190,7 @@ cgemm3m_ct.$(PSUFFIX) : gemm3m.c gemm3m_level3.c $(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) cgemm3m_cr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c - $(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) cgemm3m_cc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c $(CC) $(PFLAGS) $(BLOCKS) -c -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -4238,7 +4238,7 @@ zgemm3m_ct.$(PSUFFIX) : gemm3m.c gemm3m_level3.c $(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) zgemm3m_cr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c - $(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) zgemm3m_cc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c $(CC) $(PFLAGS) $(BLOCKS) -c -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -4286,7 +4286,7 @@ xgemm3m_ct.$(PSUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) xgemm3m_cr.$(PSUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h - $(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) xgemm3m_cc.$(PSUFFIX) : gemm3m.c gemm3m_level3.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -4343,7 +4343,7 @@ cgemm3m_thread_ct.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCT $< -o $(@F) cgemm3m_thread_cr.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h - $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) cgemm3m_thread_cc.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -UDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -4391,7 +4391,7 @@ zgemm3m_thread_ct.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCT $< -o $(@F) zgemm3m_thread_cr.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h - $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) zgemm3m_thread_cc.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DDOUBLE -DCOMPLEX -DCC $< -o $(@F) @@ -4439,7 +4439,7 @@ xgemm3m_thread_ct.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCT $< -o $(@F) xgemm3m_thread_cr.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h - $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR $< -o $(@F) + $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCR=CR $< -o $(@F) xgemm3m_thread_cc.$(PSUFFIX) : gemm3m.c level3_gemm3m_thread.c ../../param.h $(CC) $(PFLAGS) $(BLOCKS) -c -DTHREADED_LEVEL3 -DXDOUBLE -DCOMPLEX -DCC $< -o $(@F) diff --git a/driver/others/memory.c b/driver/others/memory.c index ef328b945..d69e52e97 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -180,7 +180,7 @@ int get_num_procs(void) { cpu_set_t *cpusetp; size_t size; int ret; -// int i,n; +int i,n; if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); #if !defined(OS_LINUX) diff --git a/exports/Makefile b/exports/Makefile index 53d4f75bb..127b05057 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -128,6 +128,8 @@ so : ../$(LIBSONAME) ifeq ($(OSNAME), Android) INTERNALNAME = $(LIBPREFIX).so +FEXTRALIB += -lm +EXTRALIB += -lm else INTERNALNAME = $(LIBPREFIX).so.$(MAJOR_VERSION) endif diff --git a/kernel/power/KERNEL.POWER8 b/kernel/power/KERNEL.POWER8 index 00ff8682a..1aa061078 100644 --- a/kernel/power/KERNEL.POWER8 +++ b/kernel/power/KERNEL.POWER8 @@ -133,7 +133,7 @@ ZNRM2KERNEL = ../arm/znrm2.c # SROTKERNEL = srot.c DROTKERNEL = drot.c -#CROTKERNEL = ../arm/zrot.c +CROTKERNEL = zrot.c ZROTKERNEL = zrot.c # SSCALKERNEL = sscal.c diff --git a/kernel/x86/KERNEL.NEHALEM b/kernel/x86/KERNEL.NEHALEM index 835520efb..65b03ae50 100644 --- a/kernel/x86/KERNEL.NEHALEM +++ b/kernel/x86/KERNEL.NEHALEM @@ -1,3 +1 @@ include $(KERNELDIR)/KERNEL.PENRYN -SSWAPKERNEL = ../arm/swap.c -DSWAPKERNEL = ../arm/swap.c diff --git a/kernel/x86/swap.S b/kernel/x86/swap.S index 54b00b33e..e30c27898 100644 --- a/kernel/x86/swap.S +++ b/kernel/x86/swap.S @@ -138,6 +138,14 @@ /* INCX != 1 or INCY != 1 */ .L14: + cmpl $0, %ebx + jne .L141 + cmpl $0, %ecx + jne .L141 +/* INCX == 0 and INCY == 0 */ + jmp .L27 + +.L141: movl %edx, %eax sarl $2, %eax jle .L28 diff --git a/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_work.c b/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_work.c index b4a7595d8..e4d538779 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_chetrf_aa_work.c @@ -41,7 +41,7 @@ lapack_int LAPACKE_chetrf_aa_work( int matrix_layout, char uplo, lapack_int n, lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ - LAPACK_chetrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); + LAPACK_chetrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } @@ -56,7 +56,7 @@ lapack_int LAPACKE_chetrf_aa_work( int matrix_layout, char uplo, lapack_int n, } /* Query optimal working array(s) size if requested */ if( lwork == -1 ) { - LAPACK_chetrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_chetrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); return (info < 0) ? (info - 1) : info; } /* Allocate memory for temporary array(s) */ @@ -69,7 +69,7 @@ lapack_int LAPACKE_chetrf_aa_work( int matrix_layout, char uplo, lapack_int n, /* Transpose input matrices */ LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ - LAPACK_chetrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_chetrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_work.c b/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_work.c index d4f24142b..f6661c85c 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_csytrf_aa_work.c @@ -41,7 +41,7 @@ lapack_int LAPACKE_csytrf_aa_work( int matrix_layout, char uplo, lapack_int n, lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ - LAPACK_csytrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); + LAPACK_csytrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } @@ -56,7 +56,7 @@ lapack_int LAPACKE_csytrf_aa_work( int matrix_layout, char uplo, lapack_int n, } /* Query optimal working array(s) size if requested */ if( lwork == -1 ) { - LAPACK_csytrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_csytrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); return (info < 0) ? (info - 1) : info; } /* Allocate memory for temporary array(s) */ @@ -69,7 +69,7 @@ lapack_int LAPACKE_csytrf_aa_work( int matrix_layout, char uplo, lapack_int n, /* Transpose input matrices */ LAPACKE_csy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ - LAPACK_csytrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_csytrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_work.c b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_work.c index cbf97b632..e72bfa6de 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dsytrf_aa_work.c @@ -40,7 +40,7 @@ lapack_int LAPACKE_dsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ - LAPACK_dsytrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); + LAPACK_dsytrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } @@ -55,7 +55,7 @@ lapack_int LAPACKE_dsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, } /* Query optimal working array(s) size if requested */ if( lwork == -1 ) { - LAPACK_dsytrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_dsytrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); return (info < 0) ? (info - 1) : info; } /* Allocate memory for temporary array(s) */ @@ -67,7 +67,7 @@ lapack_int LAPACKE_dsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, /* Transpose input matrices */ LAPACKE_dsy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ - LAPACK_dsytrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_dsytrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_work.c b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_work.c index d68cb17c1..182946a45 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_ssytrf_aa_work.c @@ -40,7 +40,7 @@ lapack_int LAPACKE_ssytrf_aa_work( int matrix_layout, char uplo, lapack_int n, lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ - LAPACK_ssytrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); + LAPACK_ssytrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } @@ -55,7 +55,7 @@ lapack_int LAPACKE_ssytrf_aa_work( int matrix_layout, char uplo, lapack_int n, } /* Query optimal working array(s) size if requested */ if( lwork == -1 ) { - LAPACK_ssytrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_ssytrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); return (info < 0) ? (info - 1) : info; } /* Allocate memory for temporary array(s) */ @@ -67,7 +67,7 @@ lapack_int LAPACKE_ssytrf_aa_work( int matrix_layout, char uplo, lapack_int n, /* Transpose input matrices */ LAPACKE_ssy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ - LAPACK_ssytrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_ssytrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_work.c b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_work.c index 5214217fb..dbad2d81e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zhetrf_aa_work.c @@ -41,7 +41,7 @@ lapack_int LAPACKE_zhetrf_aa_work( int matrix_layout, char uplo, lapack_int n, lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ - LAPACK_zhetrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); + LAPACK_zhetrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } @@ -56,7 +56,7 @@ lapack_int LAPACKE_zhetrf_aa_work( int matrix_layout, char uplo, lapack_int n, } /* Query optimal working array(s) size if requested */ if( lwork == -1 ) { - LAPACK_zhetrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_zhetrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); return (info < 0) ? (info - 1) : info; } /* Allocate memory for temporary array(s) */ @@ -69,7 +69,7 @@ lapack_int LAPACKE_zhetrf_aa_work( int matrix_layout, char uplo, lapack_int n, /* Transpose input matrices */ LAPACKE_zhe_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ - LAPACK_zhetrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_zhetrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_work.c b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_work.c index 29d75319e..03726c63e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zsytrf_aa_work.c @@ -41,7 +41,7 @@ lapack_int LAPACKE_zsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, lapack_int info = 0; if( matrix_layout == LAPACK_COL_MAJOR ) { /* Call LAPACK function and adjust info */ - LAPACK_zsytrf( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); + LAPACK_zsytrf_aa( &uplo, &n, a, &lda, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } @@ -56,7 +56,7 @@ lapack_int LAPACKE_zsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, } /* Query optimal working array(s) size if requested */ if( lwork == -1 ) { - LAPACK_zsytrf( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_zsytrf_aa( &uplo, &n, a, &lda_t, ipiv, work, &lwork, &info ); return (info < 0) ? (info - 1) : info; } /* Allocate memory for temporary array(s) */ @@ -69,7 +69,7 @@ lapack_int LAPACKE_zsytrf_aa_work( int matrix_layout, char uplo, lapack_int n, /* Transpose input matrices */ LAPACKE_zsy_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t ); /* Call LAPACK function and adjust info */ - LAPACK_zsytrf( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); + LAPACK_zsytrf_aa( &uplo, &n, a_t, &lda_t, ipiv, work, &lwork, &info ); if( info < 0 ) { info = info - 1; } diff --git a/lapack-netlib/SRC/cgejsv.f b/lapack-netlib/SRC/cgejsv.f index 8eb43cf50..a7b1c451c 100644 --- a/lapack-netlib/SRC/cgejsv.f +++ b/lapack-netlib/SRC/cgejsv.f @@ -701,7 +701,7 @@ LWSVDJ = MAX( 2 * N, 1 ) LWSVDJV = MAX( 2 * N, 1 ) * .. minimal REAL workspace length for CGEQP3, CPOCON, CGESVJ - LRWQP3 = N + LRWQP3 = 2 * N LRWCON = N LRWSVDJ = N IF ( LQUERY ) THEN @@ -939,7 +939,7 @@ END IF END IF MINWRK = MAX( 2, MINWRK ) - OPTWRK = MAX( 2, OPTWRK ) + OPTWRK = MAX( OPTWRK, MINWRK ) IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = - 17 IF ( LRWORK .LT. MINRWRK .AND. (.NOT.LQUERY) ) INFO = - 19 END IF diff --git a/lapack-netlib/SRC/chesv_aa.f b/lapack-netlib/SRC/chesv_aa.f index 0bf636b48..470f910bc 100644 --- a/lapack-netlib/SRC/chesv_aa.f +++ b/lapack-netlib/SRC/chesv_aa.f @@ -209,6 +209,8 @@ INFO = -5 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -8 + ELSE IF( LWORK.LT.MAX( 2*N, 3*N-2 ) .AND. .NOT.LQUERY ) THEN + INFO = -10 END IF * IF( INFO.EQ.0 ) THEN @@ -219,9 +221,6 @@ LWKOPT_HETRS = INT( WORK(1) ) LWKOPT = MAX( LWKOPT_HETRF, LWKOPT_HETRS ) WORK( 1 ) = LWKOPT - IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN - INFO = -10 - END IF END IF * IF( INFO.NE.0 ) THEN diff --git a/lapack-netlib/SRC/chesv_aa_2stage.f b/lapack-netlib/SRC/chesv_aa_2stage.f index 057d9c57a..05f6b7bb7 100644 --- a/lapack-netlib/SRC/chesv_aa_2stage.f +++ b/lapack-netlib/SRC/chesv_aa_2stage.f @@ -105,6 +105,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -124,7 +125,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -150,6 +151,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -233,19 +235,18 @@ INFO = -3 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 + ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN + INFO = -7 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -11 + ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN + INFO = -13 END IF * IF( INFO.EQ.0 ) THEN CALL CHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, $ IPIV2, WORK, -1, INFO ) LWKOPT = INT( WORK(1) ) - IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN - INFO = -7 - ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN - INFO = -13 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -270,6 +271,8 @@ END IF * WORK( 1 ) = LWKOPT +* + RETURN * * End of CHESV_AA_2STAGE * diff --git a/lapack-netlib/SRC/chetrf_aa_2stage.f b/lapack-netlib/SRC/chetrf_aa_2stage.f index 0fa2ae3a0..ce34d73cc 100644 --- a/lapack-netlib/SRC/chetrf_aa_2stage.f +++ b/lapack-netlib/SRC/chetrf_aa_2stage.f @@ -93,6 +93,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -112,7 +113,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -125,6 +126,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -658,6 +660,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) * * Factor the band matrix CALL CGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* + RETURN * * End of CHETRF_AA_2STAGE * diff --git a/lapack-netlib/SRC/chetrs_aa_2stage.f b/lapack-netlib/SRC/chetrs_aa_2stage.f index 3f8576673..05d09275b 100644 --- a/lapack-netlib/SRC/chetrs_aa_2stage.f +++ b/lapack-netlib/SRC/chetrs_aa_2stage.f @@ -87,6 +87,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N. *> \endverbatim *> diff --git a/lapack-netlib/SRC/cla_syamv.f b/lapack-netlib/SRC/cla_syamv.f index e1d3df960..695b5e478 100644 --- a/lapack-netlib/SRC/cla_syamv.f +++ b/lapack-netlib/SRC/cla_syamv.f @@ -241,7 +241,7 @@ INFO = 10 END IF IF( INFO.NE.0 )THEN - CALL XERBLA( 'SSYMV ', INFO ) + CALL XERBLA( 'CLA_SYAMV', INFO ) RETURN END IF * diff --git a/lapack-netlib/SRC/claqr1.f b/lapack-netlib/SRC/claqr1.f index b76bedf60..977947196 100644 --- a/lapack-netlib/SRC/claqr1.f +++ b/lapack-netlib/SRC/claqr1.f @@ -142,6 +142,13 @@ CABS1( CDUM ) = ABS( REAL( CDUM ) ) + ABS( AIMAG( CDUM ) ) * .. * .. Executable Statements .. +* +* Quick return if possible +* + IF( N.NE.2 .AND. N.NE.3 ) THEN + RETURN + END IF +* IF( N.EQ.2 ) THEN S = CABS1( H( 1, 1 )-S2 ) + CABS1( H( 2, 1 ) ) IF( S.EQ.RZERO ) THEN diff --git a/lapack-netlib/SRC/csysv_aa.f b/lapack-netlib/SRC/csysv_aa.f index 9cd669d33..87be734cc 100644 --- a/lapack-netlib/SRC/csysv_aa.f +++ b/lapack-netlib/SRC/csysv_aa.f @@ -221,9 +221,6 @@ LWKOPT_SYTRS = INT( WORK(1) ) LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) WORK( 1 ) = LWKOPT - IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN - INFO = -10 - END IF END IF * IF( INFO.NE.0 ) THEN diff --git a/lapack-netlib/SRC/csysv_aa_2stage.f b/lapack-netlib/SRC/csysv_aa_2stage.f index cba57fc3e..a13349824 100644 --- a/lapack-netlib/SRC/csysv_aa_2stage.f +++ b/lapack-netlib/SRC/csysv_aa_2stage.f @@ -105,6 +105,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -124,7 +125,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -150,6 +151,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -233,19 +235,18 @@ INFO = -3 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 + ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN + INFO = -7 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -11 + ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN + INFO = -13 END IF * IF( INFO.EQ.0 ) THEN CALL CSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, $ IPIV2, WORK, -1, INFO ) LWKOPT = INT( WORK(1) ) - IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN - INFO = -7 - ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN - INFO = -13 - END IF END IF * IF( INFO.NE.0 ) THEN @@ -270,6 +271,8 @@ END IF * WORK( 1 ) = LWKOPT +* + RETURN * * End of CSYSV_AA_2STAGE * diff --git a/lapack-netlib/SRC/csytrf_aa_2stage.f b/lapack-netlib/SRC/csytrf_aa_2stage.f index 0a6bfbe31..0d0bd156c 100644 --- a/lapack-netlib/SRC/csytrf_aa_2stage.f +++ b/lapack-netlib/SRC/csytrf_aa_2stage.f @@ -93,6 +93,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -112,7 +113,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -125,6 +126,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -662,6 +664,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) * * Factor the band matrix CALL CGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* + RETURN * * End of CSYTRF_AA_2STAGE * diff --git a/lapack-netlib/SRC/csytri2.f b/lapack-netlib/SRC/csytri2.f index 4c6baaa3e..4bd8e4f99 100644 --- a/lapack-netlib/SRC/csytri2.f +++ b/lapack-netlib/SRC/csytri2.f @@ -96,11 +96,11 @@ *> LWORK is INTEGER *> The dimension of the array WORK. *> WORK is size >= (N+NB+1)*(NB+3) -*> If LDWORK = -1, then a workspace query is assumed; the routine +*> If LWORK = -1, then a workspace query is assumed; the routine *> calculates: *> - the optimal size of the WORK array, returns *> this value as the first entry of the WORK array, -*> - and no error message related to LDWORK is issued by XERBLA. +*> - and no error message related to LWORK is issued by XERBLA. *> \endverbatim *> *> \param[out] INFO @@ -163,7 +163,7 @@ UPPER = LSAME( UPLO, 'U' ) LQUERY = ( LWORK.EQ.-1 ) * Get blocksize - NBMAX = ILAENV( 1, 'CSYTRF', UPLO, N, -1, -1, -1 ) + NBMAX = ILAENV( 1, 'CSYTRI2', UPLO, N, -1, -1, -1 ) IF ( NBMAX .GE. N ) THEN MINSIZE = N ELSE diff --git a/lapack-netlib/SRC/csytrs_aa_2stage.f b/lapack-netlib/SRC/csytrs_aa_2stage.f index 03bccda82..d025c08fe 100644 --- a/lapack-netlib/SRC/csytrs_aa_2stage.f +++ b/lapack-netlib/SRC/csytrs_aa_2stage.f @@ -85,6 +85,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N. *> \endverbatim *> diff --git a/lapack-netlib/SRC/ctrevc3.f b/lapack-netlib/SRC/ctrevc3.f index c06b40477..a134c1a50 100644 --- a/lapack-netlib/SRC/ctrevc3.f +++ b/lapack-netlib/SRC/ctrevc3.f @@ -27,8 +27,8 @@ * .. * .. Array Arguments .. * LOGICAL SELECT( * ) -* REAL RWORK( * ) -* COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* REAL RWORK( * ) +* COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), * $ WORK( * ) * .. * @@ -258,17 +258,17 @@ * .. * .. Array Arguments .. LOGICAL SELECT( * ) - REAL RWORK( * ) - COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + REAL RWORK( * ) + COMPLEX T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), $ WORK( * ) * .. * * ===================================================================== * * .. Parameters .. - REAL ZERO, ONE + REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) - COMPLEX CZERO, CONE + COMPLEX CZERO, CONE PARAMETER ( CZERO = ( 0.0E+0, 0.0E+0 ), $ CONE = ( 1.0E+0, 0.0E+0 ) ) INTEGER NBMIN, NBMAX @@ -277,13 +277,13 @@ * .. Local Scalars .. LOGICAL ALLV, BOTHV, LEFTV, LQUERY, OVER, RIGHTV, SOMEV INTEGER I, II, IS, J, K, KI, IV, MAXWRK, NB - REAL OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL - COMPLEX CDUM + REAL OVFL, REMAX, SCALE, SMIN, SMLNUM, ULP, UNFL + COMPLEX CDUM * .. * .. External Functions .. LOGICAL LSAME INTEGER ILAENV, ICAMAX - REAL SLAMCH, SCASUM + REAL SLAMCH, SCASUM EXTERNAL LSAME, ILAENV, ICAMAX, SLAMCH, SCASUM * .. * .. External Subroutines .. diff --git a/lapack-netlib/SRC/dgelqt.f b/lapack-netlib/SRC/dgelqt.f index 2124f3dc3..5b4ee65b5 100644 --- a/lapack-netlib/SRC/dgelqt.f +++ b/lapack-netlib/SRC/dgelqt.f @@ -158,7 +158,7 @@ INTEGER I, IB, IINFO, K * .. * .. External Subroutines .. - EXTERNAL DGEQRT2, DGELQT3, DGEQRT3, DLARFB, XERBLA + EXTERNAL DGELQT3, DLARFB, XERBLA * .. * .. Executable Statements .. * diff --git a/lapack-netlib/SRC/dla_syamv.f b/lapack-netlib/SRC/dla_syamv.f index 29566a6e9..bb6dbe288 100644 --- a/lapack-netlib/SRC/dla_syamv.f +++ b/lapack-netlib/SRC/dla_syamv.f @@ -230,7 +230,7 @@ INFO = 10 END IF IF( INFO.NE.0 )THEN - CALL XERBLA( 'DSYMV ', INFO ) + CALL XERBLA( 'DLA_SYAMV', INFO ) RETURN END IF * diff --git a/lapack-netlib/SRC/dlaqr1.f b/lapack-netlib/SRC/dlaqr1.f index 81a462fb3..795b072ab 100644 --- a/lapack-netlib/SRC/dlaqr1.f +++ b/lapack-netlib/SRC/dlaqr1.f @@ -147,6 +147,13 @@ INTRINSIC ABS * .. * .. Executable Statements .. +* +* Quick return if possible +* + IF( N.NE.2 .AND. N.NE.3 ) THEN + RETURN + END IF +* IF( N.EQ.2 ) THEN S = ABS( H( 1, 1 )-SR2 ) + ABS( SI2 ) + ABS( H( 2, 1 ) ) IF( S.EQ.ZERO ) THEN diff --git a/lapack-netlib/SRC/dsysv_aa.f b/lapack-netlib/SRC/dsysv_aa.f index cbccd5e65..7192928c6 100644 --- a/lapack-netlib/SRC/dsysv_aa.f +++ b/lapack-netlib/SRC/dsysv_aa.f @@ -221,9 +221,6 @@ LWKOPT_SYTRS = INT( WORK(1) ) LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) WORK( 1 ) = LWKOPT - IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN - INFO = -10 - END IF END IF * IF( INFO.NE.0 ) THEN diff --git a/lapack-netlib/SRC/dsysv_aa_2stage.f b/lapack-netlib/SRC/dsysv_aa_2stage.f index ac3c77d76..05e538f0b 100644 --- a/lapack-netlib/SRC/dsysv_aa_2stage.f +++ b/lapack-netlib/SRC/dsysv_aa_2stage.f @@ -107,6 +107,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -126,7 +127,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -152,6 +153,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -235,19 +237,18 @@ INFO = -3 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 + ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN + INFO = -7 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -11 + ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN + INFO = -13 END IF * IF( INFO.EQ.0 ) THEN CALL DSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, $ IPIV2, WORK, -1, INFO ) LWKOPT = INT( WORK(1) ) - IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN - INFO = -7 - ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN - INFO = -13 - END IF END IF * IF( INFO.NE.0 ) THEN diff --git a/lapack-netlib/SRC/dsytrf_aa_2stage.f b/lapack-netlib/SRC/dsytrf_aa_2stage.f index f5f06cc1d..25fc1a2eb 100644 --- a/lapack-netlib/SRC/dsytrf_aa_2stage.f +++ b/lapack-netlib/SRC/dsytrf_aa_2stage.f @@ -93,6 +93,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -109,6 +110,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -128,10 +130,10 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the -*> row and column IPIV(k). +*> row and column IPIV2(k). *> \endverbatim *> *> \param[out] INFO @@ -641,6 +643,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) * * Factor the band matrix CALL DGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* + RETURN * * End of DSYTRF_AA_2STAGE * diff --git a/lapack-netlib/SRC/dsytri2.f b/lapack-netlib/SRC/dsytri2.f index 9aa21a854..23f8b9fa2 100644 --- a/lapack-netlib/SRC/dsytri2.f +++ b/lapack-netlib/SRC/dsytri2.f @@ -96,11 +96,11 @@ *> LWORK is INTEGER *> The dimension of the array WORK. *> WORK is size >= (N+NB+1)*(NB+3) -*> If LDWORK = -1, then a workspace query is assumed; the routine +*> If LWORK = -1, then a workspace query is assumed; the routine *> calculates: *> - the optimal size of the WORK array, returns *> this value as the first entry of the WORK array, -*> - and no error message related to LDWORK is issued by XERBLA. +*> - and no error message related to LWORK is issued by XERBLA. *> \endverbatim *> *> \param[out] INFO @@ -163,7 +163,7 @@ UPPER = LSAME( UPLO, 'U' ) LQUERY = ( LWORK.EQ.-1 ) * Get blocksize - NBMAX = ILAENV( 1, 'DSYTRF', UPLO, N, -1, -1, -1 ) + NBMAX = ILAENV( 1, 'DSYTRI2', UPLO, N, -1, -1, -1 ) IF ( NBMAX .GE. N ) THEN MINSIZE = N ELSE diff --git a/lapack-netlib/SRC/dsytrs_aa_2stage.f b/lapack-netlib/SRC/dsytrs_aa_2stage.f index caff5d4ad..bb283cb95 100644 --- a/lapack-netlib/SRC/dsytrs_aa_2stage.f +++ b/lapack-netlib/SRC/dsytrs_aa_2stage.f @@ -85,6 +85,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N. *> \endverbatim *> diff --git a/lapack-netlib/SRC/dtrevc3.f b/lapack-netlib/SRC/dtrevc3.f index 745f636d0..957baf4f0 100644 --- a/lapack-netlib/SRC/dtrevc3.f +++ b/lapack-netlib/SRC/dtrevc3.f @@ -45,9 +45,9 @@ *> The right eigenvector x and the left eigenvector y of T corresponding *> to an eigenvalue w are defined by: *> -*> T*x = w*x, (y**H)*T = w*(y**H) +*> T*x = w*x, (y**T)*T = w*(y**T) *> -*> where y**H denotes the conjugate transpose of y. +*> where y**T denotes the transpose of the vector y. *> The eigenvalues are not input to this routine, but are read directly *> from the diagonal blocks of T. *> diff --git a/lapack-netlib/SRC/iparmq.f b/lapack-netlib/SRC/iparmq.f index e576e0db0..a9212b3e0 100644 --- a/lapack-netlib/SRC/iparmq.f +++ b/lapack-netlib/SRC/iparmq.f @@ -104,13 +104,13 @@ *> *> \param[in] NAME *> \verbatim -*> NAME is character string +*> NAME is CHARACTER string *> Name of the calling subroutine *> \endverbatim *> *> \param[in] OPTS *> \verbatim -*> OPTS is character string +*> OPTS is CHARACTER string *> This is a concatenation of the string arguments to *> TTQRE. *> \endverbatim diff --git a/lapack-netlib/SRC/sla_syamv.f b/lapack-netlib/SRC/sla_syamv.f index d40e7bd95..4459f4d8b 100644 --- a/lapack-netlib/SRC/sla_syamv.f +++ b/lapack-netlib/SRC/sla_syamv.f @@ -230,7 +230,7 @@ INFO = 10 END IF IF( INFO.NE.0 )THEN - CALL XERBLA( 'SSYMV ', INFO ) + CALL XERBLA( 'SLA_SYAMV', INFO ) RETURN END IF * diff --git a/lapack-netlib/SRC/slaqr1.f b/lapack-netlib/SRC/slaqr1.f index 7d7d851ee..2de33849d 100644 --- a/lapack-netlib/SRC/slaqr1.f +++ b/lapack-netlib/SRC/slaqr1.f @@ -147,6 +147,13 @@ INTRINSIC ABS * .. * .. Executable Statements .. +* +* Quick return if possible +* + IF( N.NE.2 .AND. N.NE.3 ) THEN + RETURN + END IF +* IF( N.EQ.2 ) THEN S = ABS( H( 1, 1 )-SR2 ) + ABS( SI2 ) + ABS( H( 2, 1 ) ) IF( S.EQ.ZERO ) THEN diff --git a/lapack-netlib/SRC/ssysv_aa.f b/lapack-netlib/SRC/ssysv_aa.f index abf52b143..e470f5883 100644 --- a/lapack-netlib/SRC/ssysv_aa.f +++ b/lapack-netlib/SRC/ssysv_aa.f @@ -220,9 +220,6 @@ LWKOPT_SYTRS = INT( WORK(1) ) LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) WORK( 1 ) = LWKOPT - IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN - INFO = -10 - END IF END IF * IF( INFO.NE.0 ) THEN diff --git a/lapack-netlib/SRC/ssysv_aa_2stage.f b/lapack-netlib/SRC/ssysv_aa_2stage.f index a738c7415..43d937141 100644 --- a/lapack-netlib/SRC/ssysv_aa_2stage.f +++ b/lapack-netlib/SRC/ssysv_aa_2stage.f @@ -106,6 +106,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -125,7 +126,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -151,6 +152,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -234,19 +236,18 @@ INFO = -3 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 + ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN + INFO = -7 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -11 + ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN + INFO = -13 END IF * IF( INFO.EQ.0 ) THEN CALL SSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, $ IPIV2, WORK, -1, INFO ) LWKOPT = INT( WORK(1) ) - IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN - INFO = -7 - ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN - INFO = -13 - END IF END IF * IF( INFO.NE.0 ) THEN diff --git a/lapack-netlib/SRC/ssytrf_aa_2stage.f b/lapack-netlib/SRC/ssytrf_aa_2stage.f index a92974930..0e0f6edb7 100644 --- a/lapack-netlib/SRC/ssytrf_aa_2stage.f +++ b/lapack-netlib/SRC/ssytrf_aa_2stage.f @@ -93,6 +93,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -112,7 +113,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -125,6 +126,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -641,6 +643,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) * * Factor the band matrix CALL SGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* + RETURN * * End of SSYTRF_AA_2STAGE * diff --git a/lapack-netlib/SRC/ssytri2.f b/lapack-netlib/SRC/ssytri2.f index 97b539005..4b9ea4e7b 100644 --- a/lapack-netlib/SRC/ssytri2.f +++ b/lapack-netlib/SRC/ssytri2.f @@ -96,11 +96,11 @@ *> LWORK is INTEGER *> The dimension of the array WORK. *> WORK is size >= (N+NB+1)*(NB+3) -*> If LDWORK = -1, then a workspace query is assumed; the routine +*> If LWORK = -1, then a workspace query is assumed; the routine *> calculates: *> - the optimal size of the WORK array, returns *> this value as the first entry of the WORK array, -*> - and no error message related to LDWORK is issued by XERBLA. +*> - and no error message related to LWORK is issued by XERBLA. *> \endverbatim *> *> \param[out] INFO diff --git a/lapack-netlib/SRC/ssytrs_aa_2stage.f b/lapack-netlib/SRC/ssytrs_aa_2stage.f index c9c7181f2..d271b9481 100644 --- a/lapack-netlib/SRC/ssytrs_aa_2stage.f +++ b/lapack-netlib/SRC/ssytrs_aa_2stage.f @@ -85,6 +85,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N. *> \endverbatim *> diff --git a/lapack-netlib/SRC/strevc3.f b/lapack-netlib/SRC/strevc3.f index 0df1189f0..525978071 100644 --- a/lapack-netlib/SRC/strevc3.f +++ b/lapack-netlib/SRC/strevc3.f @@ -27,7 +27,7 @@ * .. * .. Array Arguments .. * LOGICAL SELECT( * ) -* REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), +* REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), * $ WORK( * ) * .. * @@ -45,9 +45,9 @@ *> The right eigenvector x and the left eigenvector y of T corresponding *> to an eigenvalue w are defined by: *> -*> T*x = w*x, (y**H)*T = w*(y**H) +*> T*x = w*x, (y**T)*T = w*(y**T) *> -*> where y**H denotes the conjugate transpose of y. +*> where y**T denotes the transpose of the vector y. *> The eigenvalues are not input to this routine, but are read directly *> from the diagonal blocks of T. *> @@ -251,14 +251,14 @@ * .. * .. Array Arguments .. LOGICAL SELECT( * ) - REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), + REAL T( LDT, * ), VL( LDVL, * ), VR( LDVR, * ), $ WORK( * ) * .. * * ===================================================================== * * .. Parameters .. - REAL ZERO, ONE + REAL ZERO, ONE PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) INTEGER NBMIN, NBMAX PARAMETER ( NBMIN = 8, NBMAX = 128 ) @@ -268,7 +268,7 @@ $ RIGHTV, SOMEV INTEGER I, IERR, II, IP, IS, J, J1, J2, JNXT, K, KI, $ IV, MAXWRK, NB, KI2 - REAL BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, + REAL BETA, BIGNUM, EMAX, OVFL, REC, REMAX, SCALE, $ SMIN, SMLNUM, ULP, UNFL, VCRIT, VMAX, WI, WR, $ XNORM * .. diff --git a/lapack-netlib/SRC/zgejsv.f b/lapack-netlib/SRC/zgejsv.f index e8418c680..d553da90b 100644 --- a/lapack-netlib/SRC/zgejsv.f +++ b/lapack-netlib/SRC/zgejsv.f @@ -704,7 +704,7 @@ LWSVDJ = MAX( 2 * N, 1 ) LWSVDJV = MAX( 2 * N, 1 ) * .. minimal REAL workspace length for ZGEQP3, ZPOCON, ZGESVJ - LRWQP3 = N + LRWQP3 = 2 * N LRWCON = N LRWSVDJ = N IF ( LQUERY ) THEN @@ -942,7 +942,7 @@ END IF END IF MINWRK = MAX( 2, MINWRK ) - OPTWRK = MAX( 2, OPTWRK ) + OPTWRK = MAX( MINWRK, OPTWRK ) IF ( LWORK .LT. MINWRK .AND. (.NOT.LQUERY) ) INFO = - 17 IF ( LRWORK .LT. MINRWRK .AND. (.NOT.LQUERY) ) INFO = - 19 END IF diff --git a/lapack-netlib/SRC/zhesv_aa.f b/lapack-netlib/SRC/zhesv_aa.f index bbd0fdff4..8511f0e7d 100644 --- a/lapack-netlib/SRC/zhesv_aa.f +++ b/lapack-netlib/SRC/zhesv_aa.f @@ -209,6 +209,8 @@ INFO = -5 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -8 + ELSE IF( LWORK.LT.MAX(2*N, 3*N-2) .AND. .NOT.LQUERY ) THEN + INFO = -10 END IF * IF( INFO.EQ.0 ) THEN @@ -219,9 +221,6 @@ LWKOPT_HETRS = INT( WORK(1) ) LWKOPT = MAX( LWKOPT_HETRF, LWKOPT_HETRS ) WORK( 1 ) = LWKOPT - IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN - INFO = -10 - END IF END IF * IF( INFO.NE.0 ) THEN diff --git a/lapack-netlib/SRC/zhesv_aa_2stage.f b/lapack-netlib/SRC/zhesv_aa_2stage.f index a34440029..ed221dc69 100644 --- a/lapack-netlib/SRC/zhesv_aa_2stage.f +++ b/lapack-netlib/SRC/zhesv_aa_2stage.f @@ -106,6 +106,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -125,7 +126,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -151,6 +152,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -240,19 +242,18 @@ INFO = -3 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 + ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN + INFO = -7 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -11 + ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN + INFO = -13 END IF * IF( INFO.EQ.0 ) THEN CALL ZHETRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, $ IPIV2, WORK, -1, INFO ) LWKOPT = INT( WORK(1) ) - IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN - INFO = -7 - ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN - INFO = -13 - END IF END IF * IF( INFO.NE.0 ) THEN diff --git a/lapack-netlib/SRC/zhetrf_aa_2stage.f b/lapack-netlib/SRC/zhetrf_aa_2stage.f index 4d62198d6..73c0ebe9a 100644 --- a/lapack-netlib/SRC/zhetrf_aa_2stage.f +++ b/lapack-netlib/SRC/zhetrf_aa_2stage.f @@ -93,6 +93,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -112,7 +113,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -125,6 +126,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -657,6 +659,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) * * Factor the band matrix CALL ZGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* + RETURN * * End of ZHETRF_AA_2STAGE * diff --git a/lapack-netlib/SRC/zhetrs_aa_2stage.f b/lapack-netlib/SRC/zhetrs_aa_2stage.f index 02e17476f..7fcee1118 100644 --- a/lapack-netlib/SRC/zhetrs_aa_2stage.f +++ b/lapack-netlib/SRC/zhetrs_aa_2stage.f @@ -69,7 +69,7 @@ *> *> \param[in] A *> \verbatim -*> A is COMPLEX*16array, dimension (LDA,N) +*> A is COMPLEX*16 array, dimension (LDA,N) *> Details of factors computed by ZHETRF_AA_2STAGE. *> \endverbatim *> @@ -81,12 +81,13 @@ *> *> \param[out] TB *> \verbatim -*> TB is COMPLEX*16array, dimension (LTB) +*> TB is COMPLEX*16 array, dimension (LTB) *> Details of factors computed by ZHETRF_AA_2STAGE. *> \endverbatim *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N. *> \endverbatim *> @@ -106,7 +107,7 @@ *> *> \param[in,out] B *> \verbatim -*> B is COMPLEX*16array, dimension (LDB,NRHS) +*> B is COMPLEX*16 array, dimension (LDB,NRHS) *> On entry, the right hand side matrix B. *> On exit, the solution matrix X. *> \endverbatim diff --git a/lapack-netlib/SRC/zla_syamv.f b/lapack-netlib/SRC/zla_syamv.f index 02958bef3..cfdb3cdc8 100644 --- a/lapack-netlib/SRC/zla_syamv.f +++ b/lapack-netlib/SRC/zla_syamv.f @@ -241,7 +241,7 @@ INFO = 10 END IF IF( INFO.NE.0 )THEN - CALL XERBLA( 'DSYMV ', INFO ) + CALL XERBLA( 'ZLA_SYAMV', INFO ) RETURN END IF * diff --git a/lapack-netlib/SRC/zlaqr1.f b/lapack-netlib/SRC/zlaqr1.f index 03afb87aa..34341cb10 100644 --- a/lapack-netlib/SRC/zlaqr1.f +++ b/lapack-netlib/SRC/zlaqr1.f @@ -142,6 +142,13 @@ CABS1( CDUM ) = ABS( DBLE( CDUM ) ) + ABS( DIMAG( CDUM ) ) * .. * .. Executable Statements .. +* +* Quick return if possible +* + IF( N.NE.2 .AND. N.NE.3 ) THEN + RETURN + END IF +* IF( N.EQ.2 ) THEN S = CABS1( H( 1, 1 )-S2 ) + CABS1( H( 2, 1 ) ) IF( S.EQ.RZERO ) THEN diff --git a/lapack-netlib/SRC/zsysv_aa.f b/lapack-netlib/SRC/zsysv_aa.f index 10693c731..325d07c54 100644 --- a/lapack-netlib/SRC/zsysv_aa.f +++ b/lapack-netlib/SRC/zsysv_aa.f @@ -221,9 +221,6 @@ LWKOPT_SYTRS = INT( WORK(1) ) LWKOPT = MAX( LWKOPT_SYTRF, LWKOPT_SYTRS ) WORK( 1 ) = LWKOPT - IF( LWORK.LT.LWKOPT .AND. .NOT.LQUERY ) THEN - INFO = -10 - END IF END IF * IF( INFO.NE.0 ) THEN diff --git a/lapack-netlib/SRC/zsysv_aa_2stage.f b/lapack-netlib/SRC/zsysv_aa_2stage.f index fcf9bc870..029ed587d 100644 --- a/lapack-netlib/SRC/zsysv_aa_2stage.f +++ b/lapack-netlib/SRC/zsysv_aa_2stage.f @@ -105,6 +105,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -124,7 +125,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -150,6 +151,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -233,19 +235,18 @@ INFO = -3 ELSE IF( LDA.LT.MAX( 1, N ) ) THEN INFO = -5 + ELSE IF( LTB.LT.( 4*N ) .AND. .NOT.TQUERY ) THEN + INFO = -7 ELSE IF( LDB.LT.MAX( 1, N ) ) THEN INFO = -11 + ELSE IF( LWORK.LT.N .AND. .NOT.WQUERY ) THEN + INFO = -13 END IF * IF( INFO.EQ.0 ) THEN CALL ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, -1, IPIV, $ IPIV2, WORK, -1, INFO ) LWKOPT = INT( WORK(1) ) - IF( LTB.LT.INT( TB(1) ) .AND. .NOT.TQUERY ) THEN - INFO = -7 - ELSE IF( LWORK.LT.LWKOPT .AND. .NOT.WQUERY ) THEN - INFO = -13 - END IF END IF * IF( INFO.NE.0 ) THEN diff --git a/lapack-netlib/SRC/zsytrf_aa_2stage.f b/lapack-netlib/SRC/zsytrf_aa_2stage.f index 1f916726e..d3486c1a7 100644 --- a/lapack-netlib/SRC/zsytrf_aa_2stage.f +++ b/lapack-netlib/SRC/zsytrf_aa_2stage.f @@ -93,6 +93,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N, internally *> used to select NB such that LTB >= (3*NB+1)*N. *> @@ -112,7 +113,7 @@ *> *> \param[out] IPIV2 *> \verbatim -*> IPIV is INTEGER array, dimension (N) +*> IPIV2 is INTEGER array, dimension (N) *> On exit, it contains the details of the interchanges, i.e., *> the row and column k of T were interchanged with the *> row and column IPIV(k). @@ -125,6 +126,7 @@ *> *> \param[in] LWORK *> \verbatim +*> LWORK is INTEGER *> The size of WORK. LWORK >= N, internally used to select NB *> such that LWORK >= N*NB. *> @@ -662,6 +664,8 @@ c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 ) * * Factor the band matrix CALL ZGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO ) +* + RETURN * * End of ZSYTRF_AA_2STAGE * diff --git a/lapack-netlib/SRC/zsytri2.f b/lapack-netlib/SRC/zsytri2.f index d5aabd43a..e7303c90b 100644 --- a/lapack-netlib/SRC/zsytri2.f +++ b/lapack-netlib/SRC/zsytri2.f @@ -163,7 +163,7 @@ UPPER = LSAME( UPLO, 'U' ) LQUERY = ( LWORK.EQ.-1 ) * Get blocksize - NBMAX = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 ) + NBMAX = ILAENV( 1, 'ZSYTRI2', UPLO, N, -1, -1, -1 ) IF ( NBMAX .GE. N ) THEN MINSIZE = N ELSE diff --git a/lapack-netlib/SRC/zsytrs_aa_2stage.f b/lapack-netlib/SRC/zsytrs_aa_2stage.f index c5d894753..fa15eee90 100644 --- a/lapack-netlib/SRC/zsytrs_aa_2stage.f +++ b/lapack-netlib/SRC/zsytrs_aa_2stage.f @@ -85,6 +85,7 @@ *> *> \param[in] LTB *> \verbatim +*> LTB is INTEGER *> The size of the array TB. LTB >= 4*N. *> \endverbatim *> diff --git a/lapack-netlib/TESTING/LIN/dchksy_aa_2stage.f b/lapack-netlib/TESTING/LIN/dchksy_aa_2stage.f index 5698bcf94..f6d990d1c 100644 --- a/lapack-netlib/TESTING/LIN/dchksy_aa_2stage.f +++ b/lapack-netlib/TESTING/LIN/dchksy_aa_2stage.f @@ -218,7 +218,7 @@ * .. * .. External Subroutines .. EXTERNAL ALAERH, ALAHD, ALASUM, DERRSY, DLACPY, DLARHS, - $ DLATB4, DLATMS, DPOT02, DSYTRF_AA_2STAGE + $ DLATB4, DLATMS, DPOT02, DSYTRF_AA_2STAGE, $ DSYTRS_AA_2STAGE, XLAENV * .. * .. Intrinsic Functions .. diff --git a/lapack-netlib/TESTING/LIN/ddrvsy_aa_2stage.f b/lapack-netlib/TESTING/LIN/ddrvsy_aa_2stage.f index 0be321eb0..898422654 100644 --- a/lapack-netlib/TESTING/LIN/ddrvsy_aa_2stage.f +++ b/lapack-netlib/TESTING/LIN/ddrvsy_aa_2stage.f @@ -204,7 +204,7 @@ * .. External Subroutines .. EXTERNAL ALADHD, ALAERH, ALASVM, XLAENV, DERRVX, $ DGET04, DLACPY, DLARHS, DLATB4, DLATMS, - $ DSYSV_AA_2STAGE, CHET01_AA, DPOT02, + $ DSYSV_AA_2STAGE, DPOT02, $ DSYTRF_AA_2STAGE * .. * .. Scalars in Common .. diff --git a/lapack-netlib/TESTING/LIN/sdrvsy_aa_2stage.f b/lapack-netlib/TESTING/LIN/sdrvsy_aa_2stage.f index d8d9dc0a9..70e8ff6b8 100644 --- a/lapack-netlib/TESTING/LIN/sdrvsy_aa_2stage.f +++ b/lapack-netlib/TESTING/LIN/sdrvsy_aa_2stage.f @@ -203,7 +203,7 @@ * .. * .. External Subroutines .. EXTERNAL ALADHD, ALAERH, ALASVM, XLAENV, SERRVX, - $ CGET04, SLACPY, SLARHS, SLATB4, SLATMS, + $ SLACPY, SLARHS, SLATB4, SLATMS, $ SSYSV_AA_2STAGE, SSYT01_AA, SPOT02, $ SSYTRF_AA_2STAGE * .. diff --git a/lapack-netlib/TESTING/LIN/zchksy_aa_2stage.f b/lapack-netlib/TESTING/LIN/zchksy_aa_2stage.f index d4d8c2939..87fc47f71 100644 --- a/lapack-netlib/TESTING/LIN/zchksy_aa_2stage.f +++ b/lapack-netlib/TESTING/LIN/zchksy_aa_2stage.f @@ -217,8 +217,8 @@ DOUBLE PRECISION RESULT( NTESTS ) * .. * .. External Subroutines .. - EXTERNAL ALAERH, ALAHD, ALASUM, CERRSY, ZLACPY, ZLARHS, - $ CLATB4, ZLATMS, ZSYT02, ZSYT01, + EXTERNAL ALAERH, ALAHD, ALASUM, ZERRSY, ZLACPY, ZLARHS, + $ ZLATB4, ZLATMS, ZSYT02, ZSYT01, $ ZSYTRF_AA_2STAGE, ZSYTRS_AA_2STAGE, $ XLAENV * .. diff --git a/lapack-netlib/lapack_testing.py b/lapack-netlib/lapack_testing.py index 3c917482d..5d07e1e87 100755 --- a/lapack-netlib/lapack_testing.py +++ b/lapack-netlib/lapack_testing.py @@ -257,16 +257,16 @@ for dtype in range_prec: else: if dtest==16: # LIN TESTS - cmdbase="xlintst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" + cmdbase="LIN/xlintst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" elif dtest==17: # PROTO LIN TESTS - cmdbase="xlintst"+letter+dtypes[0][dtype-1]+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" + cmdbase="LIN/xlintst"+letter+dtypes[0][dtype-1]+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" elif dtest==18: # PROTO LIN TESTS - cmdbase="xlintstrf"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" + cmdbase="LIN/xlintstrf"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" else: # EIG TESTS - cmdbase="xeigtst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" + cmdbase="EIG/xeigtst"+letter+" < "+dtests[0][dtest]+".in > "+dtests[2][dtest]+".out" if (not just_errors and not short_summary): print("Testing "+name+" "+dtests[1][dtest]+"-"+cmdbase, end=' ') # Run the process: either to read the file or run the LAPACK testing