diff --git a/Makefile.arm64 b/Makefile.arm64 index 064e84cbb..1b10446f7 100644 --- a/Makefile.arm64 +++ b/Makefile.arm64 @@ -69,7 +69,7 @@ endif # in GCC>=9 ifeq ($(CORE), NEOVERSEN1) ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) -ifeq ($(GCCVERSIONGTEQ9), 1) +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG))) CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1 ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1 @@ -92,9 +92,14 @@ endif # in GCC>=10.4 ifeq ($(CORE), NEOVERSEV1) ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) -ifeq ($(GCCVERSIONGTEQ10), 1) -ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11))) -CCOMMON_OPT += -march=armv8.4-a+sve -mtune=neoverse-v1 +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG))) +ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG))) +CCOMMON_OPT += -march=armv8.4-a+sve +ifeq (1, $(ISCLANG)) +CCOMMON_OPT += -mtune=cortex-x1 +else +CCOMMON_OPT += -mtune=neoverse-v1 +endif ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1 endif @@ -122,8 +127,8 @@ endif # in GCC>=10.4 ifeq ($(CORE), NEOVERSEN2) ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) -ifeq ($(GCCVERSIONGTEQ10), 1) -ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11))) +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG))) +ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG))) ifneq ($(OSNAME), Darwin) CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2 else @@ -155,7 +160,7 @@ endif # Use a53 tunings because a55 is only available in GCC>=8.1 ifeq ($(CORE), CORTEXA55) ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) -ifeq ($(GCCVERSIONGTEQ8), 1) +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ8) $(ISCLANG))) CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55 ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55 @@ -196,8 +201,13 @@ endif endif ifeq ($(CORE), THUNDERX3T110) -ifeq ($(GCCVERSIONGTEQ10), 1) -CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110 +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG))) +CCOMMON_OPT += -march=armv8.3-a +ifeq (0, $(ISCLANG)) +CCOMMON_OPT += -mtune=thunderx3t110 +else +CCOMMON_OPT += -mtune=thunderx2t99 +endif ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110 endif @@ -225,9 +235,12 @@ endif endif endif -ifeq ($(GCCVERSIONGTEQ9), 1) +ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG))) ifeq ($(CORE), EMAG8180) -CCOMMON_OPT += -march=armv8-a -mtune=emag +CCOMMON_OPT += -march=armv8-a +ifeq ($(ISCLANG), 0) +CCOMMON_OPT += -mtune=emag +endif ifneq ($(F_COMPILER), NAG) FCOMMON_OPT += -march=armv8-a -mtune=emag endif diff --git a/README.md b/README.md index a2eac07be..081d45870 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,12 @@ AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n Cirrus CI: [![Build Status](https://api.cirrus-ci.com/github/xianyi/OpenBLAS.svg?branch=develop)](https://cirrus-ci.com/github/xianyi/OpenBLAS) + [![Build Status](https://dev.azure.com/xianyi/OpenBLAS/_apis/build/status/xianyi.OpenBLAS?branchName=develop)](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop) +OSUOSL POWERCI [![Build Status](https://powerci.osuosl.org/buildStatus/icon?job=OpenBLAS_gh%2Fdevelop)](http://powerci.osuosl.org/job/OpenBLAS_gh/job/develop/) +OSUOSL IBMZ-CI [![Build Status](http://ibmz-ci.osuosl.org/buildStatus/icon?job=OpenBLAS-Z%2Fdevelop)](http://ibmz-ci.osuosl.org/job/OpenBLAS-Z/job/develop/) ## 
Introduction OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version. diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 65ef538e9..ff56ad00b 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -271,6 +271,19 @@ jobs: - script: | make TARGET=ARMV7 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 +- job: OSX_xbuild_DYNAMIC_ARM64 + pool: + vmImage: 'macOS-11' + variables: + CC: /Applications/Xcode_12.5.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang + CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX11.3.sdk -arch arm64 + steps: + - script: | + ls /Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs + /Applications/Xcode_12.5.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -arch arm64 --print-supported-cpus + /Applications/Xcode_11.7.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang --version + make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 + - job: ALPINE_MUSL pool: vmImage: 'ubuntu-latest' diff --git a/cmake/arch.cmake b/cmake/arch.cmake index f70019800..e6e434a0a 100644 --- a/cmake/arch.cmake +++ b/cmake/arch.cmake @@ -46,7 +46,7 @@ if (DYNAMIC_ARCH) if (ARM64) set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA55 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110) if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99) - set(DYNAMIC_CORE "${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2") + set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2) endif () if (DYNAMIC_LIST) set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST}) diff --git a/cmake/lapack.cmake b/cmake/lapack.cmake index d339f0ce9..5c6290484 100644 --- a/cmake/lapack.cmake +++ b/cmake/lapack.cmake @@ -187,7 +187,7 @@ set(CLASRC cposv.f cposvx.f cpotrf2.f cpotri.f cpstrf.f cpstf2.f cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f - crot.f cspcon.f csprfs.f cspsv.f + crot.f crscl.f cspcon.f csprfs.f cspsv.f cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f cstegr.f cstein.f csteqr.f csycon.f csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f @@ -381,7 +381,7 @@ set(ZLASRC zposv.f zposvx.f zpotrf2.f zpotri.f zpotrs.f zpstrf.f zpstf2.f zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f - zrot.f zspcon.f zsprfs.f zspsv.f + zrot.f zrscl.f zspcon.f zsprfs.f zspsv.f zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f zstegr.f zstein.f zsteqr.f zsycon.f zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f @@ -686,7 +686,7 @@ set(CLASRC cposv.c cposvx.c cpotrf2.c cpotri.c cpstrf.c cpstf2.c cppcon.c cppequ.c cpprfs.c cppsv.c cppsvx.c cpptrf.c cpptri.c cpptrs.c cptcon.c cpteqr.c cptrfs.c cptsv.c cptsvx.c cpttrf.c cpttrs.c cptts2.c - crot.c cspcon.c csprfs.c cspsv.c + crot.c crscl.c cspcon.c csprfs.c cspsv.c cspsvx.c csptrf.c csptri.c csptrs.c csrscl.c cstedc.c cstegr.c cstein.c csteqr.c csycon.c csyrfs.c csysv.c csysvx.c csytf2.c csytrf.c csytri.c @@ -878,7 +878,7 @@ set(ZLASRC zposv.c zposvx.c zpotrf2.c zpotri.c zpotrs.c zpstrf.c zpstf2.c zppcon.c zppequ.c zpprfs.c zppsv.c zppsvx.c zpptrf.c zpptri.c zpptrs.c zptcon.c zpteqr.c zptrfs.c zptsv.c zptsvx.c zpttrf.c zpttrs.c zptts2.c - zrot.c zspcon.c zsprfs.c zspsv.c + zrot.c zrscl.c 
zspcon.c zsprfs.c zspsv.c zspsvx.c zsptrf.c zsptri.c zsptrs.c zdrscl.c zstedc.c zstegr.c zstein.c zsteqr.c zsycon.c zsyrfs.c zsysv.c zsysvx.c zsytf2.c zsytrf.c zsytri.c diff --git a/cmake/system.cmake b/cmake/system.cmake index 3dc6c863e..414193ec8 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -280,7 +280,29 @@ if (DEFINED TARGET) if (${TARGET} STREQUAL POWER8) set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math") endif() + +if (${TARGET} STREQUAL NEOVERSEV1) + execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1") + else () + message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse V1.") + endif() + endif() + if (${TARGET} STREQUAL NEOVERSEN2) + execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) + if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2") + else () + message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse N2.") + endif() + endif() + if (${TARGET} STREQUAL ARMV8SVE) + set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve") + endif() + endif() + if (DEFINED BINARY) message(STATUS "Compiling a ${BINARY}-bit binary.") endif () diff --git a/common_loongarch64.h b/common_loongarch64.h index e15539b5f..ce1fcf091 100644 --- a/common_loongarch64.h +++ b/common_loongarch64.h @@ -83,6 +83,19 @@ static inline int blas_quickdivide(blasint x, blasint y){ return x / y; } +#ifndef NO_AFFINITY +static inline int WhereAmI(void){ + int ret = 0, counter = 0; + __asm__ volatile ( + "rdtimel.w %[counter], %[id]" + : [id]"=r"(ret), [counter]"=r"(counter) + : + : "memory" + ); + return ret; +} +#endif + #ifdef DOUBLE #define GET_IMAGE(res) __asm__ __volatile__("fmov.d %0, $f2" : "=f"(res) : : "memory") #else diff --git a/interface/imatcopy.c b/interface/imatcopy.c index c4417e99c..4cf0966cc 100644 --- a/interface/imatcopy.c +++ b/interface/imatcopy.c @@ -100,13 +100,13 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, if ( order == BlasColMajor) { - if ( trans == BlasNoTrans && *ldb < *rows ) info = 9; - if ( trans == BlasTrans && *ldb < *cols ) info = 9; + if ( trans == BlasNoTrans && *ldb < *rows ) info = 8; + if ( trans == BlasTrans && *ldb < *cols ) info = 8; } if ( order == BlasRowMajor) { - if ( trans == BlasNoTrans && *ldb < *cols ) info = 9; - if ( trans == BlasTrans && *ldb < *rows ) info = 9; + if ( trans == BlasNoTrans && *ldb < *cols ) info = 8; + if ( trans == BlasTrans && *ldb < *rows ) info = 8; } if ( order == BlasColMajor && *lda < *rows ) info = 7; diff --git a/kernel/arm64/dot_kernel_sve.c b/kernel/arm64/dot_kernel_sve.c index 8460e0d5e..9c057551e 100644 --- a/kernel/arm64/dot_kernel_sve.c +++ b/kernel/arm64/dot_kernel_sve.c @@ -50,8 +50,8 @@ static FLOAT dot_kernel_sve(BLASLONG n, FLOAT *x, FLOAT *y) { BLASLONG sve_width = SVE_WIDTH; for (BLASLONG i = 0; i < n; i += sve_width * 2) { - svbool_t pg_a = SVE_WHILELT(i, n); - svbool_t pg_b = SVE_WHILELT(i + sve_width, n); + svbool_t pg_a = SVE_WHILELT((uint64_t)i, (uint64_t)n); + svbool_t pg_b = SVE_WHILELT((uint64_t)(i + sve_width), (uint64_t)n); SVE_TYPE x_vec_a = svld1(pg_a, &x[i]); SVE_TYPE y_vec_a =
svld1(pg_a, &y[i]); diff --git a/kernel/arm64/gemm_ncopy_sve_v1x8.c b/kernel/arm64/gemm_ncopy_sve_v1x8.c index 113b1ee40..7b2a2e767 100644 --- a/kernel/arm64/gemm_ncopy_sve_v1x8.c +++ b/kernel/arm64/gemm_ncopy_sve_v1x8.c @@ -107,7 +107,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b) { BLASLONG remaining_n = n - single_vectors_n; if (remaining_n) { a_offset_inner = a_offset; - svbool_t pg = SV_WHILE(0L, remaining_n); + svbool_t pg = SV_WHILE((uint64_t)0L, (uint64_t)remaining_n); uint64_t active = remaining_n; uint64_t i_cnt = m >> 2; while (i_cnt--) { diff --git a/kernel/arm64/gemm_tcopy_sve_v1x8.c b/kernel/arm64/gemm_tcopy_sve_v1x8.c index 68a2cc07c..9a93b6cb7 100644 --- a/kernel/arm64/gemm_tcopy_sve_v1x8.c +++ b/kernel/arm64/gemm_tcopy_sve_v1x8.c @@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){ BLASLONG remaining_n = n - single_vectors_n; if (remaining_n) { a_offset_inner = a_offset; - svbool_t pg = SV_WHILE(0L, remaining_n); + svbool_t pg = SV_WHILE((uint64_t)0L, (uint64_t)remaining_n); uint64_t active = remaining_n; uint64_t i_cnt = m >> 2; while (i_cnt--) { diff --git a/kernel/arm64/trmm_lncopy_sve_v1.c b/kernel/arm64/trmm_lncopy_sve_v1.c index 918e945ac..c7f79e3fd 100644 --- a/kernel/arm64/trmm_lncopy_sve_v1.c +++ b/kernel/arm64/trmm_lncopy_sve_v1.c @@ -52,11 +52,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON FLOAT *ao; #ifdef DOUBLE svint64_t index = svindex_s64(0LL, lda); - svbool_t pn = svwhilelt_b64(js, n); + svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n); int n_active = svcntp_b64(svptrue_b64(), pn); #else svint32_t index = svindex_s32(0, lda); - svbool_t pn = svwhilelt_b32(js, n); + svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n); int n_active = svcntp_b32(svptrue_b32(), pn); #endif do @@ -123,11 +123,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON posY += n_active; js += n_active; #ifdef DOUBLE - pn = svwhilelt_b64(js, n); + pn = svwhilelt_b64((uint64_t)js, (uint64_t)n); n_active = svcntp_b64(svptrue_b64(), pn); } while (svptest_any(svptrue_b64(), pn)); #else - pn = svwhilelt_b32(js, n); + pn = svwhilelt_b32((uint64_t)js, (uint64_t)n); n_active = svcntp_b32(svptrue_b32(), pn); } while (svptest_any(svptrue_b32(), pn)); #endif diff --git a/kernel/arm64/trmm_ltcopy_sve_v1.c b/kernel/arm64/trmm_ltcopy_sve_v1.c index b76cc56de..b3ba68973 100644 --- a/kernel/arm64/trmm_ltcopy_sve_v1.c +++ b/kernel/arm64/trmm_ltcopy_sve_v1.c @@ -51,10 +51,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON FLOAT *ao; js = 0; #ifdef DOUBLE - svbool_t pn = svwhilelt_b64(js, n); + svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n); int n_active = svcntp_b64(svptrue_b64(), pn); #else - svbool_t pn = svwhilelt_b32(js, n); + svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n); int n_active = svcntp_b32(svptrue_b32(), pn); #endif do @@ -122,11 +122,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON posY += n_active; js += n_active; #ifdef DOUBLE - pn = svwhilelt_b64(js, n); + pn = svwhilelt_b64((uint64_t)js, (uint64_t)n); n_active = svcntp_b64(svptrue_b64(), pn); } while (svptest_any(svptrue_b64(), pn)); #else - pn = svwhilelt_b32(js, n); + pn = svwhilelt_b32((uint64_t)js, (uint64_t)n); n_active = svcntp_b32(svptrue_b32(), pn); } while (svptest_any(svptrue_b32(), pn)); #endif diff --git a/kernel/arm64/trmm_uncopy_sve_v1.c b/kernel/arm64/trmm_uncopy_sve_v1.c index 
75fa163ae..a47d2096c 100644 --- a/kernel/arm64/trmm_uncopy_sve_v1.c +++ b/kernel/arm64/trmm_uncopy_sve_v1.c @@ -52,11 +52,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON FLOAT *ao; #ifdef DOUBLE svint64_t index = svindex_s64(0LL, lda); - svbool_t pn = svwhilelt_b64(js, n); + svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n); int n_active = svcntp_b64(svptrue_b64(), pn); #else svint32_t index = svindex_s32(0, lda); - svbool_t pn = svwhilelt_b32(js, n); + svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n); int n_active = svcntp_b32(svptrue_b32(), pn); #endif do @@ -123,11 +123,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON posY += n_active; js += n_active; #ifdef DOUBLE - pn = svwhilelt_b64(js, n); + pn = svwhilelt_b64((uint64_t)js, (uint64_t)n); n_active = svcntp_b64(svptrue_b64(), pn); } while (svptest_any(svptrue_b64(), pn)); #else - pn = svwhilelt_b32(js, n); + pn = svwhilelt_b32((uint64_t)js, (uint64_t)n); n_active = svcntp_b32(svptrue_b32(), pn); } while (svptest_any(svptrue_b32(), pn)); #endif diff --git a/kernel/arm64/trmm_utcopy_sve_v1.c b/kernel/arm64/trmm_utcopy_sve_v1.c index 36a03242a..c5188beb4 100644 --- a/kernel/arm64/trmm_utcopy_sve_v1.c +++ b/kernel/arm64/trmm_utcopy_sve_v1.c @@ -51,10 +51,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON FLOAT *ao; js = 0; #ifdef DOUBLE - svbool_t pn = svwhilelt_b64(js, n); + svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n); int n_active = svcntp_b64(svptrue_b64(), pn); #else - svbool_t pn = svwhilelt_b32(js, n); + svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n); int n_active = svcntp_b32(svptrue_b32(), pn); #endif do @@ -121,11 +121,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON posY += n_active; js += n_active; #ifdef DOUBLE - pn = svwhilelt_b64(js, n); + pn = svwhilelt_b64((uint64_t)js, (uint64_t)n); n_active = svcntp_b64(svptrue_b64(), pn); } while (svptest_any(svptrue_b64(), pn)); #else - pn = svwhilelt_b32(js, n); + pn = svwhilelt_b32((uint64_t)js, (uint64_t)n); n_active = svcntp_b32(svptrue_b32(), pn); } while (svptest_any(svptrue_b32(), pn)); #endif diff --git a/kernel/arm64/trsm_lncopy_sve.c b/kernel/arm64/trsm_lncopy_sve.c index 5a9d4194a..2895eb85d 100644 --- a/kernel/arm64/trsm_lncopy_sve.c +++ b/kernel/arm64/trsm_lncopy_sve.c @@ -56,13 +56,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT #ifdef DOUBLE int64_t js = 0; svint64_t index = svindex_s64(0LL, lda); - svbool_t pn = svwhilelt_b64(js, n); + svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n); int n_active = svcntp_b64(svptrue_b64(), pn); #else int32_t N = n; int32_t js = 0; svint32_t index = svindex_s32(0, lda); - svbool_t pn = svwhilelt_b32(js, N); + svbool_t pn = svwhilelt_b32((uint32_t)js, (uint32_t)N); int n_active = svcntp_b32(svptrue_b32(), pn); #endif do { @@ -106,11 +106,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT js += n_active; #ifdef DOUBLE - pn = svwhilelt_b64(js, n); + pn = svwhilelt_b64((uint64_t)js, (uint64_t)n); n_active = svcntp_b64(svptrue_b64(), pn); } while (svptest_any(svptrue_b64(), pn)); #else - pn = svwhilelt_b32(js, N); + pn = svwhilelt_b32((uint32_t)js, (uint32_t)N); n_active = svcntp_b32(svptrue_b32(), pn); } while (svptest_any(svptrue_b32(), pn)); #endif diff --git a/kernel/generic/ztrsm_utcopy_1.c b/kernel/generic/ztrsm_utcopy_1.c index 08f85e891..5833a64ef 100644 --- 
a/kernel/generic/ztrsm_utcopy_1.c +++ b/kernel/generic/ztrsm_utcopy_1.c @@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT BLASLONG i, ii, j, jj; - FLOAT data01, data02; + FLOAT data01=0.0, data02=0.0; FLOAT *a1; lda *= 2; diff --git a/kernel/generic/ztrsm_utcopy_2.c b/kernel/generic/ztrsm_utcopy_2.c index 387bb2532..bc495f7c6 100644 --- a/kernel/generic/ztrsm_utcopy_2.c +++ b/kernel/generic/ztrsm_utcopy_2.c @@ -47,6 +47,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT FLOAT data05, data06, data07, data08; FLOAT *a1, *a2; + data01=data02=data07=data08=0.0; lda *= 2; jj = offset; diff --git a/lapack-netlib/LAPACKE/src/lapacke_clarfb.c b/lapack-netlib/LAPACKE/src/lapacke_clarfb.c index ed12b476e..aac7b551d 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clarfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clarfb.c @@ -58,7 +58,7 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); - uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u'; if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { LAPACKE_xerbla( "LAPACKE_clarfb", -8 ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_clarfb_work.c b/lapack-netlib/LAPACKE/src/lapacke_clarfb_work.c index 545769b83..67bbbd34f 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_clarfb_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_clarfb_work.c @@ -60,7 +60,7 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans, nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); - uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u'; ldc_t = MAX(1,m); ldt_t = MAX(1,k); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlarfb.c b/lapack-netlib/LAPACKE/src/lapacke_dlarfb.c index f4ddc62a5..aeebd8dec 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlarfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlarfb.c @@ -57,7 +57,7 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); - uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u'; if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { LAPACKE_xerbla( "LAPACKE_dlarfb", -8 ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_dlarfb_work.c b/lapack-netlib/LAPACKE/src/lapacke_dlarfb_work.c index de444c146..de2f41e66 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_dlarfb_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_dlarfb_work.c @@ -59,7 +59,7 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans, nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); - uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + uplo = ( ( forward && col ) || !( forward || col ) ) ? 
'l' : 'u'; ldc_t = MAX(1,m); ldt_t = MAX(1,k); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slarfb.c b/lapack-netlib/LAPACKE/src/lapacke_slarfb.c index d36958f93..3d6c29f88 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slarfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slarfb.c @@ -57,7 +57,7 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); - uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u'; if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { LAPACKE_xerbla( "LAPACKE_slarfb", -8 ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_slarfb_work.c b/lapack-netlib/LAPACKE/src/lapacke_slarfb_work.c index 8b6127633..72a392a77 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_slarfb_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_slarfb_work.c @@ -59,7 +59,7 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans, nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); - uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u'; ldc_t = MAX(1,m); ldt_t = MAX(1,k); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlarfb.c b/lapack-netlib/LAPACKE/src/lapacke_zlarfb.c index 85355b202..c5edbbc0e 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlarfb.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlarfb.c @@ -58,7 +58,7 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); - uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u'; if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) { LAPACKE_xerbla( "LAPACKE_zlarfb", -8 ); diff --git a/lapack-netlib/LAPACKE/src/lapacke_zlarfb_work.c b/lapack-netlib/LAPACKE/src/lapacke_zlarfb_work.c index 72d85ec82..232c8ef58 100644 --- a/lapack-netlib/LAPACKE/src/lapacke_zlarfb_work.c +++ b/lapack-netlib/LAPACKE/src/lapacke_zlarfb_work.c @@ -60,7 +60,7 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans, nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) ); ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) ); - uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u'; + uplo = ( ( forward && col ) || !( forward || col ) ) ? 
'l' : 'u'; ldc_t = MAX(1,m); ldt_t = MAX(1,k); diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 74db14e46..c75fd5f49 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -280,7 +280,7 @@ CLASRC_O = \ cposv.o cposvx.o cpotf2.o cpotri.o cpstrf.o cpstf2.o \ cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \ cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \ - crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \ + crot.o crscl.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \ cspsvx.o csptrf.o csptri.o csptrs.o csrscl.o cstedc.o \ cstegr.o cstein.o csteqr.o \ csycon.o csymv.o \ @@ -488,7 +488,7 @@ ZLASRC_O = \ zposv.o zposvx.o zpotf2.o zpotrf.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \ zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \ zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \ - zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \ + zrot.o zrscl.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \ zspsvx.o zsptrf.o zsptri.o zsptrs.o zdrscl.o zstedc.o \ zstegr.o zstein.o zsteqr.o \ zsycon.o zsymv.o \ diff --git a/lapack-netlib/SRC/cgelss.f b/lapack-netlib/SRC/cgelss.f index da6b9092f..d1e38c504 100644 --- a/lapack-netlib/SRC/cgelss.f +++ b/lapack-netlib/SRC/cgelss.f @@ -170,7 +170,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEsolve +*> \ingroup gelss * * ===================================================================== SUBROUTINE CGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, @@ -214,8 +214,7 @@ * .. External Subroutines .. EXTERNAL CBDSQR, CCOPY, CGEBRD, CGELQF, CGEMM, CGEMV, $ CGEQRF, CLACPY, CLASCL, CLASET, CSRSCL, CUNGBR, - $ CUNMBR, CUNMLQ, CUNMQR, SLABAD, SLASCL, SLASET, - $ XERBLA + $ CUNMBR, CUNMLQ, CUNMQR, SLASCL, SLASET, XERBLA * .. * .. External Functions .. INTEGER ILAENV @@ -388,7 +387,6 @@ SFMIN = SLAMCH( 'S' ) SMLNUM = SFMIN / EPS BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Scale A if max element outside range [SMLNUM,BIGNUM] * @@ -540,7 +538,7 @@ $ LDB, CZERO, WORK, N ) CALL CLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) 20 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL CGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) CALL CCOPY( N, WORK, 1, B, 1 ) END IF @@ -645,7 +643,7 @@ CALL CLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), $ LDB ) 40 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL CGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ), $ 1, CZERO, WORK( IWORK ), 1 ) CALL CCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) @@ -737,7 +735,7 @@ $ LDB, CZERO, WORK, N ) CALL CLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) 60 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL CGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) CALL CCOPY( N, WORK, 1, B, 1 ) END IF diff --git a/lapack-netlib/SRC/cgetf2.f b/lapack-netlib/SRC/cgetf2.f index aac989970..995ee40ec 100644 --- a/lapack-netlib/SRC/cgetf2.f +++ b/lapack-netlib/SRC/cgetf2.f @@ -101,7 +101,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexGEcomputational +*> \ingroup getf2 * * ===================================================================== SUBROUTINE CGETF2( M, N, A, LDA, IPIV, INFO ) @@ -126,16 +126,14 @@ $ ZERO = ( 0.0E+0, 0.0E+0 ) ) * .. * .. Local Scalars .. - REAL SFMIN - INTEGER I, J, JP + INTEGER J, JP * .. * .. External Functions .. - REAL SLAMCH INTEGER ICAMAX - EXTERNAL SLAMCH, ICAMAX + EXTERNAL ICAMAX * .. * .. External Subroutines .. 
- EXTERNAL CGERU, CSCAL, CSWAP, XERBLA + EXTERNAL CGERU, CRSCL, CSWAP, XERBLA * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -161,10 +159,6 @@ * IF( M.EQ.0 .OR. N.EQ.0 ) $ RETURN -* -* Compute machine safe minimum -* - SFMIN = SLAMCH('S') * DO 10 J = 1, MIN( M, N ) * @@ -181,15 +175,8 @@ * * Compute elements J+1:M of J-th column. * - IF( J.LT.M ) THEN - IF( ABS(A( J, J )) .GE. SFMIN ) THEN - CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) - ELSE - DO 20 I = 1, M-J - A( J+I, J ) = A( J+I, J ) / A( J, J ) - 20 CONTINUE - END IF - END IF + IF( J.LT.M ) + $ CALL CRSCL( M-J, A( J, J ), A( J+1, J ), 1 ) * ELSE IF( INFO.EQ.0 ) THEN * diff --git a/lapack-netlib/SRC/crscl.c b/lapack-netlib/SRC/crscl.c new file mode 100644 index 000000000..7c87553d5 --- /dev/null +++ b/lapack-netlib/SRC/crscl.c @@ -0,0 +1,735 @@ +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <complex.h> +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir;
+ ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, 
integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b CRSCL multiplies a vector by the reciprocal of a real scalar. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download CRSCL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE CRSCL( N, A, X, INCX ) */ + +/* INTEGER INCX, N */ +/* COMPLEX A */ +/* COMPLEX X( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > CRSCL multiplies an n-element complex vector x by the complex scalar */ +/* > 1/a. This is done without overflow or underflow as long as */ +/* > the final result x/a does not overflow or underflow. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of components of the vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX */ +/* > The scalar a which is used to divide each component of x. */ +/* > A must not be 0, or the subroutine will divide by zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX array, dimension */ +/* > (1+(N-1)*abs(INCX)) */ +/* > The n-element vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > The increment between successive values of the vector X. */ +/* > > 0: X(1) = X(1) and X(1+(i-1)*INCX) = x(i), 1< i<= n */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd. 
*/ + +/* > \ingroup complexOTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int crscl_(integer *n, complex *a, complex *x, integer *incx) +{ + /* System generated locals */ + real r__1, r__2; + complex q__1; + + /* Local variables */ + real absi, absr; + extern /* Subroutine */ int cscal_(integer *, complex *, complex *, + integer *); + real ai, ar, ui, ov, ur; + extern real slamch_(char *); + extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer + *); + real safmin, safmax; + extern /* Subroutine */ int csrscl_(integer *, real *, complex *, integer + *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Quick return if possible */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + +/* Get machine parameters */ + + safmin = slamch_("S"); + safmax = 1.f / safmin; + ov = slamch_("O"); + +/* Initialize constants related to A. */ + + ar = a->r; + ai = r_imag(a); + absr = abs(ar); + absi = abs(ai); + + if (ai == 0.f) { +/* If alpha is real, then we can use csrscl */ + csrscl_(n, &ar, &x[1], incx); + + } else if (ar == 0.f) { +/* If alpha has a zero real part, then we follow the same rules as if */ +/* alpha were real. */ + if (absi > safmax) { + csscal_(n, &safmin, &x[1], incx); + r__1 = -safmax / ai; + q__1.r = 0.f, q__1.i = r__1; + cscal_(n, &q__1, &x[1], incx); + } else if (absi < safmin) { + r__1 = -safmin / ai; + q__1.r = 0.f, q__1.i = r__1; + cscal_(n, &q__1, &x[1], incx); + csscal_(n, &safmax, &x[1], incx); + } else { + r__1 = -1.f / ai; + q__1.r = 0.f, q__1.i = r__1; + cscal_(n, &q__1, &x[1], incx); + } + + } else { +/* The following numbers can be computed. */ +/* They are the inverse of the real and imaginary parts of 1/alpha. */ +/* Note that a and b are always different from zero. */ +/* NaNs are only possible if either: */ +/* 1. alphaR or alphaI is NaN. */ +/* 2. alphaR and alphaI are both infinite, in which case it makes sense */ +/* to propagate a NaN. */ + ur = ar + ai * (ai / ar); + ui = ai + ar * (ar / ai); + + if (abs(ur) < safmin || abs(ui) < safmin) { +/* This means that both alphaR and alphaI are very small. */ + r__1 = safmin / ur; + r__2 = -safmin / ui; + q__1.r = r__1, q__1.i = r__2; + cscal_(n, &q__1, &x[1], incx); + csscal_(n, &safmax, &x[1], incx); + } else if (abs(ur) > safmax || abs(ui) > safmax) { + if (absr > ov || absi > ov) { +/* This means that a and b are both Inf. No need for scaling. */ + r__1 = 1.f / ur; + r__2 = -1.f / ui; + q__1.r = r__1, q__1.i = r__2; + cscal_(n, &q__1, &x[1], incx); + } else { + csscal_(n, &safmin, &x[1], incx); + if (abs(ur) > ov || abs(ui) > ov) { +/* Infs were generated. We do proper scaling to avoid them. 
*/ + if (absr >= absi) { +/* ABS( UR ) <= ABS( UI ) */ + ur = safmin * ar + safmin * (ai * (ai / ar)); + ui = safmin * ai + ar * (safmin * ar / ai); + } else { +/* ABS( UR ) > ABS( UI ) */ + ur = safmin * ar + ai * (safmin * ai / ar); + ui = safmin * ai + safmin * (ar * (ar / ai)); + } + r__1 = 1.f / ur; + r__2 = -1.f / ui; + q__1.r = r__1, q__1.i = r__2; + cscal_(n, &q__1, &x[1], incx); + } else { + r__1 = safmax / ur; + r__2 = -safmax / ui; + q__1.r = r__1, q__1.i = r__2; + cscal_(n, &q__1, &x[1], incx); + } + } + } else { + r__1 = 1.f / ur; + r__2 = -1.f / ui; + q__1.r = r__1, q__1.i = r__2; + cscal_(n, &q__1, &x[1], incx); + } + } + + return 0; + +/* End of CRSCL */ + +} /* crscl_ */ + diff --git a/lapack-netlib/SRC/crscl.f b/lapack-netlib/SRC/crscl.f new file mode 100644 index 000000000..22919cd62 --- /dev/null +++ b/lapack-netlib/SRC/crscl.f @@ -0,0 +1,202 @@ +*> \brief \b CRSCL multiplies a vector by the reciprocal of a real scalar. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download CRSCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE CRSCL( N, A, X, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* COMPLEX A +* .. +* .. Array Arguments .. +* COMPLEX X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> CRSCL multiplies an n-element complex vector x by the complex scalar +*> 1/a. This is done without overflow or underflow as long as +*> the final result x/a does not overflow or underflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of components of the vector x. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX +*> The scalar a which is used to divide each component of x. +*> A must not be 0, or the subroutine will divide by zero. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX array, dimension +*> (1+(N-1)*abs(INCX)) +*> The n-element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector X. +*> > 0: X(1) = X(1) and X(1+(i-1)*INCX) = x(i), 1< i<= n +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complexOTHERauxiliary +* +* ===================================================================== + SUBROUTINE CRSCL( N, A, X, INCX ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX A +* .. +* .. Array Arguments .. + COMPLEX X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + REAL ZERO, ONE + PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) +* .. +* .. Local Scalars .. + REAL SAFMAX, SAFMIN, OV, AR, AI, ABSR, ABSI, UR + % , UI +* .. +* .. External Functions .. + REAL SLAMCH + COMPLEX CLADIV + EXTERNAL SLAMCH, CLADIV +* .. +* .. External Subroutines .. + EXTERNAL CSCAL, CSSCAL, CSRSCL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. 
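As a quick cross-check of the identity that the new CRSCL (and its double-precision twin ZRSCL) relies on (a derivation sketch, not part of the patch): the locals UR and UI above are exactly the reciprocals of the real and imaginary parts of 1/a, because for a = a_r + i a_i with both parts nonzero

$$\frac{1}{a}=\frac{\bar{a}}{|a|^{2}}=\frac{a_r}{a_r^{2}+a_i^{2}}-i\,\frac{a_i}{a_r^{2}+a_i^{2}}=\frac{1}{u_r}-\frac{i}{u_i},\qquad u_r=a_r+a_i\left(\frac{a_i}{a_r}\right),\qquad u_i=a_i+a_r\left(\frac{a_r}{a_i}\right).$$

So CSCAL with CMPLX( ONE / UR, -ONE / UI ) multiplies X by exactly 1/a; the SAFMIN/SAFMAX branches merely re-associate these products so that UR and UI stay representable when a is very small or very large.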
+* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SAFMIN = SLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + OV = SLAMCH( 'O' ) +* +* Initialize constants related to A. +* + AR = REAL( A ) + AI = AIMAG( A ) + ABSR = ABS( AR ) + ABSI = ABS( AI ) +* + IF( AI.EQ.ZERO ) THEN +* If alpha is real, then we can use csrscl + CALL CSRSCL( N, AR, X, INCX ) +* + ELSE IF( AR.EQ.ZERO ) THEN +* If alpha has a zero real part, then we follow the same rules as if +* alpha were real. + IF( ABSI.GT.SAFMAX ) THEN + CALL CSSCAL( N, SAFMIN, X, INCX ) + CALL CSCAL( N, CMPLX( ZERO, -SAFMAX / AI ), X, INCX ) + ELSE IF( ABSI.LT.SAFMIN ) THEN + CALL CSCAL( N, CMPLX( ZERO, -SAFMIN / AI ), X, INCX ) + CALL CSSCAL( N, SAFMAX, X, INCX ) + ELSE + CALL CSCAL( N, CMPLX( ZERO, -ONE / AI ), X, INCX ) + END IF +* + ELSE +* The following numbers can be computed. +* They are the inverse of the real and imaginary parts of 1/alpha. +* Note that a and b are always different from zero. +* NaNs are only possible if either: +* 1. alphaR or alphaI is NaN. +* 2. alphaR and alphaI are both infinite, in which case it makes sense +* to propagate a NaN. + UR = AR + AI * ( AI / AR ) + UI = AI + AR * ( AR / AI ) +* + IF( (ABS( UR ).LT.SAFMIN).OR.(ABS( UI ).LT.SAFMIN) ) THEN +* This means that both alphaR and alphaI are very small. + CALL CSCAL( N, CMPLX( SAFMIN / UR, -SAFMIN / UI ), X, INCX ) + CALL CSSCAL( N, SAFMAX, X, INCX ) + ELSE IF( (ABS( UR ).GT.SAFMAX).OR.(ABS( UI ).GT.SAFMAX) ) THEN + IF( (ABSR.GT.OV).OR.(ABSI.GT.OV) ) THEN +* This means that a and b are both Inf. No need for scaling. + CALL CSCAL( N, CMPLX( ONE / UR, -ONE / UI ), X, INCX ) + ELSE + CALL CSSCAL( N, SAFMIN, X, INCX ) + IF( (ABS( UR ).GT.OV).OR.(ABS( UI ).GT.OV) ) THEN +* Infs were generated. We do proper scaling to avoid them. + IF( ABSR.GE.ABSI ) THEN +* ABS( UR ) <= ABS( UI ) + UR = (SAFMIN * AR) + SAFMIN * (AI * ( AI / AR )) + UI = (SAFMIN * AI) + AR * ( (SAFMIN * AR) / AI ) + ELSE +* ABS( UR ) > ABS( UI ) + UR = (SAFMIN * AR) + AI * ( (SAFMIN * AI) / AR ) + UI = (SAFMIN * AI) + SAFMIN * (AR * ( AR / AI )) + END IF + CALL CSCAL( N, CMPLX( ONE / UR, -ONE / UI ), X, INCX ) + ELSE + CALL CSCAL( N, CMPLX( SAFMAX / UR, -SAFMAX / UI ), + $ X, INCX ) + END IF + END IF + ELSE + CALL CSCAL( N, CMPLX( ONE / UR, -ONE / UI ), X, INCX ) + END IF + END IF +* + RETURN +* +* End of CRSCL +* + END diff --git a/lapack-netlib/SRC/cstemr.f b/lapack-netlib/SRC/cstemr.f index d49684db3..9d47450e3 100644 --- a/lapack-netlib/SRC/cstemr.f +++ b/lapack-netlib/SRC/cstemr.f @@ -320,7 +320,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complexOTHERcomputational +*> \ingroup stemr * *> \par Contributors: * ================== @@ -329,7 +329,8 @@ *> Jim Demmel, University of California, Berkeley, USA \n *> Inderjit Dhillon, University of Texas, Austin, USA \n *> Osni Marques, LBNL/NERSC, USA \n -*> Christof Voemel, University of California, Berkeley, USA +*> Christof Voemel, University of California, Berkeley, USA \n +*> Aravindh Krishnamoorthy, FAU, Erlangen, Germany \n * * ===================================================================== SUBROUTINE CSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, @@ -361,7 +362,8 @@ $ MINRGP = 3.0E-3 ) * .. * .. Local Scalars .. 
- LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY, + $ LAESWAP INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW, $ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD, $ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP, @@ -397,6 +399,7 @@ * LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) ) ZQUERY = ( NZC.EQ.-1 ) + LAESWAP = .FALSE. * SSTEMR needs WORK of size 6*N, IWORK of size 3*N. * In addition, SLARRE needs WORK of size 6*N, IWORK of size 5*N. @@ -519,6 +522,15 @@ ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN CALL SLAEV2( D(1), E(1), D(2), R1, R2, CS, SN ) END IF +* D/S/LAE2 and D/S/LAEV2 outputs satisfy |R1| >= |R2|. However, +* the following code requires R1 >= R2. Hence, we correct +* the order of R1, R2, CS, SN if R1 < R2 before further processing. + IF( R1.LT.R2 ) THEN + E(2) = R1 + R1 = R2 + R2 = E(2) + LAESWAP = .TRUE. + ENDIF IF( ALLEIG.OR. $ (VALEIG.AND.(R2.GT.WL).AND. $ (R2.LE.WU)).OR. @@ -526,8 +538,13 @@ M = M+1 W( M ) = R2 IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN - Z( 1, M ) = -SN - Z( 2, M ) = CS + IF( LAESWAP ) THEN + Z( 1, M ) = CS + Z( 2, M ) = SN + ELSE + Z( 1, M ) = -SN + Z( 2, M ) = CS + ENDIF * Note: At most one of SN and CS can be zero. IF (SN.NE.ZERO) THEN IF (CS.NE.ZERO) THEN @@ -550,8 +567,13 @@ M = M+1 W( M ) = R1 IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN - Z( 1, M ) = CS - Z( 2, M ) = SN + IF( LAESWAP ) THEN + Z( 1, M ) = -SN + Z( 2, M ) = CS + ELSE + Z( 1, M ) = CS + Z( 2, M ) = SN + ENDIF * Note: At most one of SN and CS can be zero. IF (SN.NE.ZERO) THEN IF (CS.NE.ZERO) THEN diff --git a/lapack-netlib/SRC/dgelss.f b/lapack-netlib/SRC/dgelss.f index c4190f2e0..38449be7f 100644 --- a/lapack-netlib/SRC/dgelss.f +++ b/lapack-netlib/SRC/dgelss.f @@ -164,7 +164,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup doubleGEsolve +*> \ingroup gelss * * ===================================================================== SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, @@ -203,7 +203,7 @@ * .. * .. External Subroutines .. EXTERNAL DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV, - $ DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR, + $ DGEQRF, DLACPY, DLASCL, DLASET, DORGBR, $ DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA * .. * .. External Functions .. @@ -385,7 +385,6 @@ SFMIN = DLAMCH( 'S' ) SMLNUM = SFMIN / EPS BIGNUM = ONE / SMLNUM - CALL DLABAD( SMLNUM, BIGNUM ) * * Scale A if max element outside range [SMLNUM,BIGNUM] * @@ -529,7 +528,7 @@ $ LDB, ZERO, WORK, N ) CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) 20 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) CALL DCOPY( N, WORK, 1, B, 1 ) END IF @@ -626,7 +625,7 @@ CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), $ LDB ) 40 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ), $ 1, ZERO, WORK( IWORK ), 1 ) CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) @@ -712,7 +711,7 @@ $ LDB, ZERO, WORK, N ) CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) 60 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) CALL DCOPY( N, WORK, 1, B, 1 ) END IF diff --git a/lapack-netlib/SRC/dstemr.f b/lapack-netlib/SRC/dstemr.f index d0c71ddd9..44a33423e 100644 --- a/lapack-netlib/SRC/dstemr.f +++ b/lapack-netlib/SRC/dstemr.f @@ -303,7 +303,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. 
* -*> \ingroup doubleOTHERcomputational +*> \ingroup stemr * *> \par Contributors: * ================== @@ -312,7 +312,8 @@ *> Jim Demmel, University of California, Berkeley, USA \n *> Inderjit Dhillon, University of Texas, Austin, USA \n *> Osni Marques, LBNL/NERSC, USA \n -*> Christof Voemel, University of California, Berkeley, USA +*> Christof Voemel, University of California, Berkeley, USA \n +*> Aravindh Krishnamoorthy, FAU, Erlangen, Germany \n * * ===================================================================== SUBROUTINE DSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, @@ -344,7 +345,8 @@ $ MINRGP = 1.0D-3 ) * .. * .. Local Scalars .. - LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY, + $ LAESWAP INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW, $ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD, $ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP, @@ -380,6 +382,7 @@ * LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) ) ZQUERY = ( NZC.EQ.-1 ) + LAESWAP = .FALSE. * DSTEMR needs WORK of size 6*N, IWORK of size 3*N. * In addition, DLARRE needs WORK of size 6*N, IWORK of size 5*N. @@ -502,6 +505,15 @@ ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN CALL DLAEV2( D(1), E(1), D(2), R1, R2, CS, SN ) END IF +* D/S/LAE2 and D/S/LAEV2 outputs satisfy |R1| >= |R2|. However, +* the following code requires R1 >= R2. Hence, we correct +* the order of R1, R2, CS, SN if R1 < R2 before further processing. + IF( R1.LT.R2 ) THEN + E(2) = R1 + R1 = R2 + R2 = E(2) + LAESWAP = .TRUE. + ENDIF IF( ALLEIG.OR. $ (VALEIG.AND.(R2.GT.WL).AND. $ (R2.LE.WU)).OR. @@ -509,8 +521,13 @@ M = M+1 W( M ) = R2 IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN - Z( 1, M ) = -SN - Z( 2, M ) = CS + IF( LAESWAP ) THEN + Z( 1, M ) = CS + Z( 2, M ) = SN + ELSE + Z( 1, M ) = -SN + Z( 2, M ) = CS + ENDIF * Note: At most one of SN and CS can be zero. IF (SN.NE.ZERO) THEN IF (CS.NE.ZERO) THEN @@ -533,8 +550,13 @@ M = M+1 W( M ) = R1 IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN - Z( 1, M ) = CS - Z( 2, M ) = SN + IF( LAESWAP ) THEN + Z( 1, M ) = -SN + Z( 2, M ) = CS + ELSE + Z( 1, M ) = CS + Z( 2, M ) = SN + ENDIF * Note: At most one of SN and CS can be zero. IF (SN.NE.ZERO) THEN IF (CS.NE.ZERO) THEN diff --git a/lapack-netlib/SRC/sgelss.f b/lapack-netlib/SRC/sgelss.f index 9aed4329f..89d3a6e4f 100644 --- a/lapack-netlib/SRC/sgelss.f +++ b/lapack-netlib/SRC/sgelss.f @@ -164,7 +164,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realGEsolve +*> \ingroup gelss * * ===================================================================== SUBROUTINE SGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, @@ -202,7 +202,7 @@ * .. * .. External Subroutines .. EXTERNAL SBDSQR, SCOPY, SGEBRD, SGELQF, SGEMM, SGEMV, - $ SGEQRF, SLABAD, SLACPY, SLASCL, SLASET, SORGBR, + $ SGEQRF, SLACPY, SLASCL, SLASET, SORGBR, $ SORMBR, SORMLQ, SORMQR, SRSCL, XERBLA * .. * .. External Functions .. 
@@ -381,7 +381,6 @@ SFMIN = SLAMCH( 'S' ) SMLNUM = SFMIN / EPS BIGNUM = ONE / SMLNUM - CALL SLABAD( SMLNUM, BIGNUM ) * * Scale A if max element outside range [SMLNUM,BIGNUM] * @@ -525,7 +524,7 @@ $ LDB, ZERO, WORK, N ) CALL SLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) 20 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL SGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) CALL SCOPY( N, WORK, 1, B, 1 ) END IF @@ -622,7 +621,7 @@ CALL SLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), $ LDB ) 40 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL SGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ), $ 1, ZERO, WORK( IWORK ), 1 ) CALL SCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) @@ -708,7 +707,7 @@ $ LDB, ZERO, WORK, N ) CALL SLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) 60 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL SGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 ) CALL SCOPY( N, WORK, 1, B, 1 ) END IF diff --git a/lapack-netlib/SRC/sstemr.f b/lapack-netlib/SRC/sstemr.f index 3a9bbe784..2ed697b69 100644 --- a/lapack-netlib/SRC/sstemr.f +++ b/lapack-netlib/SRC/sstemr.f @@ -303,7 +303,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup realOTHERcomputational +*> \ingroup stemr * *> \par Contributors: * ================== @@ -312,7 +312,8 @@ *> Jim Demmel, University of California, Berkeley, USA \n *> Inderjit Dhillon, University of Texas, Austin, USA \n *> Osni Marques, LBNL/NERSC, USA \n -*> Christof Voemel, University of California, Berkeley, USA +*> Christof Voemel, University of California, Berkeley, USA \n +*> Aravindh Krishnamoorthy, FAU, Erlangen, Germany \n * * ===================================================================== SUBROUTINE SSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, @@ -344,7 +345,8 @@ $ MINRGP = 3.0E-3 ) * .. * .. Local Scalars .. - LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY, + $ LAESWAP INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW, $ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD, $ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP, @@ -378,6 +380,7 @@ * LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) ) ZQUERY = ( NZC.EQ.-1 ) + LAESWAP = .FALSE. * SSTEMR needs WORK of size 6*N, IWORK of size 3*N. * In addition, SLARRE needs WORK of size 6*N, IWORK of size 5*N. @@ -500,6 +503,15 @@ ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN CALL SLAEV2( D(1), E(1), D(2), R1, R2, CS, SN ) END IF +* D/S/LAE2 and D/S/LAEV2 outputs satisfy |R1| >= |R2|. However, +* the following code requires R1 >= R2. Hence, we correct +* the order of R1, R2, CS, SN if R1 < R2 before further processing. + IF( R1.LT.R2 ) THEN + E(2) = R1 + R1 = R2 + R2 = E(2) + LAESWAP = .TRUE. + ENDIF IF( ALLEIG.OR. $ (VALEIG.AND.(R2.GT.WL).AND. $ (R2.LE.WU)).OR. @@ -507,8 +519,13 @@ M = M+1 W( M ) = R2 IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN - Z( 1, M ) = -SN - Z( 2, M ) = CS + IF( LAESWAP ) THEN + Z( 1, M ) = CS + Z( 2, M ) = SN + ELSE + Z( 1, M ) = -SN + Z( 2, M ) = CS + ENDIF * Note: At most one of SN and CS can be zero. IF (SN.NE.ZERO) THEN IF (CS.NE.ZERO) THEN @@ -531,8 +548,13 @@ M = M+1 W( M ) = R1 IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN - Z( 1, M ) = CS - Z( 2, M ) = SN + IF( LAESWAP ) THEN + Z( 1, M ) = -SN + Z( 2, M ) = CS + ELSE + Z( 1, M ) = CS + Z( 2, M ) = SN + ENDIF * Note: At most one of SN and CS can be zero. 
IF (SN.NE.ZERO) THEN IF (CS.NE.ZERO) THEN diff --git a/lapack-netlib/SRC/zgelss.f b/lapack-netlib/SRC/zgelss.f index be53ba95b..afdbaecf0 100644 --- a/lapack-netlib/SRC/zgelss.f +++ b/lapack-netlib/SRC/zgelss.f @@ -170,7 +170,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEsolve +*> \ingroup gelss * * ===================================================================== SUBROUTINE ZGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK, @@ -212,10 +212,9 @@ COMPLEX*16 DUM( 1 ) * .. * .. External Subroutines .. - EXTERNAL DLABAD, DLASCL, DLASET, XERBLA, ZBDSQR, ZCOPY, - $ ZDRSCL, ZGEBRD, ZGELQF, ZGEMM, ZGEMV, ZGEQRF, - $ ZLACPY, ZLASCL, ZLASET, ZUNGBR, ZUNMBR, ZUNMLQ, - $ ZUNMQR + EXTERNAL DLASCL, DLASET, XERBLA, ZBDSQR, ZCOPY, ZDRSCL, + $ ZGEBRD, ZGELQF, ZGEMM, ZGEMV, ZGEQRF, ZLACPY, + $ ZLASCL, ZLASET, ZUNGBR, ZUNMBR, ZUNMLQ * .. * .. External Functions .. INTEGER ILAENV @@ -388,7 +387,6 @@ SFMIN = DLAMCH( 'S' ) SMLNUM = SFMIN / EPS BIGNUM = ONE / SMLNUM - CALL DLABAD( SMLNUM, BIGNUM ) * * Scale A if max element outside range [SMLNUM,BIGNUM] * @@ -540,7 +538,7 @@ $ LDB, CZERO, WORK, N ) CALL ZLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB ) 20 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL ZGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) CALL ZCOPY( N, WORK, 1, B, 1 ) END IF @@ -645,7 +643,7 @@ CALL ZLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ), $ LDB ) 40 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL ZGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ), $ 1, CZERO, WORK( IWORK ), 1 ) CALL ZCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 ) @@ -737,7 +735,7 @@ $ LDB, CZERO, WORK, N ) CALL ZLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB ) 60 CONTINUE - ELSE + ELSE IF( NRHS.EQ.1 ) THEN CALL ZGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 ) CALL ZCOPY( N, WORK, 1, B, 1 ) END IF diff --git a/lapack-netlib/SRC/zgetf2.f b/lapack-netlib/SRC/zgetf2.f index c247f8645..7c63dbbee 100644 --- a/lapack-netlib/SRC/zgetf2.f +++ b/lapack-netlib/SRC/zgetf2.f @@ -101,7 +101,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16GEcomputational +*> \ingroup getf2 * * ===================================================================== SUBROUTINE ZGETF2( M, N, A, LDA, IPIV, INFO ) @@ -127,7 +127,7 @@ * .. * .. Local Scalars .. DOUBLE PRECISION SFMIN - INTEGER I, J, JP + INTEGER J, JP * .. * .. External Functions .. DOUBLE PRECISION DLAMCH @@ -135,7 +135,7 @@ EXTERNAL DLAMCH, IZAMAX * .. * .. External Subroutines .. - EXTERNAL XERBLA, ZGERU, ZSCAL, ZSWAP + EXTERNAL XERBLA, ZGERU, ZRSCL, ZSWAP * .. * .. Intrinsic Functions .. INTRINSIC MAX, MIN @@ -181,15 +181,8 @@ * * Compute elements J+1:M of J-th column. * - IF( J.LT.M ) THEN - IF( ABS(A( J, J )) .GE. 
SFMIN ) THEN - CALL ZSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 ) - ELSE - DO 20 I = 1, M-J - A( J+I, J ) = A( J+I, J ) / A( J, J ) - 20 CONTINUE - END IF - END IF + IF( J.LT.M ) + $ CALL ZRSCL( M-J, A( J, J ), A( J+1, J ), 1 ) * ELSE IF( INFO.EQ.0 ) THEN * diff --git a/lapack-netlib/SRC/zrscl.c b/lapack-netlib/SRC/zrscl.c new file mode 100644 index 000000000..2264b5465 --- /dev/null +++ b/lapack-netlib/SRC/zrscl.c @@ -0,0 +1,735 @@ +#include <math.h> +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include <complex.h> +#ifdef complex +#undef complex +#endif +#ifdef I +#undef I +#endif + +#if defined(_WIN64) +typedef long long BLASLONG; +typedef unsigned long long BLASULONG; +#else +typedef long BLASLONG; +typedef unsigned long BLASULONG; +#endif + +#ifdef LAPACK_ILP64 +typedef BLASLONG blasint; +#if defined(_WIN64) +#define blasabs(x) llabs(x) +#else +#define blasabs(x) labs(x) +#endif +#else +typedef int blasint; +#define blasabs(x) abs(x) +#endif + +typedef blasint integer; + +typedef unsigned int uinteger; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +#ifdef _MSC_VER +static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;} +static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;} +static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;} +static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;} +#else +static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;} +static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;} +static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;} +#endif +#define pCf(z) (*_pCf(z)) +#define pCd(z) (*_pCd(z)) +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +typedef int flag; +typedef int ftnlen; +typedef int ftnint; + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + +/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + integer1 g; + shortint h; + integer i; + /* longint j; */ + real r; + doublereal d; + complex c; +
doublecomplex z; + }; + +typedef union Multitype Multitype; + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#define dabs(x) (fabs(x)) +#define f2cmin(a,b) ((a) <= (b) ? (a) : (b)) +#define f2cmax(a,b) ((a) >= (b) ? (a) : (b)) +#define dmin(a,b) (f2cmin(a,b)) +#define dmax(a,b) (f2cmax(a,b)) +#define bit_test(a,b) ((a) >> (b) & 1) +#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b))) +#define bit_set(a,b) ((a) | ((uinteger)1 << (b))) + +#define abort_() { sig_die("Fortran abort routine called", 1); } +#define c_abs(z) (cabsf(Cf(z))) +#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); } +#ifdef _MSC_VER +#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);} +#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);} +#else +#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);} +#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);} +#endif +#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));} +#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));} +#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));} +//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));} +#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));} +#define d_abs(x) (fabs(*(x))) +#define d_acos(x) (acos(*(x))) +#define d_asin(x) (asin(*(x))) +#define d_atan(x) (atan(*(x))) +#define d_atn2(x, y) (atan2(*(x),*(y))) +#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); } +#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); } +#define d_cos(x) (cos(*(x))) +#define d_cosh(x) (cosh(*(x))) +#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 ) +#define d_exp(x) (exp(*(x))) +#define d_imag(z) (cimag(Cd(z))) +#define r_imag(z) (cimagf(Cf(z))) +#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x))) +#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) ) +#define d_log(x) (log(*(x))) +#define d_mod(x, y) (fmod(*(x), *(y))) +#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x))) +#define d_nint(x) u_nint(*(x)) +#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? 
(__a) : -(__a))) +#define d_sign(a,b) u_sign(*(a),*(b)) +#define r_sign(a,b) u_sign(*(a),*(b)) +#define d_sin(x) (sin(*(x))) +#define d_sinh(x) (sinh(*(x))) +#define d_sqrt(x) (sqrt(*(x))) +#define d_tan(x) (tan(*(x))) +#define d_tanh(x) (tanh(*(x))) +#define i_abs(x) abs(*(x)) +#define i_dnnt(x) ((integer)u_nint(*(x))) +#define i_len(s, n) (n) +#define i_nint(x) ((integer)u_nint(*(x))) +#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b))) +#define pow_dd(ap, bp) ( pow(*(ap), *(bp))) +#define pow_si(B,E) spow_ui(*(B),*(E)) +#define pow_ri(B,E) spow_ui(*(B),*(E)) +#define pow_di(B,E) dpow_ui(*(B),*(E)) +#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));} +#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));} +#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));} +#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; } +#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d)))) +#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; } +#define sig_die(s, kill) { exit(1); } +#define s_stop(s, n) {exit(0);} +static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n"; +#define z_abs(z) (cabs(Cd(z))) +#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));} +#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));} +#define myexit_() break; +#define mycycle_() continue; +#define myceiling_(w) {ceil(w)} +#define myhuge_(w) {HUGE_VAL} +//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);} +#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef logical (*L_fp)(...); +#else +typedef logical (*L_fp)(); +#endif + +static float spow_ui(float x, integer n) { + float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static double dpow_ui(double x, integer n) { + double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#ifdef _MSC_VER +static _Fcomplex cpow_ui(complex x, integer n) { + complex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i; + for(u = n; ; ) { + if(u & 01) pow.r *= x.r, pow.i *= x.i; + if(u >>= 1) x.r *= x.r, x.i *= x.i; + else break; + } + } + _Fcomplex p={pow.r, pow.i}; + return p; +} +#else +static _Complex float cpow_ui(_Complex float x, integer n) { + _Complex float pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +#ifdef _MSC_VER +static _Dcomplex zpow_ui(_Dcomplex x, integer n) { + _Dcomplex pow={1.0,0.0}; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1]; + for(u = n; ; ) { + if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1]; + if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1]; + else break; + } + } + _Dcomplex p = {pow._Val[0], pow._Val[1]}; + return p; +} +#else +static _Complex double zpow_ui(_Complex double x, 
integer n) { + _Complex double pow=1.0; unsigned long int u; + if(n != 0) { + if(n < 0) n = -n, x = 1/x; + for(u = n; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +#endif +static integer pow_ii(integer x, integer n) { + integer pow; unsigned long int u; + if (n <= 0) { + if (n == 0 || x == 1) pow = 1; + else if (x != -1) pow = x == 0 ? 1/x : 0; + else n = -n; + } + if ((n > 0) || !(n == 0 || x == 1 || x != -1)) { + u = n; + for(pow = 1; ; ) { + if(u & 01) pow *= x; + if(u >>= 1) x *= x; + else break; + } + } + return pow; +} +static integer dmaxloc_(double *w, integer s, integer e, integer *n) +{ + double m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static integer smaxloc_(float *w, integer s, integer e, integer *n) +{ + float m; integer i, mi; + for(m=w[s-1], mi=s, i=s+1; i<=e; i++) + if (w[i-1]>m) mi=i ,m=w[i-1]; + return mi-s+1; +} +static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) { + integer n = *n_, incx = *incx_, incy = *incy_, i; +#ifdef _MSC_VER + _Fcomplex zdotc = {0.0, 0.0}; + if (incx == 1 && incy == 1) { + for (i=0;i \brief \b ZRSCL multiplies a vector by the reciprocal of a complex scalar. */ + +/* =========== DOCUMENTATION =========== */ + +/* Online html documentation available at */ +/* http://www.netlib.org/lapack/explore-html/ */ + +/* > \htmlonly */ +/* > Download ZDRSCL + dependencies */ +/* > */ +/* > [TGZ] */ +/* > */ +/* > [ZIP] */ +/* > */ +/* > [TXT] */ +/* > \endhtmlonly */ + +/* Definition: */ +/* =========== */ + +/* SUBROUTINE ZRSCL( N, A, X, INCX ) */ + +/* INTEGER INCX, N */ +/* COMPLEX*16 A */ +/* COMPLEX*16 X( * ) */ + + +/* > \par Purpose: */ +/* ============= */ +/* > */ +/* > \verbatim */ +/* > */ +/* > ZRSCL multiplies an n-element complex vector x by the complex scalar */ +/* > 1/a. This is done without overflow or underflow as long as */ +/* > the final result x/a does not overflow or underflow. */ +/* > \endverbatim */ + +/* Arguments: */ +/* ========== */ + +/* > \param[in] N */ +/* > \verbatim */ +/* > N is INTEGER */ +/* > The number of components of the vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] A */ +/* > \verbatim */ +/* > A is COMPLEX*16 */ +/* > The scalar a which is used to divide each component of x. */ +/* > A must not be 0, or the subroutine will divide by zero. */ +/* > \endverbatim */ +/* > */ +/* > \param[in,out] X */ +/* > \verbatim */ +/* > X is COMPLEX*16 array, dimension */ +/* > (1+(N-1)*abs(INCX)) */ +/* > The n-element vector x. */ +/* > \endverbatim */ +/* > */ +/* > \param[in] INCX */ +/* > \verbatim */ +/* > INCX is INTEGER */ +/* > The increment between successive values of the vector X. */ +/* > > 0: X(1) = x(1) and X(1+(i-1)*INCX) = x(i), 1 < i <= n */ +/* > \endverbatim */ + +/* Authors: */ +/* ======== */ + +/* > \author Univ. of Tennessee */ +/* > \author Univ. of California Berkeley */ +/* > \author Univ. of Colorado Denver */ +/* > \author NAG Ltd.
*/ + +/* > \ingroup complex16OTHERauxiliary */ + +/* ===================================================================== */ +/* Subroutine */ int zrscl_(integer *n, doublecomplex *a, doublecomplex *x, + integer *incx) +{ + /* System generated locals */ + doublereal d__1, d__2; + doublecomplex z__1; + + /* Local variables */ + doublereal absi, absr; + extern /* Subroutine */ int zscal_(integer *, doublecomplex *, + doublecomplex *, integer *); + doublereal ai, ar; + extern doublereal dlamch_(char *); + doublereal ui, ov, ur, safmin, safmax; + extern /* Subroutine */ int zdscal_(integer *, doublereal *, + doublecomplex *, integer *), zdrscl_(integer *, doublereal *, + doublecomplex *, integer *); + + +/* -- LAPACK auxiliary routine -- */ +/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */ +/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */ + + +/* ===================================================================== */ + + +/* Quick return if possible */ + + /* Parameter adjustments */ + --x; + + /* Function Body */ + if (*n <= 0) { + return 0; + } + +/* Get machine parameters */ + + safmin = dlamch_("S"); + safmax = 1. / safmin; + ov = dlamch_("O"); + +/* Initialize constants related to A. */ + + ar = a->r; + ai = d_imag(a); + absr = abs(ar); + absi = abs(ai); + + if (ai == 0.) { +/* If alpha is real, then we can use zdrscl */ + zdrscl_(n, &ar, &x[1], incx); + + } else if (ar == 0.) { +/* If alpha has a zero real part, then we follow the same rules as if */ +/* alpha were real. */ + if (absi > safmax) { + zdscal_(n, &safmin, &x[1], incx); + d__1 = -safmax / ai; + z__1.r = 0., z__1.i = d__1; + zscal_(n, &z__1, &x[1], incx); + } else if (absi < safmin) { + d__1 = -safmin / ai; + z__1.r = 0., z__1.i = d__1; + zscal_(n, &z__1, &x[1], incx); + zdscal_(n, &safmax, &x[1], incx); + } else { + d__1 = -1. / ai; + z__1.r = 0., z__1.i = d__1; + zscal_(n, &z__1, &x[1], incx); + } + + } else { +/* The following numbers can be computed. */ +/* They are the inverse of the real and imaginary parts of 1/alpha. */ +/* Note that a and b are always different from zero. */ +/* NaNs are only possible if either: */ +/* 1. alphaR or alphaI is NaN. */ +/* 2. alphaR and alphaI are both infinite, in which case it makes sense */ +/* to propagate a NaN. */ + ur = ar + ai * (ai / ar); + ui = ai + ar * (ar / ai); + + if (abs(ur) < safmin || abs(ui) < safmin) { +/* This means that both alphaR and alphaI are very small. */ + d__1 = safmin / ur; + d__2 = -safmin / ui; + z__1.r = d__1, z__1.i = d__2; + zscal_(n, &z__1, &x[1], incx); + zdscal_(n, &safmax, &x[1], incx); + } else if (abs(ur) > safmax || abs(ui) > safmax) { + if (absr > ov || absi > ov) { +/* This means that a and b are both Inf. No need for scaling. */ + d__1 = 1. / ur; + d__2 = -1. / ui; + z__1.r = d__1, z__1.i = d__2; + zscal_(n, &z__1, &x[1], incx); + } else { + zdscal_(n, &safmin, &x[1], incx); + if (abs(ur) > ov || abs(ui) > ov) { +/* Infs were generated. We do proper scaling to avoid them. */ + if (absr >= absi) { +/* ABS( UR ) <= ABS( UI ) */ + ur = safmin * ar + safmin * (ai * (ai / ar)); + ui = safmin * ai + ar * (safmin * ar / ai); + } else { +/* ABS( UR ) > ABS( UI ) */ + ur = safmin * ar + ai * (safmin * ai / ar); + ui = safmin * ai + safmin * (ar * (ar / ai)); + } + d__1 = 1. / ur; + d__2 = -1.
/ ui; + z__1.r = d__1, z__1.i = d__2; + zscal_(n, &z__1, &x[1], incx); + } else { + d__1 = safmax / ur; + d__2 = -safmax / ui; + z__1.r = d__1, z__1.i = d__2; + zscal_(n, &z__1, &x[1], incx); + } + } + } else { + d__1 = 1. / ur; + d__2 = -1. / ui; + z__1.r = d__1, z__1.i = d__2; + zscal_(n, &z__1, &x[1], incx); + } + } + + return 0; + +/* End of ZRSCL */ + +} /* zrscl_ */ + diff --git a/lapack-netlib/SRC/zrscl.f b/lapack-netlib/SRC/zrscl.f new file mode 100644 index 000000000..970f6de75 --- /dev/null +++ b/lapack-netlib/SRC/zrscl.f @@ -0,0 +1,203 @@ +*> \brief \b ZRSCL multiplies a vector by the reciprocal of a complex scalar. +* +* =========== DOCUMENTATION =========== +* +* Online html documentation available at +* http://www.netlib.org/lapack/explore-html/ +* +*> \htmlonly +*> Download ZDRSCL + dependencies +*> +*> [TGZ] +*> +*> [ZIP] +*> +*> [TXT] +*> \endhtmlonly +* +* Definition: +* =========== +* +* SUBROUTINE ZRSCL( N, A, X, INCX ) +* +* .. Scalar Arguments .. +* INTEGER INCX, N +* COMPLEX*16 A +* .. +* .. Array Arguments .. +* COMPLEX*16 X( * ) +* .. +* +* +*> \par Purpose: +* ============= +*> +*> \verbatim +*> +*> ZRSCL multiplies an n-element complex vector x by the complex scalar +*> 1/a. This is done without overflow or underflow as long as +*> the final result x/a does not overflow or underflow. +*> \endverbatim +* +* Arguments: +* ========== +* +*> \param[in] N +*> \verbatim +*> N is INTEGER +*> The number of components of the vector x. +*> \endverbatim +*> +*> \param[in] A +*> \verbatim +*> A is COMPLEX*16 +*> The scalar a which is used to divide each component of x. +*> A must not be 0, or the subroutine will divide by zero. +*> \endverbatim +*> +*> \param[in,out] X +*> \verbatim +*> X is COMPLEX*16 array, dimension +*> (1+(N-1)*abs(INCX)) +*> The n-element vector x. +*> \endverbatim +*> +*> \param[in] INCX +*> \verbatim +*> INCX is INTEGER +*> The increment between successive values of the vector X. +*> > 0: X(1) = x(1) and X(1+(i-1)*INCX) = x(i), 1 < i <= n +*> \endverbatim +* +* Authors: +* ======== +* +*> \author Univ. of Tennessee +*> \author Univ. of California Berkeley +*> \author Univ. of Colorado Denver +*> \author NAG Ltd. +* +*> \ingroup complex16OTHERauxiliary +* +* ===================================================================== + SUBROUTINE ZRSCL( N, A, X, INCX ) +* +* -- LAPACK auxiliary routine -- +* -- LAPACK is a software package provided by Univ. of Tennessee, -- +* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- +* +* .. Scalar Arguments .. + INTEGER INCX, N + COMPLEX*16 A +* .. +* .. Array Arguments .. + COMPLEX*16 X( * ) +* .. +* +* ===================================================================== +* +* .. Parameters .. + DOUBLE PRECISION ZERO, ONE + PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) +* .. +* .. Local Scalars .. + DOUBLE PRECISION SAFMAX, SAFMIN, OV, AR, AI, ABSR, ABSI, UR, UI +* .. +* .. External Functions .. + DOUBLE PRECISION DLAMCH + COMPLEX*16 ZLADIV + EXTERNAL DLAMCH, ZLADIV +* .. +* .. External Subroutines .. + EXTERNAL ZSCAL, ZDSCAL, ZDRSCL +* .. +* .. Intrinsic Functions .. + INTRINSIC ABS +* .. +* .. Executable Statements .. +* +* Quick return if possible +* + IF( N.LE.0 ) + $ RETURN +* +* Get machine parameters +* + SAFMIN = DLAMCH( 'S' ) + SAFMAX = ONE / SAFMIN + OV = DLAMCH( 'O' ) +* +* Initialize constants related to A.
+* + AR = DBLE( A ) + AI = DIMAG( A ) + ABSR = ABS( AR ) + ABSI = ABS( AI ) +* + IF( AI.EQ.ZERO ) THEN +* If alpha is real, then we can use ZDRSCL + CALL ZDRSCL( N, AR, X, INCX ) +* + ELSE IF( AR.EQ.ZERO ) THEN +* If alpha has a zero real part, then we follow the same rules as if +* alpha were real. + IF( ABSI.GT.SAFMAX ) THEN + CALL ZDSCAL( N, SAFMIN, X, INCX ) + CALL ZSCAL( N, DCMPLX( ZERO, -SAFMAX / AI ), X, INCX ) + ELSE IF( ABSI.LT.SAFMIN ) THEN + CALL ZSCAL( N, DCMPLX( ZERO, -SAFMIN / AI ), X, INCX ) + CALL ZDSCAL( N, SAFMAX, X, INCX ) + ELSE + CALL ZSCAL( N, DCMPLX( ZERO, -ONE / AI ), X, INCX ) + END IF +* + ELSE +* The following numbers can be computed. +* They are the inverse of the real and imaginary parts of 1/alpha. +* Note that a and b are always different from zero. +* NaNs are only possible if either: +* 1. alphaR or alphaI is NaN. +* 2. alphaR and alphaI are both infinite, in which case it makes sense +* to propagate a NaN. + UR = AR + AI * ( AI / AR ) + UI = AI + AR * ( AR / AI ) +* + IF( (ABS( UR ).LT.SAFMIN).OR.(ABS( UI ).LT.SAFMIN) ) THEN +* This means that both alphaR and alphaI are very small. + CALL ZSCAL( N, DCMPLX( SAFMIN / UR, -SAFMIN / UI ), X, + $ INCX ) + CALL ZDSCAL( N, SAFMAX, X, INCX ) + ELSE IF( (ABS( UR ).GT.SAFMAX).OR.(ABS( UI ).GT.SAFMAX) ) THEN + IF( (ABSR.GT.OV).OR.(ABSI.GT.OV) ) THEN +* This means that a and b are both Inf. No need for scaling. + CALL ZSCAL( N, DCMPLX( ONE / UR, -ONE / UI ), X, INCX ) + ELSE + CALL ZDSCAL( N, SAFMIN, X, INCX ) + IF( (ABS( UR ).GT.OV).OR.(ABS( UI ).GT.OV) ) THEN +* Infs were generated. We do proper scaling to avoid them. + IF( ABSR.GE.ABSI ) THEN +* ABS( UR ) <= ABS( UI ) + UR = (SAFMIN * AR) + SAFMIN * (AI * ( AI / AR )) + UI = (SAFMIN * AI) + AR * ( (SAFMIN * AR) / AI ) + ELSE +* ABS( UR ) > ABS( UI ) + UR = (SAFMIN * AR) + AI * ( (SAFMIN * AI) / AR ) + UI = (SAFMIN * AI) + SAFMIN * (AR * ( AR / AI )) + END IF + CALL ZSCAL( N, DCMPLX( ONE / UR, -ONE / UI ), X, + $ INCX ) + ELSE + CALL ZSCAL( N, DCMPLX( SAFMAX / UR, -SAFMAX / UI ), + $ X, INCX ) + END IF + END IF + ELSE + CALL ZSCAL( N, DCMPLX( ONE / UR, -ONE / UI ), X, INCX ) + END IF + END IF +* + RETURN +* +* End of ZRSCL +* + END diff --git a/lapack-netlib/SRC/zstemr.f b/lapack-netlib/SRC/zstemr.f index b034198de..4eaf5ef97 100644 --- a/lapack-netlib/SRC/zstemr.f +++ b/lapack-netlib/SRC/zstemr.f @@ -320,7 +320,7 @@ *> \author Univ. of Colorado Denver *> \author NAG Ltd. * -*> \ingroup complex16OTHERcomputational +*> \ingroup stemr * *> \par Contributors: * ================== @@ -330,6 +330,7 @@ *> Inderjit Dhillon, University of Texas, Austin, USA \n *> Osni Marques, LBNL/NERSC, USA \n *> Christof Voemel, University of California, Berkeley, USA \n +*> Aravindh Krishnamoorthy, FAU, Erlangen, Germany \n * * ===================================================================== SUBROUTINE ZSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU, @@ -361,7 +362,8 @@ $ MINRGP = 1.0D-3 ) * .. * .. Local Scalars .. - LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY + LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY, + $ LAESWAP INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW, $ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD, $ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP, @@ -397,6 +399,7 @@ * LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) ) ZQUERY = ( NZC.EQ.-1 ) + LAESWAP = .FALSE. * DSTEMR needs WORK of size 6*N, IWORK of size 3*N. * In addition, DLARRE needs WORK of size 6*N, IWORK of size 5*N.
@@ -519,6 +522,15 @@ ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN CALL DLAEV2( D(1), E(1), D(2), R1, R2, CS, SN ) END IF +* D/S/LAE2 and D/S/LAEV2 outputs satisfy |R1| >= |R2|. However, +* the following code requires R1 >= R2. Hence, we correct +* the order of R1, R2, CS, SN if R1 < R2 before further processing. + IF( R1.LT.R2 ) THEN + E(2) = R1 + R1 = R2 + R2 = E(2) + LAESWAP = .TRUE. + ENDIF IF( ALLEIG.OR. $ (VALEIG.AND.(R2.GT.WL).AND. $ (R2.LE.WU)).OR. @@ -526,8 +538,13 @@ M = M+1 W( M ) = R2 IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN - Z( 1, M ) = -SN - Z( 2, M ) = CS + IF( LAESWAP ) THEN + Z( 1, M ) = CS + Z( 2, M ) = SN + ELSE + Z( 1, M ) = -SN + Z( 2, M ) = CS + ENDIF * Note: At most one of SN and CS can be zero. IF (SN.NE.ZERO) THEN IF (CS.NE.ZERO) THEN @@ -550,8 +567,13 @@ M = M+1 W( M ) = R1 IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN - Z( 1, M ) = CS - Z( 2, M ) = SN + IF( LAESWAP ) THEN + Z( 1, M ) = -SN + Z( 2, M ) = CS + ELSE + Z( 1, M ) = CS + Z( 2, M ) = SN + ENDIF * Note: At most one of SN and CS can be zero. IF (SN.NE.ZERO) THEN IF (CS.NE.ZERO) THEN
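
The ZGETF2 hunk earlier in this patch folds a two-way scaling branch into the new ZRSCL helper: the deleted Fortran multiplied the pivot column by ONE/A(J,J) when the pivot was at least SFMIN, and fell back to elementwise division for smaller pivots, where the reciprocal itself would overflow. A minimal C sketch of that deleted branch for real data, assuming IEEE doubles with DBL_MIN standing in for SFMIN; safe_inv_scale is a hypothetical name, not an OpenBLAS routine:

    #include <float.h>
    #include <math.h>
    #include <stdio.h>

    /* Mirror of the branch deleted from ZGETF2, for real data: when the
     * pivot is no smaller than the smallest safe number, one reciprocal
     * plus n multiplies is accurate and cheap; for tiny pivots, 1/alpha
     * itself would overflow, so divide each element instead.
     * safe_inv_scale is a hypothetical helper, not an OpenBLAS routine. */
    static void safe_inv_scale(int n, double alpha, double *x)
    {
        int i;
        if (fabs(alpha) >= DBL_MIN) {
            double r = 1.0 / alpha;
            for (i = 0; i < n; i++)
                x[i] *= r;
        } else {
            for (i = 0; i < n; i++)
                x[i] /= alpha;
        }
    }

    int main(void)
    {
        /* Subnormal pivot: 1/alpha overflows to Inf, but the elementwise
         * quotients are representable, so the slow path gets them right. */
        double x[3] = { 4.0e-310, 8.0e-310, -2.0e-310 };
        safe_inv_scale(3, 4.0e-310, x);
        printf("%g %g %g\n", x[0], x[1], x[2]);   /* prints: 1 2 -0.5 */
        return 0;
    }

Calling ZRSCL instead keeps this behaviour and extends it to complex pivots, whose reciprocal can overflow or underflow in either component.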
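ZRSCL itself (the zrscl.f and zrscl.c files added above) rests on one identity: for a = ar + i*ai with both parts nonzero, 1/a = 1/ur - i*(1/ui), where ur = ar + ai*(ai/ar) and ui = ai + ar*(ar/ai). Dividing ai/ar and ar/ai keeps the intermediates near |a|, so ar**2 and ai**2 are never formed directly. A short, self-contained C check of the identity; the SAFMIN/SAFMAX rescaling branches that ZRSCL wraps around it are omitted here:

    #include <complex.h>
    #include <stdio.h>

    int main(void)
    {
        /* alpha = 3 - 4i; |alpha|^2 = 25, so 1/alpha = 0.12 + 0.16i */
        double ar = 3.0, ai = -4.0;

        /* Reciprocals of the parts of 1/alpha, exactly as in ZRSCL:
         * ur = (ar^2 + ai^2)/ar and ui = (ar^2 + ai^2)/ai, but computed
         * via ai/ar and ar/ai so no intermediate square can overflow. */
        double ur = ar + ai * (ai / ar);    /* = 25/3  */
        double ui = ai + ar * (ar / ai);    /* = -25/4 */

        double complex inv = 1.0 / ur - I * (1.0 / ui);
        double complex ref = 1.0 / (ar + ai * I);

        printf("ZRSCL-style 1/alpha = %.6f%+.6fi\n", creal(inv), cimag(inv));
        printf("reference   1/alpha = %.6f%+.6fi\n", creal(ref), cimag(ref));
        return 0;
    }

When |UR| or |UI| leaves the [SAFMIN, SAFMAX] range, the routine folds an extra SAFMIN or SAFMAX factor into its ZSCAL/ZDSCAL calls, which is all the surrounding branches in the new files do.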
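The LAESWAP changes in the DSTEMR, SSTEMR, and ZSTEMR hunks address an ordering mismatch: D/S/LAE2 and D/S/LAEV2 return the eigenvalues of the 2x2 block ordered by magnitude, |R1| >= |R2|, while the N=2 shortcut in the STEMR routines stores W and Z assuming the algebraic order R1 >= R2. The two orders disagree exactly when the magnitude-largest eigenvalue is negative and the other is positive. A small C illustration of that case, assuming a symmetric 2x2 matrix [a b; b c] with b nonzero; eig2x2 is a hypothetical stand-in for DLAEV2 that keeps only its magnitude ordering:

    #include <math.h>
    #include <stdio.h>

    /* Eigendecomposition of the symmetric 2x2 matrix [a b; b c].
     * Like DLAEV2, return rt1, rt2 ordered so that |rt1| >= |rt2|,
     * with (cs, sn) the unit eigenvector of rt1. Assumes b != 0. */
    static void eig2x2(double a, double b, double c,
                       double *rt1, double *rt2, double *cs, double *sn)
    {
        double mid = 0.5 * (a + c);
        double rad = sqrt(0.25 * (a - c) * (a - c) + b * b);
        double hi = mid + rad, lo = mid - rad;   /* hi >= lo algebraically */
        if (fabs(hi) >= fabs(lo)) { *rt1 = hi; *rt2 = lo; }
        else                      { *rt1 = lo; *rt2 = hi; }
        double vx = b, vy = *rt1 - a, nrm = hypot(vx, vy);
        *cs = vx / nrm;
        *sn = vy / nrm;
    }

    int main(void)
    {
        double rt1, rt2, cs, sn;
        /* [-4 2; 2 1] has eigenvalues ~1.7016 and ~-4.7016: the magnitude-
         * largest one is algebraically the smaller one, so rt1 < rt2. */
        eig2x2(-4.0, 2.0, 1.0, &rt1, &rt2, &cs, &sn);

        int laeswap = (rt1 < rt2);          /* the condition the patch tests */
        if (laeswap) {
            double t = rt1; rt1 = rt2; rt2 = t;
        }
        /* (cs, sn) is still the eigenvector of the magnitude-largest value,
         * which after the swap is returned as rt2, so its Z column moves
         * with it; that is exactly what the IF( LAESWAP ) branches above
         * do with the Z(1,M) and Z(2,M) assignments. */
        printf("rt1=%g rt2=%g laeswap=%d v=(%g, %g)\n", rt1, rt2, laeswap, cs, sn);
        return 0;
    }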