Merge branch 'xianyi:develop' into issue4128

Martin Kroeker 2023-07-14 13:33:52 +02:00 committed by GitHub
commit a6e41900fd
41 changed files with 2143 additions and 141 deletions

View File

@ -69,7 +69,7 @@ endif
# in GCC>=9
ifeq ($(CORE), NEOVERSEN1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
@ -92,9 +92,14 @@ endif
# in GCC>=10.4
ifeq ($(CORE), NEOVERSEV1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ10), 1)
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11)))
CCOMMON_OPT += -march=armv8.4-a+sve -mtune=neoverse-v1
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.4-a+sve
ifeq (1, $(ISCLANG))
CCOMMON_OPT += -mtune=cortex-x1
else
CCOMMON_OPT += -mtune=neoverse-v1
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
endif
@ -122,8 +127,8 @@ endif
# in GCC>=10.4
ifeq ($(CORE), NEOVERSEN2)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ10), 1)
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
ifneq ($(OSNAME), Darwin)
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
else
@ -155,7 +160,7 @@ endif
# Use a53 tunings because a55 is only available in GCC>=8.1
ifeq ($(CORE), CORTEXA55)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ8), 1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ8) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
@ -196,8 +201,13 @@ endif
endif
ifeq ($(CORE), THUNDERX3T110)
ifeq ($(GCCVERSIONGTEQ10), 1)
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.3-a
ifeq (0, $(ISCLANG))
CCOMMON_OPT += -mtune=thunderx3t110
else
CCOMMON_OPT += -mtune=thunderx2t99
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
endif
@ -225,9 +235,12 @@ endif
endif
endif
ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
ifeq ($(CORE), EMAG8180)
CCOMMON_OPT += -march=armv8-a -mtune=emag
CCOMMON_OPT += -march=armv8-a
ifeq ($(ISCLANG), 0)
CCOMMON_OPT += -mtune=emag
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=emag
endif

View File

@ -9,9 +9,12 @@ AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n
Cirrus CI: [![Build Status](https://api.cirrus-ci.com/github/xianyi/OpenBLAS.svg?branch=develop)](https://cirrus-ci.com/github/xianyi/OpenBLAS)
<!-- Drone CI: [![Build Status](https://cloud.drone.io/api/badges/xianyi/OpenBLAS/status.svg?branch=develop)](https://cloud.drone.io/xianyi/OpenBLAS/)-->
[![Build Status](https://dev.azure.com/xianyi/OpenBLAS/_apis/build/status/xianyi.OpenBLAS?branchName=develop)](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop)
OSUOSL POWERCI [![Build Status](https://powerci.osuosl.org/buildStatus/icon?job=OpenBLAS_gh%2Fdevelop)](http://powerci.osuosl.org/job/OpenBLAS_gh/job/develop/)
OSUOSL IBMZ-CI [![Build Status](http://ibmz-ci.osuosl.org/buildStatus/icon?job=OpenBLAS-Z%2Fdevelop)](http://ibmz-ci.osuosl.org/job/OpenBLAS-Z/job/develop/)
## Introduction
OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version.

View File

@ -271,6 +271,19 @@ jobs:
- script: |
make TARGET=ARMV7 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
- job: OSX_xbuild_DYNAMIC_ARM64
pool:
vmImage: 'macOS-11'
variables:
CC: /Applications/Xcode_12.5.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX11.3.sdk -arch arm64
steps:
- script: |
ls /Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
/Applications/Xcode_12.5.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -arch arm64 --print-supported-cpus
/Applications/Xcode_11.7.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang --version
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
- job: ALPINE_MUSL
pool:
vmImage: 'ubuntu-latest'

View File

@ -46,7 +46,7 @@ if (DYNAMIC_ARCH)
if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA55 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99)
set(DYNAMIC_CORE "${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2")
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2)
endif ()
if (DYNAMIC_LIST)
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})

View File

@ -187,7 +187,7 @@ set(CLASRC
cposv.f cposvx.f cpotrf2.f cpotri.f cpstrf.f cpstf2.f
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
crot.f cspcon.f csprfs.f cspsv.f
crot.f crscl.f cspcon.f csprfs.f cspsv.f
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
cstegr.f cstein.f csteqr.f csycon.f
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f
@ -381,7 +381,7 @@ set(ZLASRC
zposv.f zposvx.f zpotrf2.f zpotri.f zpotrs.f zpstrf.f zpstf2.f
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
zrot.f zspcon.f zsprfs.f zspsv.f
zrot.f zrscl.f zspcon.f zsprfs.f zspsv.f
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
zstegr.f zstein.f zsteqr.f zsycon.f
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f
@ -686,7 +686,7 @@ set(CLASRC
cposv.c cposvx.c cpotrf2.c cpotri.c cpstrf.c cpstf2.c
cppcon.c cppequ.c cpprfs.c cppsv.c cppsvx.c cpptrf.c cpptri.c cpptrs.c
cptcon.c cpteqr.c cptrfs.c cptsv.c cptsvx.c cpttrf.c cpttrs.c cptts2.c
crot.c cspcon.c csprfs.c cspsv.c
crot.c crscl.c cspcon.c csprfs.c cspsv.c
cspsvx.c csptrf.c csptri.c csptrs.c csrscl.c cstedc.c
cstegr.c cstein.c csteqr.c csycon.c
csyrfs.c csysv.c csysvx.c csytf2.c csytrf.c csytri.c
@ -878,7 +878,7 @@ set(ZLASRC
zposv.c zposvx.c zpotrf2.c zpotri.c zpotrs.c zpstrf.c zpstf2.c
zppcon.c zppequ.c zpprfs.c zppsv.c zppsvx.c zpptrf.c zpptri.c zpptrs.c
zptcon.c zpteqr.c zptrfs.c zptsv.c zptsvx.c zpttrf.c zpttrs.c zptts2.c
zrot.c zspcon.c zsprfs.c zspsv.c
zrot.c zrscl.c zspcon.c zsprfs.c zspsv.c
zspsvx.c zsptrf.c zsptri.c zsptrs.c zdrscl.c zstedc.c
zstegr.c zstein.c zsteqr.c zsycon.c
zsyrfs.c zsysv.c zsysvx.c zsytf2.c zsytrf.c zsytri.c

View File

@ -280,7 +280,29 @@ if (DEFINED TARGET)
if (${TARGET} STREQUAL POWER8)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math")
endif()
if (${TARGET} STREQUAL NEOVERSEV1)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1")
else ()
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse V1.")
endif()
endif()
if (${TARGET} STREQUAL NEOVERSEN2)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
else ()
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse N2.")
endif()
endif()
if (${TARGET} STREQUAL ARMV8SVE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
endif()
endif()
if (DEFINED BINARY)
message(STATUS "Compiling a ${BINARY}-bit binary.")
endif ()

View File

@ -83,6 +83,19 @@ static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
}
#ifndef NO_AFFINITY
static inline int WhereAmI(void){
int ret = 0, counter = 0;
__asm__ volatile (
"rdtimel.w %[counter], %[id]"
: [id]"=r"(ret), [counter]"=r"(counter)
:
: "memory"
);
return ret;
}
#endif
#ifdef DOUBLE
#define GET_IMAGE(res) __asm__ __volatile__("fmov.d %0, $f2" : "=f"(res) : : "memory")
#else

View File

@ -100,13 +100,13 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
if ( order == BlasColMajor)
{
if ( trans == BlasNoTrans && *ldb < *rows ) info = 9;
if ( trans == BlasTrans && *ldb < *cols ) info = 9;
if ( trans == BlasNoTrans && *ldb < *rows ) info = 8;
if ( trans == BlasTrans && *ldb < *cols ) info = 8;
}
if ( order == BlasRowMajor)
{
if ( trans == BlasNoTrans && *ldb < *cols ) info = 9;
if ( trans == BlasTrans && *ldb < *rows ) info = 9;
if ( trans == BlasNoTrans && *ldb < *cols ) info = 8;
if ( trans == BlasTrans && *ldb < *rows ) info = 8;
}
if ( order == BlasColMajor && *lda < *rows ) info = 7;

View File

@ -50,8 +50,8 @@ static FLOAT dot_kernel_sve(BLASLONG n, FLOAT *x, FLOAT *y) {
BLASLONG sve_width = SVE_WIDTH;
for (BLASLONG i = 0; i < n; i += sve_width * 2) {
svbool_t pg_a = SVE_WHILELT(i, n);
svbool_t pg_b = SVE_WHILELT(i + sve_width, n);
svbool_t pg_a = SVE_WHILELT((uint64_t)i, (uint64_t)n);
svbool_t pg_b = SVE_WHILELT((uint64_t)(i + sve_width), (uint64_t)n);
SVE_TYPE x_vec_a = svld1(pg_a, &x[i]);
SVE_TYPE y_vec_a = svld1(pg_a, &y[i]);
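The casts above (and in the SVE packing kernels that follow) pin the generic svwhilelt / SV_WHILE intrinsics to their unsigned overloads instead of leaving the choice to implicit conversion from BLASLONG, which clang is stricter about than gcc. A minimal sketch of the idiom, assuming an SVE-capable compiler and ACLE's arm_sve.h; the helper name is illustrative, not part of the kernels:

```c
#include <arm_sve.h>
#include <stdint.h>

/* Predicate covering elements i, i+1, ..., n-1 of a double-precision loop.
 * Casting both operands to uint64_t selects the unsigned svwhilelt_b64
 * overload explicitly, matching the casts added in the kernels above. */
static inline svbool_t tail_predicate(long i, long n)
{
    return svwhilelt_b64((uint64_t)i, (uint64_t)n);
}
```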

View File

@ -107,7 +107,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b) {
BLASLONG remaining_n = n - single_vectors_n;
if (remaining_n) {
a_offset_inner = a_offset;
svbool_t pg = SV_WHILE(0L, remaining_n);
svbool_t pg = SV_WHILE((uint64_t)0L, (uint64_t)remaining_n);
uint64_t active = remaining_n;
uint64_t i_cnt = m >> 2;
while (i_cnt--) {

View File

@ -100,7 +100,7 @@ int CNAME(BLASLONG m, BLASLONG n, IFLOAT *a, BLASLONG lda, IFLOAT *b){
BLASLONG remaining_n = n - single_vectors_n;
if (remaining_n) {
a_offset_inner = a_offset;
svbool_t pg = SV_WHILE(0L, remaining_n);
svbool_t pg = SV_WHILE((uint64_t)0L, (uint64_t)remaining_n);
uint64_t active = remaining_n;
uint64_t i_cnt = m >> 2;
while (i_cnt--) {

View File

@ -52,11 +52,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
FLOAT *ao;
#ifdef DOUBLE
svint64_t index = svindex_s64(0LL, lda);
svbool_t pn = svwhilelt_b64(js, n);
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
int n_active = svcntp_b64(svptrue_b64(), pn);
#else
svint32_t index = svindex_s32(0, lda);
svbool_t pn = svwhilelt_b32(js, n);
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
int n_active = svcntp_b32(svptrue_b32(), pn);
#endif
do
@ -123,11 +123,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
posY += n_active;
js += n_active;
#ifdef DOUBLE
pn = svwhilelt_b64(js, n);
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
n_active = svcntp_b64(svptrue_b64(), pn);
} while (svptest_any(svptrue_b64(), pn));
#else
pn = svwhilelt_b32(js, n);
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
n_active = svcntp_b32(svptrue_b32(), pn);
} while (svptest_any(svptrue_b32(), pn));
#endif

View File

@ -51,10 +51,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
FLOAT *ao;
js = 0;
#ifdef DOUBLE
svbool_t pn = svwhilelt_b64(js, n);
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
int n_active = svcntp_b64(svptrue_b64(), pn);
#else
svbool_t pn = svwhilelt_b32(js, n);
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
int n_active = svcntp_b32(svptrue_b32(), pn);
#endif
do
@ -122,11 +122,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
posY += n_active;
js += n_active;
#ifdef DOUBLE
pn = svwhilelt_b64(js, n);
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
n_active = svcntp_b64(svptrue_b64(), pn);
} while (svptest_any(svptrue_b64(), pn));
#else
pn = svwhilelt_b32(js, n);
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
n_active = svcntp_b32(svptrue_b32(), pn);
} while (svptest_any(svptrue_b32(), pn));
#endif

View File

@ -52,11 +52,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
FLOAT *ao;
#ifdef DOUBLE
svint64_t index = svindex_s64(0LL, lda);
svbool_t pn = svwhilelt_b64(js, n);
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
int n_active = svcntp_b64(svptrue_b64(), pn);
#else
svint32_t index = svindex_s32(0, lda);
svbool_t pn = svwhilelt_b32(js, n);
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
int n_active = svcntp_b32(svptrue_b32(), pn);
#endif
do
@ -123,11 +123,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
posY += n_active;
js += n_active;
#ifdef DOUBLE
pn = svwhilelt_b64(js, n);
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
n_active = svcntp_b64(svptrue_b64(), pn);
} while (svptest_any(svptrue_b64(), pn));
#else
pn = svwhilelt_b32(js, n);
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
n_active = svcntp_b32(svptrue_b32(), pn);
} while (svptest_any(svptrue_b32(), pn));
#endif

View File

@ -51,10 +51,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
FLOAT *ao;
js = 0;
#ifdef DOUBLE
svbool_t pn = svwhilelt_b64(js, n);
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
int n_active = svcntp_b64(svptrue_b64(), pn);
#else
svbool_t pn = svwhilelt_b32(js, n);
svbool_t pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
int n_active = svcntp_b32(svptrue_b32(), pn);
#endif
do
@ -121,11 +121,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
posY += n_active;
js += n_active;
#ifdef DOUBLE
pn = svwhilelt_b64(js, n);
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
n_active = svcntp_b64(svptrue_b64(), pn);
} while (svptest_any(svptrue_b64(), pn));
#else
pn = svwhilelt_b32(js, n);
pn = svwhilelt_b32((uint64_t)js, (uint64_t)n);
n_active = svcntp_b32(svptrue_b32(), pn);
} while (svptest_any(svptrue_b32(), pn));
#endif

View File

@ -56,13 +56,13 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
#ifdef DOUBLE
int64_t js = 0;
svint64_t index = svindex_s64(0LL, lda);
svbool_t pn = svwhilelt_b64(js, n);
svbool_t pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
int n_active = svcntp_b64(svptrue_b64(), pn);
#else
int32_t N = n;
int32_t js = 0;
svint32_t index = svindex_s32(0, lda);
svbool_t pn = svwhilelt_b32(js, N);
svbool_t pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
int n_active = svcntp_b32(svptrue_b32(), pn);
#endif
do {
@ -106,11 +106,11 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
js += n_active;
#ifdef DOUBLE
pn = svwhilelt_b64(js, n);
pn = svwhilelt_b64((uint64_t)js, (uint64_t)n);
n_active = svcntp_b64(svptrue_b64(), pn);
} while (svptest_any(svptrue_b64(), pn));
#else
pn = svwhilelt_b32(js, N);
pn = svwhilelt_b32((uint32_t)js, (uint32_t)N);
n_active = svcntp_b32(svptrue_b32(), pn);
} while (svptest_any(svptrue_b32(), pn));
#endif

View File

@ -43,7 +43,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
BLASLONG i, ii, j, jj;
FLOAT data01, data02;
FLOAT data01=0.0, data02=0.0;
FLOAT *a1;
lda *= 2;

View File

@ -47,6 +47,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG offset, FLOAT
FLOAT data05, data06, data07, data08;
FLOAT *a1, *a2;
data01=data02=data07=data08=0.0;
lda *= 2;
jj = offset;

View File

@ -58,7 +58,7 @@ lapack_int LAPACKE_clarfb( int matrix_layout, char side, char trans, char direct
nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) );
ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) );
uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u';
uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u';
if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) {
LAPACKE_xerbla( "LAPACKE_clarfb", -8 );
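The c/d/s/z LAPACKE_?larfb fixes in this and the following files all make the same change: the triangle of V that must be preserved depends on DIRECT and STOREV, not on SIDE. A hedged sketch of the rule the new expression encodes, following the ?LARFB storage convention; the helper below is illustrative, not part of LAPACKE:

```c
/* Which triangle of the k-by-k head of V is referenced:
 *   DIRECT='F', STOREV='C' -> unit lower
 *   DIRECT='B', STOREV='C' -> unit upper
 *   DIRECT='F', STOREV='R' -> unit upper
 *   DIRECT='B', STOREV='R' -> unit lower */
static char larfb_uplo(int forward /* DIRECT == 'F' */, int col /* STOREV == 'C' */)
{
    return ((forward && col) || (!forward && !col)) ? 'l' : 'u';
}
```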

View File

@ -60,7 +60,7 @@ lapack_int LAPACKE_clarfb_work( int matrix_layout, char side, char trans,
nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) );
ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) );
uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u';
uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u';
ldc_t = MAX(1,m);
ldt_t = MAX(1,k);

View File

@ -57,7 +57,7 @@ lapack_int LAPACKE_dlarfb( int matrix_layout, char side, char trans, char direct
nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) );
ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) );
uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u';
uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u';
if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) {
LAPACKE_xerbla( "LAPACKE_dlarfb", -8 );

View File

@ -59,7 +59,7 @@ lapack_int LAPACKE_dlarfb_work( int matrix_layout, char side, char trans,
nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) );
ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) );
uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u';
uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u';
ldc_t = MAX(1,m);
ldt_t = MAX(1,k);

View File

@ -57,7 +57,7 @@ lapack_int LAPACKE_slarfb( int matrix_layout, char side, char trans, char direct
nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) );
ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) );
uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u';
uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u';
if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) {
LAPACKE_xerbla( "LAPACKE_slarfb", -8 );

View File

@ -59,7 +59,7 @@ lapack_int LAPACKE_slarfb_work( int matrix_layout, char side, char trans,
nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) );
ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) );
uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u';
uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u';
ldc_t = MAX(1,m);
ldt_t = MAX(1,k);

View File

@ -58,7 +58,7 @@ lapack_int LAPACKE_zlarfb( int matrix_layout, char side, char trans, char direct
nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) );
ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) );
uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u';
uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u';
if( ( col && k > nrows_v ) || ( !col && k > ncols_v ) ) {
LAPACKE_xerbla( "LAPACKE_zlarfb", -8 );

View File

@ -60,7 +60,7 @@ lapack_int LAPACKE_zlarfb_work( int matrix_layout, char side, char trans,
nrows_v = ( col && left ) ? m : ( ( col && !left ) ? n : ( !col ? k : 1) );
ncols_v = ( !col && left ) ? m : ( ( !col && !left ) ? n : ( col ? k : 1 ) );
uplo = ( ( left && col ) || !( left || col ) ) ? 'l' : 'u';
uplo = ( ( forward && col ) || !( forward || col ) ) ? 'l' : 'u';
ldc_t = MAX(1,m);
ldt_t = MAX(1,k);

View File

@ -280,7 +280,7 @@ CLASRC_O = \
cposv.o cposvx.o cpotf2.o cpotri.o cpstrf.o cpstf2.o \
cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \
crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
crot.o crscl.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
cspsvx.o csptrf.o csptri.o csptrs.o csrscl.o cstedc.o \
cstegr.o cstein.o csteqr.o \
csycon.o csymv.o \
@ -488,7 +488,7 @@ ZLASRC_O = \
zposv.o zposvx.o zpotf2.o zpotrf.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \
zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
zrot.o zrscl.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
zspsvx.o zsptrf.o zsptri.o zsptrs.o zdrscl.o zstedc.o \
zstegr.o zstein.o zsteqr.o \
zsycon.o zsymv.o \

View File

@ -170,7 +170,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complexGEsolve
*> \ingroup gelss
*
* =====================================================================
SUBROUTINE CGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
@ -214,8 +214,7 @@
* .. External Subroutines ..
EXTERNAL CBDSQR, CCOPY, CGEBRD, CGELQF, CGEMM, CGEMV,
$ CGEQRF, CLACPY, CLASCL, CLASET, CSRSCL, CUNGBR,
$ CUNMBR, CUNMLQ, CUNMQR, SLABAD, SLASCL, SLASET,
$ XERBLA
$ CUNMBR, CUNMLQ, CUNMQR, SLASCL, SLASET, XERBLA
* ..
* .. External Functions ..
INTEGER ILAENV
@ -388,7 +387,6 @@
SFMIN = SLAMCH( 'S' )
SMLNUM = SFMIN / EPS
BIGNUM = ONE / SMLNUM
CALL SLABAD( SMLNUM, BIGNUM )
*
* Scale A if max element outside range [SMLNUM,BIGNUM]
*
@ -540,7 +538,7 @@
$ LDB, CZERO, WORK, N )
CALL CLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
20 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL CGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 )
CALL CCOPY( N, WORK, 1, B, 1 )
END IF
@ -645,7 +643,7 @@
CALL CLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
$ LDB )
40 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL CGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ),
$ 1, CZERO, WORK( IWORK ), 1 )
CALL CCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
@ -737,7 +735,7 @@
$ LDB, CZERO, WORK, N )
CALL CLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
60 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL CGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 )
CALL CCOPY( N, WORK, 1, B, 1 )
END IF

View File

@ -101,7 +101,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complexGEcomputational
*> \ingroup getf2
*
* =====================================================================
SUBROUTINE CGETF2( M, N, A, LDA, IPIV, INFO )
@ -126,16 +126,14 @@
$ ZERO = ( 0.0E+0, 0.0E+0 ) )
* ..
* .. Local Scalars ..
REAL SFMIN
INTEGER I, J, JP
INTEGER J, JP
* ..
* .. External Functions ..
REAL SLAMCH
INTEGER ICAMAX
EXTERNAL SLAMCH, ICAMAX
EXTERNAL ICAMAX
* ..
* .. External Subroutines ..
EXTERNAL CGERU, CSCAL, CSWAP, XERBLA
EXTERNAL CGERU, CRSCL, CSWAP, XERBLA
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
@ -161,10 +159,6 @@
*
IF( M.EQ.0 .OR. N.EQ.0 )
$ RETURN
*
* Compute machine safe minimum
*
SFMIN = SLAMCH('S')
*
DO 10 J = 1, MIN( M, N )
*
@ -181,15 +175,8 @@
*
* Compute elements J+1:M of J-th column.
*
IF( J.LT.M ) THEN
IF( ABS(A( J, J )) .GE. SFMIN ) THEN
CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
ELSE
DO 20 I = 1, M-J
A( J+I, J ) = A( J+I, J ) / A( J, J )
20 CONTINUE
END IF
END IF
IF( J.LT.M )
$ CALL CRSCL( M-J, A( J, J ), A( J+1, J ), 1 )
*
ELSE IF( INFO.EQ.0 ) THEN
*
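The cgetf2 hunk above (and the zgetf2 hunk further down) replaces the inline SFMIN test and per-element division with one call to the new CRSCL routine. For context, a compact C sketch of the stepwise safe scaling used by the real-valued ?RSCL routines, to which CRSCL delegates when the pivot is real; the function name and the use of DBL_MIN and fabs in place of DLAMCH('S') are illustrative assumptions:

```c
#include <float.h>
#include <math.h>

/* Scale x by 1/a without forming 1/a when |a| is tiny or huge:
 * repeatedly multiply by SAFMIN or SAFMAX until cnum/cden is
 * representable, mirroring the loop in the reference DRSCL. */
static void rscl_sketch(int n, double a, double *x, int incx)
{
    double safmin = DBL_MIN;          /* stand-in for DLAMCH('S') */
    double safmax = 1.0 / safmin;
    double cden = a, cnum = 1.0, mul;
    int done;
    do {
        double cden1 = cden * safmin;
        double cnum1 = cnum / safmax;
        if (fabs(cden1) > fabs(cnum) && cnum != 0.0) {
            mul = safmin; cden = cden1; done = 0;   /* shrink the denominator safely */
        } else if (fabs(cnum1) > fabs(cden)) {
            mul = safmax; cnum = cnum1; done = 0;   /* grow the numerator safely */
        } else {
            mul = cnum / cden;         done = 1;    /* final exact factor */
        }
        for (int i = 0; i < n; i++)
            x[i * incx] *= mul;
    } while (!done);
}
```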

lapack-netlib/SRC/crscl.c (new file, 735 lines)
View File

@ -0,0 +1,735 @@
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <complex.h>
#ifdef complex
#undef complex
#endif
#ifdef I
#undef I
#endif
#if defined(_WIN64)
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
#ifdef LAPACK_ILP64
typedef BLASLONG blasint;
#if defined(_WIN64)
#define blasabs(x) llabs(x)
#else
#define blasabs(x) labs(x)
#endif
#else
typedef int blasint;
#define blasabs(x) abs(x)
#endif
typedef blasint integer;
typedef unsigned int uinteger;
typedef char *address;
typedef short int shortint;
typedef float real;
typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
#define pCd(z) (*_pCd(z))
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
typedef char integer1;
#define TRUE_ (1)
#define FALSE_ (0)
/* Extern is for use with -E */
#ifndef Extern
#define Extern extern
#endif
/* I/O stuff */
typedef int flag;
typedef int ftnlen;
typedef int ftnint;
/*external read, write*/
typedef struct
{ flag cierr;
ftnint ciunit;
flag ciend;
char *cifmt;
ftnint cirec;
} cilist;
/*internal read, write*/
typedef struct
{ flag icierr;
char *iciunit;
flag iciend;
char *icifmt;
ftnint icirlen;
ftnint icirnum;
} icilist;
/*open*/
typedef struct
{ flag oerr;
ftnint ounit;
char *ofnm;
ftnlen ofnmlen;
char *osta;
char *oacc;
char *ofm;
ftnint orl;
char *oblnk;
} olist;
/*close*/
typedef struct
{ flag cerr;
ftnint cunit;
char *csta;
} cllist;
/*rewind, backspace, endfile*/
typedef struct
{ flag aerr;
ftnint aunit;
} alist;
/* inquire */
typedef struct
{ flag inerr;
ftnint inunit;
char *infile;
ftnlen infilen;
ftnint *inex; /*parameters in standard's order*/
ftnint *inopen;
ftnint *innum;
ftnint *innamed;
char *inname;
ftnlen innamlen;
char *inacc;
ftnlen inacclen;
char *inseq;
ftnlen inseqlen;
char *indir;
ftnlen indirlen;
char *infmt;
ftnlen infmtlen;
char *inform;
ftnint informlen;
char *inunf;
ftnlen inunflen;
ftnint *inrecl;
ftnint *innrec;
char *inblank;
ftnlen inblanklen;
} inlist;
#define VOID void
union Multitype { /* for multiple entry points */
integer1 g;
shortint h;
integer i;
/* longint j; */
real r;
doublereal d;
complex c;
doublecomplex z;
};
typedef union Multitype Multitype;
struct Vardesc { /* for Namelist */
char *name;
char *addr;
ftnlen *dims;
int type;
};
typedef struct Vardesc Vardesc;
struct Namelist {
char *name;
Vardesc **vars;
int nvars;
};
typedef struct Namelist Namelist;
#define abs(x) ((x) >= 0 ? (x) : -(x))
#define dabs(x) (fabs(x))
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
#define dmin(a,b) (f2cmin(a,b))
#define dmax(a,b) (f2cmax(a,b))
#define bit_test(a,b) ((a) >> (b) & 1)
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
#define abort_() { sig_die("Fortran abort routine called", 1); }
#define c_abs(z) (cabsf(Cf(z)))
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
#ifdef _MSC_VER
#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
#endif
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
#define d_abs(x) (fabs(*(x)))
#define d_acos(x) (acos(*(x)))
#define d_asin(x) (asin(*(x)))
#define d_atan(x) (atan(*(x)))
#define d_atn2(x, y) (atan2(*(x),*(y)))
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
#define d_cos(x) (cos(*(x)))
#define d_cosh(x) (cosh(*(x)))
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
#define d_exp(x) (exp(*(x)))
#define d_imag(z) (cimag(Cd(z)))
#define r_imag(z) (cimagf(Cf(z)))
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define d_log(x) (log(*(x)))
#define d_mod(x, y) (fmod(*(x), *(y)))
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
#define d_nint(x) u_nint(*(x))
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
#define d_sign(a,b) u_sign(*(a),*(b))
#define r_sign(a,b) u_sign(*(a),*(b))
#define d_sin(x) (sin(*(x)))
#define d_sinh(x) (sinh(*(x)))
#define d_sqrt(x) (sqrt(*(x)))
#define d_tan(x) (tan(*(x)))
#define d_tanh(x) (tanh(*(x)))
#define i_abs(x) abs(*(x))
#define i_dnnt(x) ((integer)u_nint(*(x)))
#define i_len(s, n) (n)
#define i_nint(x) ((integer)u_nint(*(x)))
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
#define pow_si(B,E) spow_ui(*(B),*(E))
#define pow_ri(B,E) spow_ui(*(B),*(E))
#define pow_di(B,E) dpow_ui(*(B),*(E))
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
#define myexit_() break;
#define mycycle_() continue;
#define myceiling_(w) {ceil(w)}
#define myhuge_(w) {HUGE_VAL}
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* > \brief \b CRSCL multiplies a vector by the reciprocal of a real scalar. */
/* =========== DOCUMENTATION =========== */
/* Online html documentation available at */
/* http://www.netlib.org/lapack/explore-html/ */
/* > \htmlonly */
/* > Download CRSCL + dependencies */
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/crscl.f
"> */
/* > [TGZ]</a> */
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/crscl.f
"> */
/* > [ZIP]</a> */
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/crscl.f
"> */
/* > [TXT]</a> */
/* > \endhtmlonly */
/* Definition: */
/* =========== */
/* SUBROUTINE CRSCL( N, A, X, INCX ) */
/* INTEGER INCX, N */
/* COMPLEX A */
/* COMPLEX X( * ) */
/* > \par Purpose: */
/* ============= */
/* > */
/* > \verbatim */
/* > */
/* > CRSCL multiplies an n-element complex vector x by the complex scalar */
/* > 1/a. This is done without overflow or underflow as long as */
/* > the final result x/a does not overflow or underflow. */
/* > \endverbatim */
/* Arguments: */
/* ========== */
/* > \param[in] N */
/* > \verbatim */
/* > N is INTEGER */
/* > The number of components of the vector x. */
/* > \endverbatim */
/* > */
/* > \param[in] A */
/* > \verbatim */
/* > A is COMPLEX */
/* > The scalar a which is used to divide each component of x. */
/* > A must not be 0, or the subroutine will divide by zero. */
/* > \endverbatim */
/* > */
/* > \param[in,out] X */
/* > \verbatim */
/* > X is COMPLEX array, dimension */
/* > (1+(N-1)*abs(INCX)) */
/* > The n-element vector x. */
/* > \endverbatim */
/* > */
/* > \param[in] INCX */
/* > \verbatim */
/* > INCX is INTEGER */
/* > The increment between successive values of the vector X. */
/* > > 0: X(1) = X(1) and X(1+(i-1)*INCX) = x(i), 1< i<= n */
/* > \endverbatim */
/* Authors: */
/* ======== */
/* > \author Univ. of Tennessee */
/* > \author Univ. of California Berkeley */
/* > \author Univ. of Colorado Denver */
/* > \author NAG Ltd. */
/* > \ingroup complexOTHERauxiliary */
/* ===================================================================== */
/* Subroutine */ int crscl_(integer *n, complex *a, complex *x, integer *incx)
{
/* System generated locals */
real r__1, r__2;
complex q__1;
/* Local variables */
real absi, absr;
extern /* Subroutine */ int cscal_(integer *, complex *, complex *,
integer *);
real ai, ar, ui, ov, ur;
extern real slamch_(char *);
extern /* Subroutine */ int csscal_(integer *, real *, complex *, integer
*);
real safmin, safmax;
extern /* Subroutine */ int csrscl_(integer *, real *, complex *, integer
*);
/* -- LAPACK auxiliary routine -- */
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
/* ===================================================================== */
/* Quick return if possible */
/* Parameter adjustments */
--x;
/* Function Body */
if (*n <= 0) {
return 0;
}
/* Get machine parameters */
safmin = slamch_("S");
safmax = 1.f / safmin;
ov = slamch_("O");
/* Initialize constants related to A. */
ar = a->r;
ai = r_imag(a);
absr = abs(ar);
absi = abs(ai);
if (ai == 0.f) {
/* If alpha is real, then we can use csrscl */
csrscl_(n, &ar, &x[1], incx);
} else if (ar == 0.f) {
/* If alpha has a zero real part, then we follow the same rules as if */
/* alpha were real. */
if (absi > safmax) {
csscal_(n, &safmin, &x[1], incx);
r__1 = -safmax / ai;
q__1.r = 0.f, q__1.i = r__1;
cscal_(n, &q__1, &x[1], incx);
} else if (absi < safmin) {
r__1 = -safmin / ai;
q__1.r = 0.f, q__1.i = r__1;
cscal_(n, &q__1, &x[1], incx);
csscal_(n, &safmax, &x[1], incx);
} else {
r__1 = -1.f / ai;
q__1.r = 0.f, q__1.i = r__1;
cscal_(n, &q__1, &x[1], incx);
}
} else {
/* The following numbers can be computed. */
/* They are the inverse of the real and imaginary parts of 1/alpha. */
/* Note that a and b are always different from zero. */
/* NaNs are only possible if either: */
/* 1. alphaR or alphaI is NaN. */
/* 2. alphaR and alphaI are both infinite, in which case it makes sense */
/* to propagate a NaN. */
ur = ar + ai * (ai / ar);
ui = ai + ar * (ar / ai);
if (abs(ur) < safmin || abs(ui) < safmin) {
/* This means that both alphaR and alphaI are very small. */
r__1 = safmin / ur;
r__2 = -safmin / ui;
q__1.r = r__1, q__1.i = r__2;
cscal_(n, &q__1, &x[1], incx);
csscal_(n, &safmax, &x[1], incx);
} else if (abs(ur) > safmax || abs(ui) > safmax) {
if (absr > ov || absi > ov) {
/* This means that a and b are both Inf. No need for scaling. */
r__1 = 1.f / ur;
r__2 = -1.f / ui;
q__1.r = r__1, q__1.i = r__2;
cscal_(n, &q__1, &x[1], incx);
} else {
csscal_(n, &safmin, &x[1], incx);
if (abs(ur) > ov || abs(ui) > ov) {
/* Infs were generated. We do proper scaling to avoid them. */
if (absr >= absi) {
/* ABS( UR ) <= ABS( UI ) */
ur = safmin * ar + safmin * (ai * (ai / ar));
ui = safmin * ai + ar * (safmin * ar / ai);
} else {
/* ABS( UR ) > ABS( UI ) */
ur = safmin * ar + ai * (safmin * ai / ar);
ui = safmin * ai + safmin * (ar * (ar / ai));
}
r__1 = 1.f / ur;
r__2 = -1.f / ui;
q__1.r = r__1, q__1.i = r__2;
cscal_(n, &q__1, &x[1], incx);
} else {
r__1 = safmax / ur;
r__2 = -safmax / ui;
q__1.r = r__1, q__1.i = r__2;
cscal_(n, &q__1, &x[1], incx);
}
}
} else {
r__1 = 1.f / ur;
r__2 = -1.f / ui;
q__1.r = r__1, q__1.i = r__2;
cscal_(n, &q__1, &x[1], incx);
}
}
return 0;
/* End of CRSCL */
} /* crscl_ */
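A short usage illustration for the crscl_ entry point defined above. The types follow the f2c typedefs at the top of this file (non-ILP64 shown); the program is an example only and must be linked against this object plus a provider of slamch_, cscal_, csscal_ and csrscl_ such as OpenBLAS:

```c
#include <stdio.h>

typedef int integer;                    /* matches the f2c typedef above (non-ILP64) */
typedef struct { float r, i; } complex; /* matches the f2c complex */

extern int crscl_(integer *n, complex *a, complex *x, integer *incx);

int main(void)
{
    complex a = {3.0f, 4.0f};                       /* divide by 3 + 4i */
    complex x[3] = {{1.0f, 0.0f}, {0.0f, 1.0f}, {5.0f, -5.0f}};
    integer n = 3, incx = 1;
    crscl_(&n, &a, x, &incx);                       /* x := x / (3 + 4i), overflow-safe */
    for (int i = 0; i < 3; i++)
        printf("(% .6f, % .6f)\n", x[i].r, x[i].i);
    return 0;
}
```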

lapack-netlib/SRC/crscl.f (new file, 202 lines)
View File

@ -0,0 +1,202 @@
*> \brief \b CRSCL multiplies a vector by the reciprocal of a real scalar.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download CRSCL + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/crscl.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/crscl.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/crscl.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE CRSCL( N, A, X, INCX )
*
* .. Scalar Arguments ..
* INTEGER INCX, N
* COMPLEX A
* ..
* .. Array Arguments ..
* COMPLEX X( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> CRSCL multiplies an n-element complex vector x by the complex scalar
*> 1/a. This is done without overflow or underflow as long as
*> the final result x/a does not overflow or underflow.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of components of the vector x.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is COMPLEX
*> The scalar a which is used to divide each component of x.
*> A must not be 0, or the subroutine will divide by zero.
*> \endverbatim
*>
*> \param[in,out] X
*> \verbatim
*> X is COMPLEX array, dimension
*> (1+(N-1)*abs(INCX))
*> The n-element vector x.
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> The increment between successive values of the vector X.
*> > 0: X(1) = X(1) and X(1+(i-1)*INCX) = x(i), 1< i<= n
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complexOTHERauxiliary
*
* =====================================================================
SUBROUTINE CRSCL( N, A, X, INCX )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INCX, N
COMPLEX A
* ..
* .. Array Arguments ..
COMPLEX X( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
REAL ZERO, ONE
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
* ..
* .. Local Scalars ..
REAL SAFMAX, SAFMIN, OV, AR, AI, ABSR, ABSI, UR
$ , UI
* ..
* .. External Functions ..
REAL SLAMCH
COMPLEX CLADIV
EXTERNAL SLAMCH, CLADIV
* ..
* .. External Subroutines ..
EXTERNAL CSCAL, CSSCAL, CSRSCL
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( N.LE.0 )
$ RETURN
*
* Get machine parameters
*
SAFMIN = SLAMCH( 'S' )
SAFMAX = ONE / SAFMIN
OV = SLAMCH( 'O' )
*
* Initialize constants related to A.
*
AR = REAL( A )
AI = AIMAG( A )
ABSR = ABS( AR )
ABSI = ABS( AI )
*
IF( AI.EQ.ZERO ) THEN
* If alpha is real, then we can use csrscl
CALL CSRSCL( N, AR, X, INCX )
*
ELSE IF( AR.EQ.ZERO ) THEN
* If alpha has a zero real part, then we follow the same rules as if
* alpha were real.
IF( ABSI.GT.SAFMAX ) THEN
CALL CSSCAL( N, SAFMIN, X, INCX )
CALL CSCAL( N, CMPLX( ZERO, -SAFMAX / AI ), X, INCX )
ELSE IF( ABSI.LT.SAFMIN ) THEN
CALL CSCAL( N, CMPLX( ZERO, -SAFMIN / AI ), X, INCX )
CALL CSSCAL( N, SAFMAX, X, INCX )
ELSE
CALL CSCAL( N, CMPLX( ZERO, -ONE / AI ), X, INCX )
END IF
*
ELSE
* The following numbers can be computed.
* They are the inverse of the real and imaginary parts of 1/alpha.
* Note that a and b are always different from zero.
* NaNs are only possible if either:
* 1. alphaR or alphaI is NaN.
* 2. alphaR and alphaI are both infinite, in which case it makes sense
* to propagate a NaN.
UR = AR + AI * ( AI / AR )
UI = AI + AR * ( AR / AI )
*
IF( (ABS( UR ).LT.SAFMIN).OR.(ABS( UI ).LT.SAFMIN) ) THEN
* This means that both alphaR and alphaI are very small.
CALL CSCAL( N, CMPLX( SAFMIN / UR, -SAFMIN / UI ), X, INCX )
CALL CSSCAL( N, SAFMAX, X, INCX )
ELSE IF( (ABS( UR ).GT.SAFMAX).OR.(ABS( UI ).GT.SAFMAX) ) THEN
IF( (ABSR.GT.OV).OR.(ABSI.GT.OV) ) THEN
* This means that a and b are both Inf. No need for scaling.
CALL CSCAL( N, CMPLX( ONE / UR, -ONE / UI ), X, INCX )
ELSE
CALL CSSCAL( N, SAFMIN, X, INCX )
IF( (ABS( UR ).GT.OV).OR.(ABS( UI ).GT.OV) ) THEN
* Infs were generated. We do proper scaling to avoid them.
IF( ABSR.GE.ABSI ) THEN
* ABS( UR ) <= ABS( UI )
UR = (SAFMIN * AR) + SAFMIN * (AI * ( AI / AR ))
UI = (SAFMIN * AI) + AR * ( (SAFMIN * AR) / AI )
ELSE
* ABS( UR ) > ABS( UI )
UR = (SAFMIN * AR) + AI * ( (SAFMIN * AI) / AR )
UI = (SAFMIN * AI) + SAFMIN * (AR * ( AR / AI ))
END IF
CALL CSCAL( N, CMPLX( ONE / UR, -ONE / UI ), X, INCX )
ELSE
CALL CSCAL( N, CMPLX( SAFMAX / UR, -SAFMAX / UI ),
$ X, INCX )
END IF
END IF
ELSE
CALL CSCAL( N, CMPLX( ONE / UR, -ONE / UI ), X, INCX )
END IF
END IF
*
RETURN
*
* End of CRSCL
*
END
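The comment "They are the inverse of the real and imaginary parts of 1/alpha" above compresses a short derivation. Spelled out in my own notation (a_r = AR, a_i = AI, both nonzero), it shows why the code scales by CMPLX( ONE/UR, -ONE/UI ):

```latex
\[
\frac{1}{a_r + i\,a_i}
  = \frac{a_r - i\,a_i}{a_r^2 + a_i^2}
  = \frac{1}{a_r + a_i\!\left(\frac{a_i}{a_r}\right)}
    \;-\; \frac{i}{a_i + a_r\!\left(\frac{a_r}{a_i}\right)}
  = \frac{1}{U_R} - \frac{i}{U_I},
\]
\[
U_R = a_r + a_i\left(\frac{a_i}{a_r}\right), \qquad
U_I = a_i + a_r\left(\frac{a_r}{a_i}\right).
\]
```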

View File

@ -320,7 +320,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complexOTHERcomputational
*> \ingroup stemr
*
*> \par Contributors:
* ==================
@ -329,7 +329,8 @@
*> Jim Demmel, University of California, Berkeley, USA \n
*> Inderjit Dhillon, University of Texas, Austin, USA \n
*> Osni Marques, LBNL/NERSC, USA \n
*> Christof Voemel, University of California, Berkeley, USA
*> Christof Voemel, University of California, Berkeley, USA \n
*> Aravindh Krishnamoorthy, FAU, Erlangen, Germany \n
*
* =====================================================================
SUBROUTINE CSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU,
@ -361,7 +362,8 @@
$ MINRGP = 3.0E-3 )
* ..
* .. Local Scalars ..
LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY
LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY,
$ LAESWAP
INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW,
$ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD,
$ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP,
@ -397,6 +399,7 @@
*
LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) )
ZQUERY = ( NZC.EQ.-1 )
LAESWAP = .FALSE.
* SSTEMR needs WORK of size 6*N, IWORK of size 3*N.
* In addition, SLARRE needs WORK of size 6*N, IWORK of size 5*N.
@ -519,6 +522,15 @@
ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
CALL SLAEV2( D(1), E(1), D(2), R1, R2, CS, SN )
END IF
* D/S/LAE2 and D/S/LAEV2 outputs satisfy |R1| >= |R2|. However,
* the following code requires R1 >= R2. Hence, we correct
* the order of R1, R2, CS, SN if R1 < R2 before further processing.
IF( R1.LT.R2 ) THEN
E(2) = R1
R1 = R2
R2 = E(2)
LAESWAP = .TRUE.
ENDIF
IF( ALLEIG.OR.
$ (VALEIG.AND.(R2.GT.WL).AND.
$ (R2.LE.WU)).OR.
@ -526,8 +538,13 @@
M = M+1
W( M ) = R2
IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
IF( LAESWAP ) THEN
Z( 1, M ) = CS
Z( 2, M ) = SN
ELSE
Z( 1, M ) = -SN
Z( 2, M ) = CS
ENDIF
* Note: At most one of SN and CS can be zero.
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN
@ -550,8 +567,13 @@
M = M+1
W( M ) = R1
IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
IF( LAESWAP ) THEN
Z( 1, M ) = -SN
Z( 2, M ) = CS
ELSE
Z( 1, M ) = CS
Z( 2, M ) = SN
ENDIF
* Note: At most one of SN and CS can be zero.
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN
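The LAESWAP handling added above (and mirrored in DSTEMR and SSTEMR below) exists because xLAEV2 orders its two eigenvalues by magnitude, |R1| >= |R2|, while the surrounding code assumes R1 >= R2 by value. A worked case in my own notation, not taken from the patch: eigenvalues {-5, 3} come back as R1 = -5, R2 = 3, so R1 < R2 and the pair must be swapped together with the attached eigenvectors:

```latex
\[
A = Z\,\Lambda\,Z^{T}, \qquad
Z = \begin{pmatrix} \mathrm{CS} & -\mathrm{SN} \\ \mathrm{SN} & \mathrm{CS} \end{pmatrix}, \qquad
\Lambda = \operatorname{diag}(R_1,\, R_2),
\]
```

Exchanging R1 and R2 only remains a valid decomposition if the two columns of Z are exchanged as well, which is exactly what the LAESWAP branches do when writing Z(1,M) and Z(2,M).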

View File

@ -164,7 +164,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleGEsolve
*> \ingroup gelss
*
* =====================================================================
SUBROUTINE DGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
@ -203,7 +203,7 @@
* ..
* .. External Subroutines ..
EXTERNAL DBDSQR, DCOPY, DGEBRD, DGELQF, DGEMM, DGEMV,
$ DGEQRF, DLABAD, DLACPY, DLASCL, DLASET, DORGBR,
$ DGEQRF, DLACPY, DLASCL, DLASET, DORGBR,
$ DORMBR, DORMLQ, DORMQR, DRSCL, XERBLA
* ..
* .. External Functions ..
@ -385,7 +385,6 @@
SFMIN = DLAMCH( 'S' )
SMLNUM = SFMIN / EPS
BIGNUM = ONE / SMLNUM
CALL DLABAD( SMLNUM, BIGNUM )
*
* Scale A if max element outside range [SMLNUM,BIGNUM]
*
@ -529,7 +528,7 @@
$ LDB, ZERO, WORK, N )
CALL DLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
20 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL DGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
CALL DCOPY( N, WORK, 1, B, 1 )
END IF
@ -626,7 +625,7 @@
CALL DLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
$ LDB )
40 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL DGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
$ 1, ZERO, WORK( IWORK ), 1 )
CALL DCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
@ -712,7 +711,7 @@
$ LDB, ZERO, WORK, N )
CALL DLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
60 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL DGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
CALL DCOPY( N, WORK, 1, B, 1 )
END IF

View File

@ -303,7 +303,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup doubleOTHERcomputational
*> \ingroup stemr
*
*> \par Contributors:
* ==================
@ -312,7 +312,8 @@
*> Jim Demmel, University of California, Berkeley, USA \n
*> Inderjit Dhillon, University of Texas, Austin, USA \n
*> Osni Marques, LBNL/NERSC, USA \n
*> Christof Voemel, University of California, Berkeley, USA
*> Christof Voemel, University of California, Berkeley, USA \n
*> Aravindh Krishnamoorthy, FAU, Erlangen, Germany \n
*
* =====================================================================
SUBROUTINE DSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU,
@ -344,7 +345,8 @@
$ MINRGP = 1.0D-3 )
* ..
* .. Local Scalars ..
LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY
LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY,
$ LAESWAP
INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW,
$ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD,
$ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP,
@ -380,6 +382,7 @@
*
LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) )
ZQUERY = ( NZC.EQ.-1 )
LAESWAP = .FALSE.
* DSTEMR needs WORK of size 6*N, IWORK of size 3*N.
* In addition, DLARRE needs WORK of size 6*N, IWORK of size 5*N.
@ -502,6 +505,15 @@
ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
CALL DLAEV2( D(1), E(1), D(2), R1, R2, CS, SN )
END IF
* D/S/LAE2 and D/S/LAEV2 outputs satisfy |R1| >= |R2|. However,
* the following code requires R1 >= R2. Hence, we correct
* the order of R1, R2, CS, SN if R1 < R2 before further processing.
IF( R1.LT.R2 ) THEN
E(2) = R1
R1 = R2
R2 = E(2)
LAESWAP = .TRUE.
ENDIF
IF( ALLEIG.OR.
$ (VALEIG.AND.(R2.GT.WL).AND.
$ (R2.LE.WU)).OR.
@ -509,8 +521,13 @@
M = M+1
W( M ) = R2
IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
IF( LAESWAP ) THEN
Z( 1, M ) = CS
Z( 2, M ) = SN
ELSE
Z( 1, M ) = -SN
Z( 2, M ) = CS
ENDIF
* Note: At most one of SN and CS can be zero.
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN
@ -533,8 +550,13 @@
M = M+1
W( M ) = R1
IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
IF( LAESWAP ) THEN
Z( 1, M ) = -SN
Z( 2, M ) = CS
ELSE
Z( 1, M ) = CS
Z( 2, M ) = SN
ENDIF
* Note: At most one of SN and CS can be zero.
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN

View File

@ -164,7 +164,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup realGEsolve
*> \ingroup gelss
*
* =====================================================================
SUBROUTINE SGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
@ -202,7 +202,7 @@
* ..
* .. External Subroutines ..
EXTERNAL SBDSQR, SCOPY, SGEBRD, SGELQF, SGEMM, SGEMV,
$ SGEQRF, SLABAD, SLACPY, SLASCL, SLASET, SORGBR,
$ SGEQRF, SLACPY, SLASCL, SLASET, SORGBR,
$ SORMBR, SORMLQ, SORMQR, SRSCL, XERBLA
* ..
* .. External Functions ..
@ -381,7 +381,6 @@
SFMIN = SLAMCH( 'S' )
SMLNUM = SFMIN / EPS
BIGNUM = ONE / SMLNUM
CALL SLABAD( SMLNUM, BIGNUM )
*
* Scale A if max element outside range [SMLNUM,BIGNUM]
*
@ -525,7 +524,7 @@
$ LDB, ZERO, WORK, N )
CALL SLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
20 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL SGEMV( 'T', N, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
CALL SCOPY( N, WORK, 1, B, 1 )
END IF
@ -622,7 +621,7 @@
CALL SLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
$ LDB )
40 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL SGEMV( 'T', M, M, ONE, WORK( IL ), LDWORK, B( 1, 1 ),
$ 1, ZERO, WORK( IWORK ), 1 )
CALL SCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
@ -708,7 +707,7 @@
$ LDB, ZERO, WORK, N )
CALL SLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
60 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL SGEMV( 'T', M, N, ONE, A, LDA, B, 1, ZERO, WORK, 1 )
CALL SCOPY( N, WORK, 1, B, 1 )
END IF

View File

@ -303,7 +303,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup realOTHERcomputational
*> \ingroup stemr
*
*> \par Contributors:
* ==================
@ -312,7 +312,8 @@
*> Jim Demmel, University of California, Berkeley, USA \n
*> Inderjit Dhillon, University of Texas, Austin, USA \n
*> Osni Marques, LBNL/NERSC, USA \n
*> Christof Voemel, University of California, Berkeley, USA
*> Christof Voemel, University of California, Berkeley, USA \n
*> Aravindh Krishnamoorthy, FAU, Erlangen, Germany \n
*
* =====================================================================
SUBROUTINE SSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU,
@ -344,7 +345,8 @@
$ MINRGP = 3.0E-3 )
* ..
* .. Local Scalars ..
LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY
LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY,
$ LAESWAP
INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW,
$ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD,
$ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP,
@ -378,6 +380,7 @@
*
LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) )
ZQUERY = ( NZC.EQ.-1 )
LAESWAP = .FALSE.
* SSTEMR needs WORK of size 6*N, IWORK of size 3*N.
* In addition, SLARRE needs WORK of size 6*N, IWORK of size 5*N.
@ -500,6 +503,15 @@
ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
CALL SLAEV2( D(1), E(1), D(2), R1, R2, CS, SN )
END IF
* D/S/LAE2 and D/S/LAEV2 outputs satisfy |R1| >= |R2|. However,
* the following code requires R1 >= R2. Hence, we correct
* the order of R1, R2, CS, SN if R1 < R2 before further processing.
IF( R1.LT.R2 ) THEN
E(2) = R1
R1 = R2
R2 = E(2)
LAESWAP = .TRUE.
ENDIF
IF( ALLEIG.OR.
$ (VALEIG.AND.(R2.GT.WL).AND.
$ (R2.LE.WU)).OR.
@ -507,8 +519,13 @@
M = M+1
W( M ) = R2
IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
IF( LAESWAP ) THEN
Z( 1, M ) = CS
Z( 2, M ) = SN
ELSE
Z( 1, M ) = -SN
Z( 2, M ) = CS
ENDIF
* Note: At most one of SN and CS can be zero.
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN
@ -531,8 +548,13 @@
M = M+1
W( M ) = R1
IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
IF( LAESWAP ) THEN
Z( 1, M ) = -SN
Z( 2, M ) = CS
ELSE
Z( 1, M ) = CS
Z( 2, M ) = SN
ENDIF
* Note: At most one of SN and CS can be zero.
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN

View File

@ -170,7 +170,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complex16GEsolve
*> \ingroup gelss
*
* =====================================================================
SUBROUTINE ZGELSS( M, N, NRHS, A, LDA, B, LDB, S, RCOND, RANK,
@ -212,10 +212,9 @@
COMPLEX*16 DUM( 1 )
* ..
* .. External Subroutines ..
EXTERNAL DLABAD, DLASCL, DLASET, XERBLA, ZBDSQR, ZCOPY,
$ ZDRSCL, ZGEBRD, ZGELQF, ZGEMM, ZGEMV, ZGEQRF,
$ ZLACPY, ZLASCL, ZLASET, ZUNGBR, ZUNMBR, ZUNMLQ,
$ ZUNMQR
EXTERNAL DLASCL, DLASET, XERBLA, ZBDSQR, ZCOPY, ZDRSCL,
$ ZGEBRD, ZGELQF, ZGEMM, ZGEMV, ZGEQRF, ZLACPY,
$ ZLASCL, ZLASET, ZUNGBR, ZUNMBR, ZUNMLQ
* ..
* .. External Functions ..
INTEGER ILAENV
@ -388,7 +387,6 @@
SFMIN = DLAMCH( 'S' )
SMLNUM = SFMIN / EPS
BIGNUM = ONE / SMLNUM
CALL DLABAD( SMLNUM, BIGNUM )
*
* Scale A if max element outside range [SMLNUM,BIGNUM]
*
@ -540,7 +538,7 @@
$ LDB, CZERO, WORK, N )
CALL ZLACPY( 'G', N, BL, WORK, N, B( 1, I ), LDB )
20 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL ZGEMV( 'C', N, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 )
CALL ZCOPY( N, WORK, 1, B, 1 )
END IF
@ -645,7 +643,7 @@
CALL ZLACPY( 'G', M, BL, WORK( IWORK ), M, B( 1, I ),
$ LDB )
40 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL ZGEMV( 'C', M, M, CONE, WORK( IL ), LDWORK, B( 1, 1 ),
$ 1, CZERO, WORK( IWORK ), 1 )
CALL ZCOPY( M, WORK( IWORK ), 1, B( 1, 1 ), 1 )
@ -737,7 +735,7 @@
$ LDB, CZERO, WORK, N )
CALL ZLACPY( 'F', N, BL, WORK, N, B( 1, I ), LDB )
60 CONTINUE
ELSE
ELSE IF( NRHS.EQ.1 ) THEN
CALL ZGEMV( 'C', M, N, CONE, A, LDA, B, 1, CZERO, WORK, 1 )
CALL ZCOPY( N, WORK, 1, B, 1 )
END IF


@ -101,7 +101,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complex16GEcomputational
*> \ingroup getf2
*
* =====================================================================
SUBROUTINE ZGETF2( M, N, A, LDA, IPIV, INFO )
@ -127,7 +127,7 @@
* ..
* .. Local Scalars ..
DOUBLE PRECISION SFMIN
INTEGER I, J, JP
INTEGER J, JP
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
@ -135,7 +135,7 @@
EXTERNAL DLAMCH, IZAMAX
* ..
* .. External Subroutines ..
EXTERNAL XERBLA, ZGERU, ZSCAL, ZSWAP
EXTERNAL XERBLA, ZGERU, ZRSCL, ZSWAP
* ..
* .. Intrinsic Functions ..
INTRINSIC MAX, MIN
@ -181,15 +181,8 @@
*
* Compute elements J+1:M of J-th column.
*
IF( J.LT.M ) THEN
IF( ABS(A( J, J )) .GE. SFMIN ) THEN
CALL ZSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
ELSE
DO 20 I = 1, M-J
A( J+I, J ) = A( J+I, J ) / A( J, J )
20 CONTINUE
END IF
END IF
IF( J.LT.M )
$ CALL ZRSCL( M-J, A( J, J ), A( J+1, J ), 1 )
*
ELSE IF( INFO.EQ.0 ) THEN
*

lapack-netlib/SRC/zrscl.c Normal file

@ -0,0 +1,735 @@
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <complex.h>
#ifdef complex
#undef complex
#endif
#ifdef I
#undef I
#endif
#if defined(_WIN64)
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
#ifdef LAPACK_ILP64
typedef BLASLONG blasint;
#if defined(_WIN64)
#define blasabs(x) llabs(x)
#else
#define blasabs(x) labs(x)
#endif
#else
typedef int blasint;
#define blasabs(x) abs(x)
#endif
typedef blasint integer;
typedef unsigned int uinteger;
typedef char *address;
typedef short int shortint;
typedef float real;
typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
#define pCd(z) (*_pCd(z))
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
typedef char integer1;
#define TRUE_ (1)
#define FALSE_ (0)
/* Extern is for use with -E */
#ifndef Extern
#define Extern extern
#endif
/* I/O stuff */
typedef int flag;
typedef int ftnlen;
typedef int ftnint;
/*external read, write*/
typedef struct
{ flag cierr;
ftnint ciunit;
flag ciend;
char *cifmt;
ftnint cirec;
} cilist;
/*internal read, write*/
typedef struct
{ flag icierr;
char *iciunit;
flag iciend;
char *icifmt;
ftnint icirlen;
ftnint icirnum;
} icilist;
/*open*/
typedef struct
{ flag oerr;
ftnint ounit;
char *ofnm;
ftnlen ofnmlen;
char *osta;
char *oacc;
char *ofm;
ftnint orl;
char *oblnk;
} olist;
/*close*/
typedef struct
{ flag cerr;
ftnint cunit;
char *csta;
} cllist;
/*rewind, backspace, endfile*/
typedef struct
{ flag aerr;
ftnint aunit;
} alist;
/* inquire */
typedef struct
{ flag inerr;
ftnint inunit;
char *infile;
ftnlen infilen;
ftnint *inex; /*parameters in standard's order*/
ftnint *inopen;
ftnint *innum;
ftnint *innamed;
char *inname;
ftnlen innamlen;
char *inacc;
ftnlen inacclen;
char *inseq;
ftnlen inseqlen;
char *indir;
ftnlen indirlen;
char *infmt;
ftnlen infmtlen;
char *inform;
ftnint informlen;
char *inunf;
ftnlen inunflen;
ftnint *inrecl;
ftnint *innrec;
char *inblank;
ftnlen inblanklen;
} inlist;
#define VOID void
union Multitype { /* for multiple entry points */
integer1 g;
shortint h;
integer i;
/* longint j; */
real r;
doublereal d;
complex c;
doublecomplex z;
};
typedef union Multitype Multitype;
struct Vardesc { /* for Namelist */
char *name;
char *addr;
ftnlen *dims;
int type;
};
typedef struct Vardesc Vardesc;
struct Namelist {
char *name;
Vardesc **vars;
int nvars;
};
typedef struct Namelist Namelist;
#define abs(x) ((x) >= 0 ? (x) : -(x))
#define dabs(x) (fabs(x))
#define f2cmin(a,b) ((a) <= (b) ? (a) : (b))
#define f2cmax(a,b) ((a) >= (b) ? (a) : (b))
#define dmin(a,b) (f2cmin(a,b))
#define dmax(a,b) (f2cmax(a,b))
#define bit_test(a,b) ((a) >> (b) & 1)
#define bit_clear(a,b) ((a) & ~((uinteger)1 << (b)))
#define bit_set(a,b) ((a) | ((uinteger)1 << (b)))
#define abort_() { sig_die("Fortran abort routine called", 1); }
#define c_abs(z) (cabsf(Cf(z)))
#define c_cos(R,Z) { pCf(R)=ccos(Cf(Z)); }
#ifdef _MSC_VER
#define c_div(c, a, b) {Cf(c)._Val[0] = (Cf(a)._Val[0]/Cf(b)._Val[0]); Cf(c)._Val[1]=(Cf(a)._Val[1]/Cf(b)._Val[1]);}
#define z_div(c, a, b) {Cd(c)._Val[0] = (Cd(a)._Val[0]/Cd(b)._Val[0]); Cd(c)._Val[1]=(Cd(a)._Val[1]/Cd(b)._Val[1]);}
#else
#define c_div(c, a, b) {pCf(c) = Cf(a)/Cf(b);}
#define z_div(c, a, b) {pCd(c) = Cd(a)/Cd(b);}
#endif
#define c_exp(R, Z) {pCf(R) = cexpf(Cf(Z));}
#define c_log(R, Z) {pCf(R) = clogf(Cf(Z));}
#define c_sin(R, Z) {pCf(R) = csinf(Cf(Z));}
//#define c_sqrt(R, Z) {*(R) = csqrtf(Cf(Z));}
#define c_sqrt(R, Z) {pCf(R) = csqrtf(Cf(Z));}
#define d_abs(x) (fabs(*(x)))
#define d_acos(x) (acos(*(x)))
#define d_asin(x) (asin(*(x)))
#define d_atan(x) (atan(*(x)))
#define d_atn2(x, y) (atan2(*(x),*(y)))
#define d_cnjg(R, Z) { pCd(R) = conj(Cd(Z)); }
#define r_cnjg(R, Z) { pCf(R) = conjf(Cf(Z)); }
#define d_cos(x) (cos(*(x)))
#define d_cosh(x) (cosh(*(x)))
#define d_dim(__a, __b) ( *(__a) > *(__b) ? *(__a) - *(__b) : 0.0 )
#define d_exp(x) (exp(*(x)))
#define d_imag(z) (cimag(Cd(z)))
#define r_imag(z) (cimagf(Cf(z)))
#define d_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define r_int(__x) (*(__x)>0 ? floor(*(__x)) : -floor(- *(__x)))
#define d_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define r_lg10(x) ( 0.43429448190325182765 * log(*(x)) )
#define d_log(x) (log(*(x)))
#define d_mod(x, y) (fmod(*(x), *(y)))
#define u_nint(__x) ((__x)>=0 ? floor((__x) + .5) : -floor(.5 - (__x)))
#define d_nint(x) u_nint(*(x))
#define u_sign(__a,__b) ((__b) >= 0 ? ((__a) >= 0 ? (__a) : -(__a)) : -((__a) >= 0 ? (__a) : -(__a)))
#define d_sign(a,b) u_sign(*(a),*(b))
#define r_sign(a,b) u_sign(*(a),*(b))
#define d_sin(x) (sin(*(x)))
#define d_sinh(x) (sinh(*(x)))
#define d_sqrt(x) (sqrt(*(x)))
#define d_tan(x) (tan(*(x)))
#define d_tanh(x) (tanh(*(x)))
#define i_abs(x) abs(*(x))
#define i_dnnt(x) ((integer)u_nint(*(x)))
#define i_len(s, n) (n)
#define i_nint(x) ((integer)u_nint(*(x)))
#define i_sign(a,b) ((integer)u_sign((integer)*(a),(integer)*(b)))
#define pow_dd(ap, bp) ( pow(*(ap), *(bp)))
#define pow_si(B,E) spow_ui(*(B),*(E))
#define pow_ri(B,E) spow_ui(*(B),*(E))
#define pow_di(B,E) dpow_ui(*(B),*(E))
#define pow_zi(p, a, b) {pCd(p) = zpow_ui(Cd(a), *(b));}
#define pow_ci(p, a, b) {pCf(p) = cpow_ui(Cf(a), *(b));}
#define pow_zz(R,A,B) {pCd(R) = cpow(Cd(A),*(B));}
#define s_cat(lpp, rpp, rnp, np, llp) { ftnlen i, nc, ll; char *f__rp, *lp; ll = (llp); lp = (lpp); for(i=0; i < (int)*(np); ++i) { nc = ll; if((rnp)[i] < nc) nc = (rnp)[i]; ll -= nc; f__rp = (rpp)[i]; while(--nc >= 0) *lp++ = *(f__rp)++; } while(--ll >= 0) *lp++ = ' '; }
#define s_cmp(a,b,c,d) ((integer)strncmp((a),(b),f2cmin((c),(d))))
#define s_copy(A,B,C,D) { int __i,__m; for (__i=0, __m=f2cmin((C),(D)); __i<__m && (B)[__i] != 0; ++__i) (A)[__i] = (B)[__i]; }
#define sig_die(s, kill) { exit(1); }
#define s_stop(s, n) {exit(0);}
static char junk[] = "\n@(#)LIBF77 VERSION 19990503\n";
#define z_abs(z) (cabs(Cd(z)))
#define z_exp(R, Z) {pCd(R) = cexp(Cd(Z));}
#define z_sqrt(R, Z) {pCd(R) = csqrt(Cd(Z));}
#define myexit_() break;
#define mycycle_() continue;
#define myceiling_(w) {ceil(w)}
#define myhuge_(w) {HUGE_VAL}
//#define mymaxloc_(w,s,e,n) {if (sizeof(*(w)) == sizeof(double)) dmaxloc_((w),*(s),*(e),n); else dmaxloc_((w),*(s),*(e),n);}
#define mymaxloc_(w,s,e,n) dmaxloc_(w,*(s),*(e),n)
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* > \brief \b ZRSCL multiplies a vector by the reciprocal of a complex scalar. */
/* =========== DOCUMENTATION =========== */
/* Online html documentation available at */
/* http://www.netlib.org/lapack/explore-html/ */
/* > \htmlonly */
/* > Download ZDRSCL + dependencies */
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zdrscl.
f"> */
/* > [TGZ]</a> */
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zdrscl.
f"> */
/* > [ZIP]</a> */
/* > <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zdrscl.
f"> */
/* > [TXT]</a> */
/* > \endhtmlonly */
/* Definition: */
/* =========== */
/* SUBROUTINE ZRSCL( N, A, X, INCX ) */
/* INTEGER INCX, N */
/* COMPLEX*16 A */
/* COMPLEX*16 X( * ) */
/* > \par Purpose: */
/* ============= */
/* > */
/* > \verbatim */
/* > */
/* > ZRSCL multiplies an n-element complex vector x by the complex scalar */
/* > 1/a. This is done without overflow or underflow as long as */
/* > the final result x/a does not overflow or underflow. */
/* > \endverbatim */
/* Arguments: */
/* ========== */
/* > \param[in] N */
/* > \verbatim */
/* > N is INTEGER */
/* > The number of components of the vector x. */
/* > \endverbatim */
/* > */
/* > \param[in] A */
/* > \verbatim */
/* > A is COMPLEX*16 */
/* > The scalar a which is used to divide each component of x. */
/* > A must not be 0, or the subroutine will divide by zero. */
/* > \endverbatim */
/* > */
/* > \param[in,out] X */
/* > \verbatim */
/* > X is COMPLEX*16 array, dimension */
/* > (1+(N-1)*abs(INCX)) */
/* > The n-element vector x. */
/* > \endverbatim */
/* > */
/* > \param[in] INCX */
/* > \verbatim */
/* > INCX is INTEGER */
/* > The increment between successive values of the vector SX. */
/* > > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n */
/* > \endverbatim */
/* Authors: */
/* ======== */
/* > \author Univ. of Tennessee */
/* > \author Univ. of California Berkeley */
/* > \author Univ. of Colorado Denver */
/* > \author NAG Ltd. */
/* > \ingroup complex16OTHERauxiliary */
/* ===================================================================== */
/* Subroutine */ int zrscl_(integer *n, doublecomplex *a, doublecomplex *x,
integer *incx)
{
/* System generated locals */
doublereal d__1, d__2;
doublecomplex z__1;
/* Local variables */
doublereal absi, absr;
extern /* Subroutine */ int zscal_(integer *, doublecomplex *,
doublecomplex *, integer *);
doublereal ai, ar;
extern doublereal dlamch_(char *);
doublereal ui, ov, ur, safmin, safmax;
extern /* Subroutine */ int zdscal_(integer *, doublereal *,
doublecomplex *, integer *), zdrscl_(integer *, doublereal *,
doublecomplex *, integer *);
/* -- LAPACK auxiliary routine -- */
/* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
/* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
/* ===================================================================== */
/* Quick return if possible */
/* Parameter adjustments */
--x;
/* Function Body */
if (*n <= 0) {
return 0;
}
/* Get machine parameters */
safmin = dlamch_("S");
safmax = 1. / safmin;
ov = dlamch_("O");
/* Initialize constants related to A. */
ar = a->r;
ai = d_imag(a);
absr = abs(ar);
absi = abs(ai);
if (ai == 0.) {
/*     If alpha is real, then we can use zdrscl */
zdrscl_(n, &ar, &x[1], incx);
} else if (ar == 0.) {
/* If alpha has a zero real part, then we follow the same rules as if */
/* alpha were real. */
if (absi > safmax) {
zdscal_(n, &safmin, &x[1], incx);
d__1 = -safmax / ai;
z__1.r = 0., z__1.i = d__1;
zscal_(n, &z__1, &x[1], incx);
} else if (absi < safmin) {
d__1 = -safmin / ai;
z__1.r = 0., z__1.i = d__1;
zscal_(n, &z__1, &x[1], incx);
zdscal_(n, &safmax, &x[1], incx);
} else {
d__1 = -1. / ai;
z__1.r = 0., z__1.i = d__1;
zscal_(n, &z__1, &x[1], incx);
}
} else {
/* The following numbers can be computed. */
/* They are the inverse of the real and imaginary parts of 1/alpha. */
/* Note that a and b are always different from zero. */
/* NaNs are only possible if either: */
/* 1. alphaR or alphaI is NaN. */
/* 2. alphaR and alphaI are both infinite, in which case it makes sense */
/* to propagate a NaN. */
ur = ar + ai * (ai / ar);
ui = ai + ar * (ar / ai);
if (abs(ur) < safmin || abs(ui) < safmin) {
/* This means that both alphaR and alphaI are very small. */
d__1 = safmin / ur;
d__2 = -safmin / ui;
z__1.r = d__1, z__1.i = d__2;
zscal_(n, &z__1, &x[1], incx);
zdscal_(n, &safmax, &x[1], incx);
} else if (abs(ur) > safmax || abs(ui) > safmax) {
if (absr > ov || absi > ov) {
/* This means that a and b are both Inf. No need for scaling. */
d__1 = 1. / ur;
d__2 = -1. / ui;
z__1.r = d__1, z__1.i = d__2;
zscal_(n, &z__1, &x[1], incx);
} else {
zdscal_(n, &safmin, &x[1], incx);
if (abs(ur) > ov || abs(ui) > ov) {
/* Infs were generated. We do proper scaling to avoid them. */
if (absr >= absi) {
/* ABS( UR ) <= ABS( UI ) */
ur = safmin * ar + safmin * (ai * (ai / ar));
ui = safmin * ai + ar * (safmin * ar / ai);
} else {
/* ABS( UR ) > ABS( UI ) */
ur = safmin * ar + ai * (safmin * ai / ar);
ui = safmin * ai + safmin * (ar * (ar / ai));
}
d__1 = 1. / ur;
d__2 = -1. / ui;
z__1.r = d__1, z__1.i = d__2;
zscal_(n, &z__1, &x[1], incx);
} else {
d__1 = safmax / ur;
d__2 = -safmax / ui;
z__1.r = d__1, z__1.i = d__2;
zscal_(n, &z__1, &x[1], incx);
}
}
} else {
d__1 = 1. / ur;
d__2 = -1. / ui;
z__1.r = d__1, z__1.i = d__2;
zscal_(n, &z__1, &x[1], incx);
}
}
return 0;
/* End of ZRSCL */
} /* zrscl_ */

lapack-netlib/SRC/zrscl.f Normal file

@ -0,0 +1,203 @@
*> \brief \b ZRSCL multiplies a vector by the reciprocal of a complex scalar.
*
* =========== DOCUMENTATION ===========
*
* Online html documentation available at
* http://www.netlib.org/lapack/explore-html/
*
*> \htmlonly
*> Download ZDRSCL + dependencies
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zdrscl.f">
*> [TGZ]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zdrscl.f">
*> [ZIP]</a>
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zdrscl.f">
*> [TXT]</a>
*> \endhtmlonly
*
* Definition:
* ===========
*
* SUBROUTINE ZRSCL( N, A, X, INCX )
*
* .. Scalar Arguments ..
* INTEGER INCX, N
* COMPLEX*16 A
* ..
* .. Array Arguments ..
* COMPLEX*16 X( * )
* ..
*
*
*> \par Purpose:
* =============
*>
*> \verbatim
*>
*> ZRSCL multiplies an n-element complex vector x by the complex scalar
*> 1/a. This is done without overflow or underflow as long as
*> the final result x/a does not overflow or underflow.
*> \endverbatim
*
* Arguments:
* ==========
*
*> \param[in] N
*> \verbatim
*> N is INTEGER
*> The number of components of the vector x.
*> \endverbatim
*>
*> \param[in] A
*> \verbatim
*> A is COMPLEX*16
*> The scalar a which is used to divide each component of x.
*> A must not be 0, or the subroutine will divide by zero.
*> \endverbatim
*>
*> \param[in,out] X
*> \verbatim
*> X is COMPLEX*16 array, dimension
*> (1+(N-1)*abs(INCX))
*> The n-element vector x.
*> \endverbatim
*>
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> The increment between successive values of the vector SX.
*> > 0: SX(1) = X(1) and SX(1+(i-1)*INCX) = x(i), 1< i<= n
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complex16OTHERauxiliary
*
* =====================================================================
SUBROUTINE ZRSCL( N, A, X, INCX )
*
* -- LAPACK auxiliary routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
* .. Scalar Arguments ..
INTEGER INCX, N
COMPLEX*16 A
* ..
* .. Array Arguments ..
COMPLEX*16 X( * )
* ..
*
* =====================================================================
*
* .. Parameters ..
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
* ..
* .. Local Scalars ..
DOUBLE PRECISION SAFMAX, SAFMIN, OV, AR, AI, ABSR, ABSI, UR, UI
* ..
* .. External Functions ..
DOUBLE PRECISION DLAMCH
COMPLEX*16 ZLADIV
EXTERNAL DLAMCH, ZLADIV
* ..
* .. External Subroutines ..
EXTERNAL DSCAL, ZDSCAL, ZDRSCL
* ..
* .. Intrinsic Functions ..
INTRINSIC ABS
* ..
* .. Executable Statements ..
*
* Quick return if possible
*
IF( N.LE.0 )
$ RETURN
*
* Get machine parameters
*
SAFMIN = DLAMCH( 'S' )
SAFMAX = ONE / SAFMIN
OV = DLAMCH( 'O' )
*
* Initialize constants related to A.
*
AR = DBLE( A )
AI = DIMAG( A )
ABSR = ABS( AR )
ABSI = ABS( AI )
*
IF( AI.EQ.ZERO ) THEN
*        If alpha is real, then we can use ZDRSCL
CALL ZDRSCL( N, AR, X, INCX )
*
ELSE IF( AR.EQ.ZERO ) THEN
* If alpha has a zero real part, then we follow the same rules as if
* alpha were real.
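*        (For A = (0,AI) we have 1/A = (0,-1/AI), hence the purely
*        imaginary scale factors of the form -c/AI used below.)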
IF( ABSI.GT.SAFMAX ) THEN
CALL ZDSCAL( N, SAFMIN, X, INCX )
CALL ZSCAL( N, DCMPLX( ZERO, -SAFMAX / AI ), X, INCX )
ELSE IF( ABSI.LT.SAFMIN ) THEN
CALL ZSCAL( N, DCMPLX( ZERO, -SAFMIN / AI ), X, INCX )
CALL ZDSCAL( N, SAFMAX, X, INCX )
ELSE
CALL ZSCAL( N, DCMPLX( ZERO, -ONE / AI ), X, INCX )
END IF
*
ELSE
* The following numbers can be computed.
* They are the inverse of the real and imaginary parts of 1/alpha.
* Note that a and b are always different from zero.
* NaNs are only possible if either:
* 1. alphaR or alphaI is NaN.
* 2. alphaR and alphaI are both infinite, in which case it makes sense
* to propagate a NaN.
UR = AR + AI * ( AI / AR )
UI = AI + AR * ( AR / AI )
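*        In exact arithmetic UR = (AR**2+AI**2)/AR and
*        UI = (AR**2+AI**2)/AI, so ONE/UR is the real part of 1/A
*        and -ONE/UI is its imaginary part; the branches below only
*        guard these quantities against overflow and underflow.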
*
IF( (ABS( UR ).LT.SAFMIN).OR.(ABS( UI ).LT.SAFMIN) ) THEN
* This means that both alphaR and alphaI are very small.
CALL ZSCAL( N, DCMPLX( SAFMIN / UR, -SAFMIN / UI ), X,
$ INCX )
CALL ZDSCAL( N, SAFMAX, X, INCX )
ELSE IF( (ABS( UR ).GT.SAFMAX).OR.(ABS( UI ).GT.SAFMAX) ) THEN
IF( (ABSR.GT.OV).OR.(ABSI.GT.OV) ) THEN
* This means that a and b are both Inf. No need for scaling.
CALL ZSCAL( N, DCMPLX( ONE / UR, -ONE / UI ), X, INCX )
ELSE
CALL ZDSCAL( N, SAFMIN, X, INCX )
IF( (ABS( UR ).GT.OV).OR.(ABS( UI ).GT.OV) ) THEN
* Infs were generated. We do proper scaling to avoid them.
IF( ABSR.GE.ABSI ) THEN
* ABS( UR ) <= ABS( UI )
UR = (SAFMIN * AR) + SAFMIN * (AI * ( AI / AR ))
UI = (SAFMIN * AI) + AR * ( (SAFMIN * AR) / AI )
ELSE
* ABS( UR ) > ABS( UI )
UR = (SAFMIN * AR) + AI * ( (SAFMIN * AI) / AR )
UI = (SAFMIN * AI) + SAFMIN * (AR * ( AR / AI ))
END IF
CALL ZSCAL( N, DCMPLX( ONE / UR, -ONE / UI ), X,
$ INCX )
ELSE
CALL ZSCAL( N, DCMPLX( SAFMAX / UR, -SAFMAX / UI ),
$ X, INCX )
END IF
END IF
ELSE
CALL ZSCAL( N, DCMPLX( ONE / UR, -ONE / UI ), X, INCX )
END IF
END IF
*
RETURN
*
* End of ZRSCL
*
END
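
A minimal C sketch of how the new ZRSCL routine might be exercised through the f2c-style interface shown in zrscl.c above, checking the result against an ordinary complex division. The prototype, the struct layout and the LP64 assumption (LAPACK integers mapping to C int) follow the typedefs at the top of zrscl.c; the program has to be linked against a library that actually provides zrscl_, e.g. an OpenBLAS built from this branch.

#include <complex.h>
#include <stdio.h>

/* f2c-style prototype matching zrscl.c above (LP64: integer == int);
   the struct layout mirrors the doublecomplex typedef used there.   */
typedef struct { double r, i; } doublecomplex;
extern int zrscl_(int *n, doublecomplex *a, doublecomplex *x, int *incx);

int main(void)
{
    int n = 3, incx = 1, i;
    doublecomplex a = { 3.0, 4.0 };                 /* divide by a = 3 + 4i */
    doublecomplex x[3] = { {1.0, 0.0}, {0.0, 2.0}, {-5.0, 1.0} };
    double complex ref[3];

    /* Reference result via ordinary complex division. */
    for (i = 0; i < 3; i++)
        ref[i] = (x[i].r + x[i].i * I) / (3.0 + 4.0 * I);

    zrscl_(&n, &a, x, &incx);                       /* x := x * (1/a) */

    for (i = 0; i < 3; i++)
        printf("x[%d] = (% .6f,% .6f)   ref = (% .6f,% .6f)\n",
               i, x[i].r, x[i].i, creal(ref[i]), cimag(ref[i]));
    return 0;
}

For an ILP64 build (INTERFACE64=1) the integer arguments would instead need a 64-bit type, matching the blasint/BLASLONG typedefs at the top of zrscl.c.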


@ -320,7 +320,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \ingroup complex16OTHERcomputational
*> \ingroup stemr
*
*> \par Contributors:
* ==================
@ -330,6 +330,7 @@
*> Inderjit Dhillon, University of Texas, Austin, USA \n
*> Osni Marques, LBNL/NERSC, USA \n
*> Christof Voemel, University of California, Berkeley, USA \n
*> Aravindh Krishnamoorthy, FAU, Erlangen, Germany \n
*
* =====================================================================
SUBROUTINE ZSTEMR( JOBZ, RANGE, N, D, E, VL, VU, IL, IU,
@ -361,7 +362,8 @@
$ MINRGP = 1.0D-3 )
* ..
* .. Local Scalars ..
LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY
LOGICAL ALLEIG, INDEIG, LQUERY, VALEIG, WANTZ, ZQUERY,
$ LAESWAP
INTEGER I, IBEGIN, IEND, IFIRST, IIL, IINDBL, IINDW,
$ IINDWK, IINFO, IINSPL, IIU, ILAST, IN, INDD,
$ INDE2, INDERR, INDGP, INDGRS, INDWRK, ITMP,
@ -397,6 +399,7 @@
*
LQUERY = ( ( LWORK.EQ.-1 ).OR.( LIWORK.EQ.-1 ) )
ZQUERY = ( NZC.EQ.-1 )
LAESWAP = .FALSE.
* DSTEMR needs WORK of size 6*N, IWORK of size 3*N.
* In addition, DLARRE needs WORK of size 6*N, IWORK of size 5*N.
@ -519,6 +522,15 @@
ELSE IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
CALL DLAEV2( D(1), E(1), D(2), R1, R2, CS, SN )
END IF
* D/S/LAE2 and D/S/LAEV2 outputs satisfy |R1| >= |R2|. However,
* the following code requires R1 >= R2. Hence, we correct
* the order of R1, R2, CS, SN if R1 < R2 before further processing.
IF( R1.LT.R2 ) THEN
E(2) = R1
R1 = R2
R2 = E(2)
LAESWAP = .TRUE.
ENDIF
IF( ALLEIG.OR.
$ (VALEIG.AND.(R2.GT.WL).AND.
$ (R2.LE.WU)).OR.
@ -526,8 +538,13 @@
M = M+1
W( M ) = R2
IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
IF( LAESWAP ) THEN
Z( 1, M ) = CS
Z( 2, M ) = SN
ELSE
Z( 1, M ) = -SN
Z( 2, M ) = CS
ENDIF
* Note: At most one of SN and CS can be zero.
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN
@ -550,8 +567,13 @@
M = M+1
W( M ) = R1
IF( WANTZ.AND.(.NOT.ZQUERY) ) THEN
IF( LAESWAP ) THEN
Z( 1, M ) = -SN
Z( 2, M ) = CS
ELSE
Z( 1, M ) = CS
Z( 2, M ) = SN
ENDIF
* Note: At most one of SN and CS can be zero.
IF (SN.NE.ZERO) THEN
IF (CS.NE.ZERO) THEN