commit
61fae59298
11
.travis.yml
11
.travis.yml
|
@ -204,6 +204,17 @@ matrix:
|
||||||
env:
|
env:
|
||||||
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8"
|
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8"
|
||||||
|
|
||||||
|
- <<: *test-macos
|
||||||
|
osx_image: xcode12
|
||||||
|
before_script:
|
||||||
|
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||||
|
- brew update
|
||||||
|
- brew install gcc@10 # for gfortran
|
||||||
|
script:
|
||||||
|
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||||
|
env:
|
||||||
|
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
|
||||||
|
|
||||||
- <<: *test-macos
|
- <<: *test-macos
|
||||||
osx_image: xcode10.0
|
osx_image: xcode10.0
|
||||||
env:
|
env:
|
||||||
|
|
|
@ -12,5 +12,5 @@ endif
|
||||||
# Enable floating-point expression contraction for clang, since it is the
|
# Enable floating-point expression contraction for clang, since it is the
|
||||||
# default for gcc
|
# default for gcc
|
||||||
ifeq ($(C_COMPILER), CLANG)
|
ifeq ($(C_COMPILER), CLANG)
|
||||||
CCOMMON_OPT += -ffp-contract=fast
|
CCOMMON_OPT += -ffp-contract=on
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -25,67 +25,35 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include "common.h"
|
/*
|
||||||
|
* Avoid contraction of floating point operations, specifically fused
|
||||||
|
* multiply-add, because they can cause unexpected results in complex
|
||||||
|
* multiplication.
|
||||||
|
*/
|
||||||
|
#if defined(__GNUC__) && !defined(__clang__)
|
||||||
|
#pragma GCC optimize ("fp-contract=off")
|
||||||
|
#endif
|
||||||
|
|
||||||
static void cscal_kernel_16(BLASLONG n, FLOAT *alpha, FLOAT *x) {
|
#if defined(__clang__)
|
||||||
__asm__("vlrepf %%v0,0(%[alpha])\n\t"
|
#pragma clang fp contract(off)
|
||||||
"vlef %%v1,4(%[alpha]),0\n\t"
|
#endif
|
||||||
"vlef %%v1,4(%[alpha]),2\n\t"
|
|
||||||
"vflcsb %%v1,%%v1\n\t"
|
#include "common.h"
|
||||||
"vlef %%v1,4(%[alpha]),1\n\t"
|
#include "vector-common.h"
|
||||||
"vlef %%v1,4(%[alpha]),3\n\t"
|
|
||||||
"srlg %[n],%[n],4\n\t"
|
static void cscal_kernel_16(BLASLONG n, FLOAT da_r, FLOAT da_i, FLOAT *x) {
|
||||||
"xgr %%r1,%%r1\n\t"
|
vector_float da_r_vec = vec_splats(da_r);
|
||||||
"0:\n\t"
|
vector_float da_i_vec = { -da_i, da_i, -da_i, da_i };
|
||||||
"pfd 2, 1024(%%r1,%[x])\n\t"
|
|
||||||
"vl %%v16,0(%%r1,%[x])\n\t"
|
vector_float *x_vec_ptr = (vector_float *)x;
|
||||||
"vl %%v17,16(%%r1,%[x])\n\t"
|
|
||||||
"vl %%v18,32(%%r1,%[x])\n\t"
|
#pragma GCC unroll 16
|
||||||
"vl %%v19,48(%%r1,%[x])\n\t"
|
for (size_t i = 0; i < n/2; i++) {
|
||||||
"vl %%v20,64(%%r1,%[x])\n\t"
|
vector_float x_vec = vec_load_hinted(x + i * VLEN_FLOATS);
|
||||||
"vl %%v21,80(%%r1,%[x])\n\t"
|
vector_float x_swapped = {x_vec[1], x_vec[0], x_vec[3], x_vec[2]};
|
||||||
"vl %%v22,96(%%r1,%[x])\n\t"
|
|
||||||
"vl %%v23,112(%%r1,%[x])\n\t"
|
x_vec_ptr[i] = x_vec * da_r_vec + x_swapped * da_i_vec;
|
||||||
"verllg %%v24,%%v16,32\n\t"
|
}
|
||||||
"verllg %%v25,%%v17,32\n\t"
|
|
||||||
"verllg %%v26,%%v18,32\n\t"
|
|
||||||
"verllg %%v27,%%v19,32\n\t"
|
|
||||||
"verllg %%v28,%%v20,32\n\t"
|
|
||||||
"verllg %%v29,%%v21,32\n\t"
|
|
||||||
"verllg %%v30,%%v22,32\n\t"
|
|
||||||
"verllg %%v31,%%v23,32\n\t"
|
|
||||||
"vfmsb %%v16,%%v16,%%v0\n\t"
|
|
||||||
"vfmsb %%v17,%%v17,%%v0\n\t"
|
|
||||||
"vfmsb %%v18,%%v18,%%v0\n\t"
|
|
||||||
"vfmsb %%v19,%%v19,%%v0\n\t"
|
|
||||||
"vfmsb %%v20,%%v20,%%v0\n\t"
|
|
||||||
"vfmsb %%v21,%%v21,%%v0\n\t"
|
|
||||||
"vfmsb %%v22,%%v22,%%v0\n\t"
|
|
||||||
"vfmsb %%v23,%%v23,%%v0\n\t"
|
|
||||||
"vfmasb %%v16,%%v24,%%v1,%%v16\n\t"
|
|
||||||
"vfmasb %%v17,%%v25,%%v1,%%v17\n\t"
|
|
||||||
"vfmasb %%v18,%%v26,%%v1,%%v18\n\t"
|
|
||||||
"vfmasb %%v19,%%v27,%%v1,%%v19\n\t"
|
|
||||||
"vfmasb %%v20,%%v28,%%v1,%%v20\n\t"
|
|
||||||
"vfmasb %%v21,%%v29,%%v1,%%v21\n\t"
|
|
||||||
"vfmasb %%v22,%%v30,%%v1,%%v22\n\t"
|
|
||||||
"vfmasb %%v23,%%v31,%%v1,%%v23\n\t"
|
|
||||||
"vst %%v16,0(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v17,16(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v18,32(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v19,48(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v20,64(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v21,80(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v22,96(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v23,112(%%r1,%[x])\n\t"
|
|
||||||
"agfi %%r1,128\n\t"
|
|
||||||
"brctg %[n],0b"
|
|
||||||
: "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n)
|
|
||||||
: [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha),
|
|
||||||
[alpha] "a"(alpha)
|
|
||||||
: "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21",
|
|
||||||
"v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
|
|
||||||
"v31");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void cscal_kernel_16_zero_r(BLASLONG n, FLOAT *alpha, FLOAT *x) {
|
static void cscal_kernel_16_zero_r(BLASLONG n, FLOAT *alpha, FLOAT *x) {
|
||||||
|
@ -199,14 +167,12 @@ static void cscal_kernel_16_zero(BLASLONG n, FLOAT *x) {
|
||||||
: "cc", "r1", "v0");
|
: "cc", "r1", "v0");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void cscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x,
|
static void cscal_kernel_inc_8(BLASLONG n, FLOAT da_r, FLOAT da_i, FLOAT *x,
|
||||||
BLASLONG inc_x) {
|
BLASLONG inc_x) {
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
BLASLONG inc_x2 = 2 * inc_x;
|
BLASLONG inc_x2 = 2 * inc_x;
|
||||||
BLASLONG inc_x3 = inc_x2 + inc_x;
|
BLASLONG inc_x3 = inc_x2 + inc_x;
|
||||||
FLOAT t0, t1, t2, t3;
|
FLOAT t0, t1, t2, t3;
|
||||||
FLOAT da_r = alpha[0];
|
|
||||||
FLOAT da_i = alpha[1];
|
|
||||||
|
|
||||||
for (i = 0; i < n; i += 4) {
|
for (i = 0; i < n; i += 4) {
|
||||||
t0 = da_r * x[0] - da_i * x[1];
|
t0 = da_r * x[0] - da_i * x[1];
|
||||||
|
@ -324,9 +290,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
|
|
||||||
BLASLONG n1 = n & -8;
|
BLASLONG n1 = n & -8;
|
||||||
if (n1 > 0) {
|
if (n1 > 0) {
|
||||||
alpha[0] = da_r;
|
cscal_kernel_inc_8(n1, da_r, da_i, x, inc_x);
|
||||||
alpha[1] = da_i;
|
|
||||||
cscal_kernel_inc_8(n1, alpha, x, inc_x);
|
|
||||||
j = n1;
|
j = n1;
|
||||||
i = n1 * inc_x;
|
i = n1 * inc_x;
|
||||||
}
|
}
|
||||||
|
@ -362,7 +326,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
else if (da_i == 0)
|
else if (da_i == 0)
|
||||||
cscal_kernel_16_zero_i(n1, alpha, x);
|
cscal_kernel_16_zero_i(n1, alpha, x);
|
||||||
else
|
else
|
||||||
cscal_kernel_16(n1, alpha, x);
|
cscal_kernel_16(n1, da_r, da_i, x);
|
||||||
|
|
||||||
i = n1 << 1;
|
i = n1 << 1;
|
||||||
j = n1;
|
j = n1;
|
||||||
|
|
|
@ -30,12 +30,13 @@
|
||||||
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*/
|
*/
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
#include <vecintrin.h>
|
#include "vector-common.h"
|
||||||
|
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
|
||||||
#ifdef COMPLEX
|
#ifdef COMPLEX
|
||||||
#error "Handling for complex numbers is not supported in this kernel"
|
#error "Handling for complex numbers is not supported in this kernel"
|
||||||
#endif
|
#endif
|
||||||
|
@ -153,37 +154,6 @@ static const bool backwards = false;
|
||||||
* 3, May 2008.
|
* 3, May 2008.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define VLEN_BYTES 16
|
|
||||||
#define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT))
|
|
||||||
|
|
||||||
typedef FLOAT vector_float __attribute__ ((vector_size (16)));
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Load a vector into register, and hint on 8-byte alignment to improve
|
|
||||||
* performance. gcc-9 and newer will create these hints by itself. For older
|
|
||||||
* compiler versions, use inline assembly to explicitly express the hint.
|
|
||||||
* Provide explicit hex encoding to cater for binutils versions that do not know
|
|
||||||
* about vector-load with alignment hints yet.
|
|
||||||
*
|
|
||||||
* Note that, for block sizes where we apply vectorization, vectors in A will
|
|
||||||
* always be 8-byte aligned.
|
|
||||||
*/
|
|
||||||
static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
|
|
||||||
vector_float const *restrict addr = (vector_float const *restrict)a;
|
|
||||||
vector_float y;
|
|
||||||
|
|
||||||
#if __GNUC__ < 9 && !defined(__clang__)
|
|
||||||
// hex-encode vl %[out],%[addr],3
|
|
||||||
asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
|
|
||||||
: [ out ] "=v"(y)
|
|
||||||
: [ addr ] "R"(*addr));
|
|
||||||
#else
|
|
||||||
y = *addr;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return y;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate for a row-block in C_i of size ROWSxCOLS using vector intrinsics.
|
* Calculate for a row-block in C_i of size ROWSxCOLS using vector intrinsics.
|
||||||
*
|
*
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) IBM Corporation 2020.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are
|
||||||
|
* met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in
|
||||||
|
* the documentation and/or other materials provided with the
|
||||||
|
* distribution.
|
||||||
|
* 3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
* its contributors may be used to endorse or promote products
|
||||||
|
* derived from this software without specific prior written
|
||||||
|
* permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <vecintrin.h>
|
||||||
|
|
||||||
|
#define VLEN_BYTES 16
|
||||||
|
#define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT))
|
||||||
|
|
||||||
|
typedef FLOAT vector_float __attribute__ ((vector_size (VLEN_BYTES)));
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Load a vector into register, and hint on 8-byte alignment to improve
|
||||||
|
* performance. gcc-9 and newer will create these hints by itself. For older
|
||||||
|
* compiler versions, use inline assembly to explicitly express the hint.
|
||||||
|
* Provide explicit hex encoding to cater for binutils versions that do not know
|
||||||
|
* about vector-load with alignment hints yet.
|
||||||
|
*
|
||||||
|
* Note that, for block sizes where we apply vectorization, vectors in A will
|
||||||
|
* always be 8-byte aligned.
|
||||||
|
*/
|
||||||
|
static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
|
||||||
|
vector_float const *restrict addr = (vector_float const *restrict)a;
|
||||||
|
vector_float y;
|
||||||
|
|
||||||
|
#if __GNUC__ < 9 && !defined(__clang__)
|
||||||
|
// hex-encode vl %[out],%[addr],3
|
||||||
|
asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
|
||||||
|
: [ out ] "=v"(y)
|
||||||
|
: [ addr ] "R"(*addr));
|
||||||
|
#else
|
||||||
|
y = *addr;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
return y;
|
||||||
|
}
|
|
@ -25,65 +25,35 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*****************************************************************************/
|
*****************************************************************************/
|
||||||
|
|
||||||
#include "common.h"
|
/*
|
||||||
|
* Avoid contraction of floating point operations, specifically fused
|
||||||
|
* multiply-add, because they can cause unexpected results in complex
|
||||||
|
* multiplication.
|
||||||
|
*/
|
||||||
|
#if defined(__GNUC__) && !defined(__clang__)
|
||||||
|
#pragma GCC optimize ("fp-contract=off")
|
||||||
|
#endif
|
||||||
|
|
||||||
static void zscal_kernel_8(BLASLONG n, FLOAT *alpha, FLOAT *x) {
|
#if defined(__clang__)
|
||||||
__asm__("vlrepg %%v0,0(%[alpha])\n\t"
|
#pragma clang fp contract(off)
|
||||||
"vleg %%v1,8(%[alpha]),0\n\t"
|
#endif
|
||||||
"wflcdb %%v1,%%v1\n\t"
|
|
||||||
"vleg %%v1,8(%[alpha]),1\n\t"
|
#include "common.h"
|
||||||
"srlg %[n],%[n],3\n\t"
|
#include "vector-common.h"
|
||||||
"xgr %%r1,%%r1\n\t"
|
|
||||||
"0:\n\t"
|
static void zscal_kernel_8(BLASLONG n, FLOAT da_r, FLOAT da_i, FLOAT *x) {
|
||||||
"pfd 2, 1024(%%r1,%[x])\n\t"
|
vector_float da_r_vec = vec_splats(da_r);
|
||||||
"vl %%v16,0(%%r1,%[x])\n\t"
|
vector_float da_i_vec = { -da_i, da_i };
|
||||||
"vl %%v17,16(%%r1,%[x])\n\t"
|
|
||||||
"vl %%v18,32(%%r1,%[x])\n\t"
|
vector_float * x_vec_ptr = (vector_float *)x;
|
||||||
"vl %%v19,48(%%r1,%[x])\n\t"
|
|
||||||
"vl %%v20,64(%%r1,%[x])\n\t"
|
#pragma GCC unroll 16
|
||||||
"vl %%v21,80(%%r1,%[x])\n\t"
|
for (size_t i = 0; i < n; i++) {
|
||||||
"vl %%v22,96(%%r1,%[x])\n\t"
|
vector_float x_vec = vec_load_hinted(x + i * VLEN_FLOATS);
|
||||||
"vl %%v23,112(%%r1,%[x])\n\t"
|
vector_float x_swapped = {x_vec[1], x_vec[0]};
|
||||||
"vpdi %%v24,%%v16,%%v16,4\n\t"
|
|
||||||
"vpdi %%v25,%%v17,%%v17,4\n\t"
|
x_vec_ptr[i] = x_vec * da_r_vec + x_swapped * da_i_vec;
|
||||||
"vpdi %%v26,%%v18,%%v18,4\n\t"
|
}
|
||||||
"vpdi %%v27,%%v19,%%v19,4\n\t"
|
|
||||||
"vpdi %%v28,%%v20,%%v20,4\n\t"
|
|
||||||
"vpdi %%v29,%%v21,%%v21,4\n\t"
|
|
||||||
"vpdi %%v30,%%v22,%%v22,4\n\t"
|
|
||||||
"vpdi %%v31,%%v23,%%v23,4\n\t"
|
|
||||||
"vfmdb %%v16,%%v16,%%v0\n\t"
|
|
||||||
"vfmdb %%v17,%%v17,%%v0\n\t"
|
|
||||||
"vfmdb %%v18,%%v18,%%v0\n\t"
|
|
||||||
"vfmdb %%v19,%%v19,%%v0\n\t"
|
|
||||||
"vfmdb %%v20,%%v20,%%v0\n\t"
|
|
||||||
"vfmdb %%v21,%%v21,%%v0\n\t"
|
|
||||||
"vfmdb %%v22,%%v22,%%v0\n\t"
|
|
||||||
"vfmdb %%v23,%%v23,%%v0\n\t"
|
|
||||||
"vfmadb %%v16,%%v24,%%v1,%%v16\n\t"
|
|
||||||
"vfmadb %%v17,%%v25,%%v1,%%v17\n\t"
|
|
||||||
"vfmadb %%v18,%%v26,%%v1,%%v18\n\t"
|
|
||||||
"vfmadb %%v19,%%v27,%%v1,%%v19\n\t"
|
|
||||||
"vfmadb %%v20,%%v28,%%v1,%%v20\n\t"
|
|
||||||
"vfmadb %%v21,%%v29,%%v1,%%v21\n\t"
|
|
||||||
"vfmadb %%v22,%%v30,%%v1,%%v22\n\t"
|
|
||||||
"vfmadb %%v23,%%v31,%%v1,%%v23\n\t"
|
|
||||||
"vst %%v16,0(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v17,16(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v18,32(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v19,48(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v20,64(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v21,80(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v22,96(%%r1,%[x])\n\t"
|
|
||||||
"vst %%v23,112(%%r1,%[x])\n\t"
|
|
||||||
"agfi %%r1,128\n\t"
|
|
||||||
"brctg %[n],0b"
|
|
||||||
: "+m"(*(FLOAT (*)[n * 2]) x),[n] "+&r"(n)
|
|
||||||
: [x] "a"(x), "m"(*(const FLOAT (*)[2]) alpha),
|
|
||||||
[alpha] "a"(alpha)
|
|
||||||
: "cc", "r1", "v0", "v1", "v16", "v17", "v18", "v19", "v20", "v21",
|
|
||||||
"v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30",
|
|
||||||
"v31");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void zscal_kernel_8_zero_r(BLASLONG n, FLOAT *alpha, FLOAT *x) {
|
static void zscal_kernel_8_zero_r(BLASLONG n, FLOAT *alpha, FLOAT *x) {
|
||||||
|
@ -195,14 +165,12 @@ static void zscal_kernel_8_zero(BLASLONG n, FLOAT *x) {
|
||||||
: "cc", "r1", "v0");
|
: "cc", "r1", "v0");
|
||||||
}
|
}
|
||||||
|
|
||||||
static void zscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x,
|
static void zscal_kernel_inc_8(BLASLONG n, FLOAT da_r, FLOAT da_i, FLOAT *x,
|
||||||
BLASLONG inc_x) {
|
BLASLONG inc_x) {
|
||||||
BLASLONG i;
|
BLASLONG i;
|
||||||
BLASLONG inc_x2 = 2 * inc_x;
|
BLASLONG inc_x2 = 2 * inc_x;
|
||||||
BLASLONG inc_x3 = inc_x2 + inc_x;
|
BLASLONG inc_x3 = inc_x2 + inc_x;
|
||||||
FLOAT t0, t1, t2, t3;
|
FLOAT t0, t1, t2, t3;
|
||||||
FLOAT da_r = alpha[0];
|
|
||||||
FLOAT da_i = alpha[1];
|
|
||||||
|
|
||||||
for (i = 0; i < n; i += 4) {
|
for (i = 0; i < n; i += 4) {
|
||||||
t0 = da_r * x[0] - da_i * x[1];
|
t0 = da_r * x[0] - da_i * x[1];
|
||||||
|
@ -320,9 +288,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
|
|
||||||
BLASLONG n1 = n & -8;
|
BLASLONG n1 = n & -8;
|
||||||
if (n1 > 0) {
|
if (n1 > 0) {
|
||||||
alpha[0] = da_r;
|
zscal_kernel_inc_8(n1, da_r, da_i, x, inc_x);
|
||||||
alpha[1] = da_i;
|
|
||||||
zscal_kernel_inc_8(n1, alpha, x, inc_x);
|
|
||||||
j = n1;
|
j = n1;
|
||||||
i = n1 * inc_x;
|
i = n1 * inc_x;
|
||||||
}
|
}
|
||||||
|
@ -358,7 +324,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
|
||||||
else if (da_i == 0)
|
else if (da_i == 0)
|
||||||
zscal_kernel_8_zero_i(n1, alpha, x);
|
zscal_kernel_8_zero_i(n1, alpha, x);
|
||||||
else
|
else
|
||||||
zscal_kernel_8(n1, alpha, x);
|
zscal_kernel_8(n1, da_r, da_i, x);
|
||||||
|
|
||||||
i = n1 << 1;
|
i = n1 << 1;
|
||||||
j = n1;
|
j = n1;
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
*> where:
|
*> where:
|
||||||
*>
|
*>
|
||||||
*> Q is a N-by-N orthogonal matrix;
|
*> Q is a N-by-N orthogonal matrix;
|
||||||
*> L is an lower-triangular M-by-M matrix;
|
*> L is a lower-triangular M-by-M matrix;
|
||||||
*> 0 is a M-by-(N-M) zero matrix, if M < N.
|
*> 0 is a M-by-(N-M) zero matrix, if M < N.
|
||||||
*>
|
*>
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
|
@ -187,7 +187,7 @@
|
||||||
* ..
|
* ..
|
||||||
* .. Local Scalars ..
|
* .. Local Scalars ..
|
||||||
LOGICAL LQUERY, LMINWS, MINT, MINW
|
LOGICAL LQUERY, LMINWS, MINT, MINW
|
||||||
INTEGER MB, NB, MINTSZ, NBLCKS
|
INTEGER MB, NB, MINTSZ, NBLCKS, LWMIN, LWOPT, LWREQ
|
||||||
* ..
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
LOGICAL LSAME
|
LOGICAL LSAME
|
||||||
|
@ -243,20 +243,32 @@
|
||||||
*
|
*
|
||||||
* Determine if the workspace size satisfies minimal size
|
* Determine if the workspace size satisfies minimal size
|
||||||
*
|
*
|
||||||
|
IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN
|
||||||
|
LWMIN = MAX( 1, N )
|
||||||
|
LWOPT = MAX( 1, MB*N )
|
||||||
|
ELSE
|
||||||
|
LWMIN = MAX( 1, M )
|
||||||
|
LWOPT = MAX( 1, MB*M )
|
||||||
|
END IF
|
||||||
LMINWS = .FALSE.
|
LMINWS = .FALSE.
|
||||||
IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.MB*M )
|
IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.LWOPT )
|
||||||
$ .AND. ( LWORK.GE.M ) .AND. ( TSIZE.GE.MINTSZ )
|
$ .AND. ( LWORK.GE.LWMIN ) .AND. ( TSIZE.GE.MINTSZ )
|
||||||
$ .AND. ( .NOT.LQUERY ) ) THEN
|
$ .AND. ( .NOT.LQUERY ) ) THEN
|
||||||
IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN
|
IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN
|
||||||
LMINWS = .TRUE.
|
LMINWS = .TRUE.
|
||||||
MB = 1
|
MB = 1
|
||||||
NB = N
|
NB = N
|
||||||
END IF
|
END IF
|
||||||
IF( LWORK.LT.MB*M ) THEN
|
IF( LWORK.LT.LWOPT ) THEN
|
||||||
LMINWS = .TRUE.
|
LMINWS = .TRUE.
|
||||||
MB = 1
|
MB = 1
|
||||||
END IF
|
END IF
|
||||||
END IF
|
END IF
|
||||||
|
IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN
|
||||||
|
LWREQ = MAX( 1, MB*N )
|
||||||
|
ELSE
|
||||||
|
LWREQ = MAX( 1, MB*M )
|
||||||
|
END IF
|
||||||
*
|
*
|
||||||
IF( M.LT.0 ) THEN
|
IF( M.LT.0 ) THEN
|
||||||
INFO = -1
|
INFO = -1
|
||||||
|
@ -267,7 +279,7 @@
|
||||||
ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 )
|
ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 )
|
||||||
$ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN
|
$ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN
|
||||||
INFO = -6
|
INFO = -6
|
||||||
ELSE IF( ( LWORK.LT.MAX( 1, M*MB ) ) .AND .( .NOT.LQUERY )
|
ELSE IF( ( LWORK.LT.LWREQ ) .AND .( .NOT.LQUERY )
|
||||||
$ .AND. ( .NOT.LMINWS ) ) THEN
|
$ .AND. ( .NOT.LMINWS ) ) THEN
|
||||||
INFO = -8
|
INFO = -8
|
||||||
END IF
|
END IF
|
||||||
|
@ -281,9 +293,9 @@
|
||||||
T( 2 ) = MB
|
T( 2 ) = MB
|
||||||
T( 3 ) = NB
|
T( 3 ) = NB
|
||||||
IF( MINW ) THEN
|
IF( MINW ) THEN
|
||||||
WORK( 1 ) = MAX( 1, N )
|
WORK( 1 ) = LWMIN
|
||||||
ELSE
|
ELSE
|
||||||
WORK( 1 ) = MAX( 1, MB*M )
|
WORK( 1 ) = LWREQ
|
||||||
END IF
|
END IF
|
||||||
END IF
|
END IF
|
||||||
IF( INFO.NE.0 ) THEN
|
IF( INFO.NE.0 ) THEN
|
||||||
|
@ -308,7 +320,7 @@
|
||||||
$ LWORK, INFO )
|
$ LWORK, INFO )
|
||||||
END IF
|
END IF
|
||||||
*
|
*
|
||||||
WORK( 1 ) = MAX( 1, MB*M )
|
WORK( 1 ) = LWREQ
|
||||||
*
|
*
|
||||||
RETURN
|
RETURN
|
||||||
*
|
*
|
||||||
|
|
|
@ -261,7 +261,7 @@
|
||||||
TSZM = INT( TQ( 1 ) )
|
TSZM = INT( TQ( 1 ) )
|
||||||
LWM = INT( WORKQ( 1 ) )
|
LWM = INT( WORKQ( 1 ) )
|
||||||
CALL CGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ,
|
CALL CGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ,
|
||||||
$ TSZO, B, LDB, WORKQ, -1, INFO2 )
|
$ TSZM, B, LDB, WORKQ, -1, INFO2 )
|
||||||
LWM = MAX( LWM, INT( WORKQ( 1 ) ) )
|
LWM = MAX( LWM, INT( WORKQ( 1 ) ) )
|
||||||
WSIZEO = TSZO + LWO
|
WSIZEO = TSZO + LWO
|
||||||
WSIZEM = TSZM + LWM
|
WSIZEM = TSZM + LWM
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
*> where:
|
*> where:
|
||||||
*>
|
*>
|
||||||
*> Q is a N-by-N orthogonal matrix;
|
*> Q is a N-by-N orthogonal matrix;
|
||||||
*> L is an lower-triangular M-by-M matrix;
|
*> L is a lower-triangular M-by-M matrix;
|
||||||
*> 0 is a M-by-(N-M) zero matrix, if M < N.
|
*> 0 is a M-by-(N-M) zero matrix, if M < N.
|
||||||
*>
|
*>
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
|
@ -187,7 +187,7 @@
|
||||||
* ..
|
* ..
|
||||||
* .. Local Scalars ..
|
* .. Local Scalars ..
|
||||||
LOGICAL LQUERY, LMINWS, MINT, MINW
|
LOGICAL LQUERY, LMINWS, MINT, MINW
|
||||||
INTEGER MB, NB, MINTSZ, NBLCKS
|
INTEGER MB, NB, MINTSZ, NBLCKS, LWMIN, LWOPT, LWREQ
|
||||||
* ..
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
LOGICAL LSAME
|
LOGICAL LSAME
|
||||||
|
@ -243,20 +243,32 @@
|
||||||
*
|
*
|
||||||
* Determine if the workspace size satisfies minimal size
|
* Determine if the workspace size satisfies minimal size
|
||||||
*
|
*
|
||||||
|
IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN
|
||||||
|
LWMIN = MAX( 1, N )
|
||||||
|
LWOPT = MAX( 1, MB*N )
|
||||||
|
ELSE
|
||||||
|
LWMIN = MAX( 1, M )
|
||||||
|
LWOPT = MAX( 1, MB*M )
|
||||||
|
END IF
|
||||||
LMINWS = .FALSE.
|
LMINWS = .FALSE.
|
||||||
IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.MB*M )
|
IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.LWOPT )
|
||||||
$ .AND. ( LWORK.GE.M ) .AND. ( TSIZE.GE.MINTSZ )
|
$ .AND. ( LWORK.GE.LWMIN ) .AND. ( TSIZE.GE.MINTSZ )
|
||||||
$ .AND. ( .NOT.LQUERY ) ) THEN
|
$ .AND. ( .NOT.LQUERY ) ) THEN
|
||||||
IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN
|
IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN
|
||||||
LMINWS = .TRUE.
|
LMINWS = .TRUE.
|
||||||
MB = 1
|
MB = 1
|
||||||
NB = N
|
NB = N
|
||||||
END IF
|
END IF
|
||||||
IF( LWORK.LT.MB*M ) THEN
|
IF( LWORK.LT.LWOPT ) THEN
|
||||||
LMINWS = .TRUE.
|
LMINWS = .TRUE.
|
||||||
MB = 1
|
MB = 1
|
||||||
END IF
|
END IF
|
||||||
END IF
|
END IF
|
||||||
|
IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN
|
||||||
|
LWREQ = MAX( 1, MB*N )
|
||||||
|
ELSE
|
||||||
|
LWREQ = MAX( 1, MB*M )
|
||||||
|
END IF
|
||||||
*
|
*
|
||||||
IF( M.LT.0 ) THEN
|
IF( M.LT.0 ) THEN
|
||||||
INFO = -1
|
INFO = -1
|
||||||
|
@ -267,7 +279,7 @@
|
||||||
ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 )
|
ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 )
|
||||||
$ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN
|
$ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN
|
||||||
INFO = -6
|
INFO = -6
|
||||||
ELSE IF( ( LWORK.LT.MAX( 1, M*MB ) ) .AND .( .NOT.LQUERY )
|
ELSE IF( ( LWORK.LT.LWREQ ) .AND .( .NOT.LQUERY )
|
||||||
$ .AND. ( .NOT.LMINWS ) ) THEN
|
$ .AND. ( .NOT.LMINWS ) ) THEN
|
||||||
INFO = -8
|
INFO = -8
|
||||||
END IF
|
END IF
|
||||||
|
@ -281,9 +293,9 @@
|
||||||
T( 2 ) = MB
|
T( 2 ) = MB
|
||||||
T( 3 ) = NB
|
T( 3 ) = NB
|
||||||
IF( MINW ) THEN
|
IF( MINW ) THEN
|
||||||
WORK( 1 ) = MAX( 1, N )
|
WORK( 1 ) = LWMIN
|
||||||
ELSE
|
ELSE
|
||||||
WORK( 1 ) = MAX( 1, MB*M )
|
WORK( 1 ) = LWREQ
|
||||||
END IF
|
END IF
|
||||||
END IF
|
END IF
|
||||||
IF( INFO.NE.0 ) THEN
|
IF( INFO.NE.0 ) THEN
|
||||||
|
@ -308,7 +320,7 @@
|
||||||
$ LWORK, INFO )
|
$ LWORK, INFO )
|
||||||
END IF
|
END IF
|
||||||
*
|
*
|
||||||
WORK( 1 ) = MAX( 1, MB*M )
|
WORK( 1 ) = LWREQ
|
||||||
*
|
*
|
||||||
RETURN
|
RETURN
|
||||||
*
|
*
|
||||||
|
|
|
@ -258,7 +258,7 @@
|
||||||
TSZM = INT( TQ( 1 ) )
|
TSZM = INT( TQ( 1 ) )
|
||||||
LWM = INT( WORKQ( 1 ) )
|
LWM = INT( WORKQ( 1 ) )
|
||||||
CALL DGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ,
|
CALL DGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ,
|
||||||
$ TSZO, B, LDB, WORKQ, -1, INFO2 )
|
$ TSZM, B, LDB, WORKQ, -1, INFO2 )
|
||||||
LWM = MAX( LWM, INT( WORKQ( 1 ) ) )
|
LWM = MAX( LWM, INT( WORKQ( 1 ) ) )
|
||||||
WSIZEO = TSZO + LWO
|
WSIZEO = TSZO + LWO
|
||||||
WSIZEM = TSZM + LWM
|
WSIZEM = TSZM + LWM
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
*> where:
|
*> where:
|
||||||
*>
|
*>
|
||||||
*> Q is a N-by-N orthogonal matrix;
|
*> Q is a N-by-N orthogonal matrix;
|
||||||
*> L is an lower-triangular M-by-M matrix;
|
*> L is a lower-triangular M-by-M matrix;
|
||||||
*> 0 is a M-by-(N-M) zero matrix, if M < N.
|
*> 0 is a M-by-(N-M) zero matrix, if M < N.
|
||||||
*>
|
*>
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
|
@ -187,7 +187,7 @@
|
||||||
* ..
|
* ..
|
||||||
* .. Local Scalars ..
|
* .. Local Scalars ..
|
||||||
LOGICAL LQUERY, LMINWS, MINT, MINW
|
LOGICAL LQUERY, LMINWS, MINT, MINW
|
||||||
INTEGER MB, NB, MINTSZ, NBLCKS
|
INTEGER MB, NB, MINTSZ, NBLCKS, LWMIN, LWOPT, LWREQ
|
||||||
* ..
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
LOGICAL LSAME
|
LOGICAL LSAME
|
||||||
|
@ -243,20 +243,32 @@
|
||||||
*
|
*
|
||||||
* Determine if the workspace size satisfies minimal size
|
* Determine if the workspace size satisfies minimal size
|
||||||
*
|
*
|
||||||
|
IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN
|
||||||
|
LWMIN = MAX( 1, N )
|
||||||
|
LWOPT = MAX( 1, MB*N )
|
||||||
|
ELSE
|
||||||
|
LWMIN = MAX( 1, M )
|
||||||
|
LWOPT = MAX( 1, MB*M )
|
||||||
|
END IF
|
||||||
LMINWS = .FALSE.
|
LMINWS = .FALSE.
|
||||||
IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.MB*M )
|
IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.LWOPT )
|
||||||
$ .AND. ( LWORK.GE.M ) .AND. ( TSIZE.GE.MINTSZ )
|
$ .AND. ( LWORK.GE.LWMIN ) .AND. ( TSIZE.GE.MINTSZ )
|
||||||
$ .AND. ( .NOT.LQUERY ) ) THEN
|
$ .AND. ( .NOT.LQUERY ) ) THEN
|
||||||
IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN
|
IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN
|
||||||
LMINWS = .TRUE.
|
LMINWS = .TRUE.
|
||||||
MB = 1
|
MB = 1
|
||||||
NB = N
|
NB = N
|
||||||
END IF
|
END IF
|
||||||
IF( LWORK.LT.MB*M ) THEN
|
IF( LWORK.LT.LWOPT ) THEN
|
||||||
LMINWS = .TRUE.
|
LMINWS = .TRUE.
|
||||||
MB = 1
|
MB = 1
|
||||||
END IF
|
END IF
|
||||||
END IF
|
END IF
|
||||||
|
IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN
|
||||||
|
LWREQ = MAX( 1, MB*N )
|
||||||
|
ELSE
|
||||||
|
LWREQ = MAX( 1, MB*M )
|
||||||
|
END IF
|
||||||
*
|
*
|
||||||
IF( M.LT.0 ) THEN
|
IF( M.LT.0 ) THEN
|
||||||
INFO = -1
|
INFO = -1
|
||||||
|
@ -267,7 +279,7 @@
|
||||||
ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 )
|
ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 )
|
||||||
$ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN
|
$ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN
|
||||||
INFO = -6
|
INFO = -6
|
||||||
ELSE IF( ( LWORK.LT.MAX( 1, M*MB ) ) .AND .( .NOT.LQUERY )
|
ELSE IF( ( LWORK.LT.LWREQ ) .AND .( .NOT.LQUERY )
|
||||||
$ .AND. ( .NOT.LMINWS ) ) THEN
|
$ .AND. ( .NOT.LMINWS ) ) THEN
|
||||||
INFO = -8
|
INFO = -8
|
||||||
END IF
|
END IF
|
||||||
|
@ -281,9 +293,9 @@
|
||||||
T( 2 ) = MB
|
T( 2 ) = MB
|
||||||
T( 3 ) = NB
|
T( 3 ) = NB
|
||||||
IF( MINW ) THEN
|
IF( MINW ) THEN
|
||||||
WORK( 1 ) = MAX( 1, N )
|
WORK( 1 ) = LWMIN
|
||||||
ELSE
|
ELSE
|
||||||
WORK( 1 ) = MAX( 1, MB*M )
|
WORK( 1 ) = LWREQ
|
||||||
END IF
|
END IF
|
||||||
END IF
|
END IF
|
||||||
IF( INFO.NE.0 ) THEN
|
IF( INFO.NE.0 ) THEN
|
||||||
|
@ -308,7 +320,7 @@
|
||||||
$ LWORK, INFO )
|
$ LWORK, INFO )
|
||||||
END IF
|
END IF
|
||||||
*
|
*
|
||||||
WORK( 1 ) = MAX( 1, MB*M )
|
WORK( 1 ) = LWREQ
|
||||||
RETURN
|
RETURN
|
||||||
*
|
*
|
||||||
* End of SGELQ
|
* End of SGELQ
|
||||||
|
|
|
@ -258,7 +258,7 @@
|
||||||
TSZM = INT( TQ( 1 ) )
|
TSZM = INT( TQ( 1 ) )
|
||||||
LWM = INT( WORKQ( 1 ) )
|
LWM = INT( WORKQ( 1 ) )
|
||||||
CALL SGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ,
|
CALL SGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ,
|
||||||
$ TSZO, B, LDB, WORKQ, -1, INFO2 )
|
$ TSZM, B, LDB, WORKQ, -1, INFO2 )
|
||||||
LWM = MAX( LWM, INT( WORKQ( 1 ) ) )
|
LWM = MAX( LWM, INT( WORKQ( 1 ) ) )
|
||||||
WSIZEO = TSZO + LWO
|
WSIZEO = TSZO + LWO
|
||||||
WSIZEM = TSZM + LWM
|
WSIZEM = TSZM + LWM
|
||||||
|
|
|
@ -26,7 +26,7 @@
|
||||||
*> where:
|
*> where:
|
||||||
*>
|
*>
|
||||||
*> Q is a N-by-N orthogonal matrix;
|
*> Q is a N-by-N orthogonal matrix;
|
||||||
*> L is an lower-triangular M-by-M matrix;
|
*> L is a lower-triangular M-by-M matrix;
|
||||||
*> 0 is a M-by-(N-M) zero matrix, if M < N.
|
*> 0 is a M-by-(N-M) zero matrix, if M < N.
|
||||||
*>
|
*>
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
|
@ -187,7 +187,7 @@
|
||||||
* ..
|
* ..
|
||||||
* .. Local Scalars ..
|
* .. Local Scalars ..
|
||||||
LOGICAL LQUERY, LMINWS, MINT, MINW
|
LOGICAL LQUERY, LMINWS, MINT, MINW
|
||||||
INTEGER MB, NB, MINTSZ, NBLCKS
|
INTEGER MB, NB, MINTSZ, NBLCKS, LWMIN, LWOPT, LWREQ
|
||||||
* ..
|
* ..
|
||||||
* .. External Functions ..
|
* .. External Functions ..
|
||||||
LOGICAL LSAME
|
LOGICAL LSAME
|
||||||
|
@ -243,20 +243,32 @@
|
||||||
*
|
*
|
||||||
* Determine if the workspace size satisfies minimal size
|
* Determine if the workspace size satisfies minimal size
|
||||||
*
|
*
|
||||||
|
IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN
|
||||||
|
LWMIN = MAX( 1, N )
|
||||||
|
LWOPT = MAX( 1, MB*N )
|
||||||
|
ELSE
|
||||||
|
LWMIN = MAX( 1, M )
|
||||||
|
LWOPT = MAX( 1, MB*M )
|
||||||
|
END IF
|
||||||
LMINWS = .FALSE.
|
LMINWS = .FALSE.
|
||||||
IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.MB*M )
|
IF( ( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) .OR. LWORK.LT.LWOPT )
|
||||||
$ .AND. ( LWORK.GE.M ) .AND. ( TSIZE.GE.MINTSZ )
|
$ .AND. ( LWORK.GE.LWMIN ) .AND. ( TSIZE.GE.MINTSZ )
|
||||||
$ .AND. ( .NOT.LQUERY ) ) THEN
|
$ .AND. ( .NOT.LQUERY ) ) THEN
|
||||||
IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN
|
IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 ) ) THEN
|
||||||
LMINWS = .TRUE.
|
LMINWS = .TRUE.
|
||||||
MB = 1
|
MB = 1
|
||||||
NB = N
|
NB = N
|
||||||
END IF
|
END IF
|
||||||
IF( LWORK.LT.MB*M ) THEN
|
IF( LWORK.LT.LWOPT ) THEN
|
||||||
LMINWS = .TRUE.
|
LMINWS = .TRUE.
|
||||||
MB = 1
|
MB = 1
|
||||||
END IF
|
END IF
|
||||||
END IF
|
END IF
|
||||||
|
IF( ( N.LE.M ) .OR. ( NB.LE.M ) .OR. ( NB.GE.N ) ) THEN
|
||||||
|
LWREQ = MAX( 1, MB*N )
|
||||||
|
ELSE
|
||||||
|
LWREQ = MAX( 1, MB*M )
|
||||||
|
END IF
|
||||||
*
|
*
|
||||||
IF( M.LT.0 ) THEN
|
IF( M.LT.0 ) THEN
|
||||||
INFO = -1
|
INFO = -1
|
||||||
|
@ -267,7 +279,7 @@
|
||||||
ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 )
|
ELSE IF( TSIZE.LT.MAX( 1, MB*M*NBLCKS + 5 )
|
||||||
$ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN
|
$ .AND. ( .NOT.LQUERY ) .AND. ( .NOT.LMINWS ) ) THEN
|
||||||
INFO = -6
|
INFO = -6
|
||||||
ELSE IF( ( LWORK.LT.MAX( 1, M*MB ) ) .AND .( .NOT.LQUERY )
|
ELSE IF( ( LWORK.LT.LWREQ ) .AND .( .NOT.LQUERY )
|
||||||
$ .AND. ( .NOT.LMINWS ) ) THEN
|
$ .AND. ( .NOT.LMINWS ) ) THEN
|
||||||
INFO = -8
|
INFO = -8
|
||||||
END IF
|
END IF
|
||||||
|
@ -281,9 +293,9 @@
|
||||||
T( 2 ) = MB
|
T( 2 ) = MB
|
||||||
T( 3 ) = NB
|
T( 3 ) = NB
|
||||||
IF( MINW ) THEN
|
IF( MINW ) THEN
|
||||||
WORK( 1 ) = MAX( 1, N )
|
WORK( 1 ) = LWMIN
|
||||||
ELSE
|
ELSE
|
||||||
WORK( 1 ) = MAX( 1, MB*M )
|
WORK( 1 ) = LWREQ
|
||||||
END IF
|
END IF
|
||||||
END IF
|
END IF
|
||||||
IF( INFO.NE.0 ) THEN
|
IF( INFO.NE.0 ) THEN
|
||||||
|
@ -308,7 +320,7 @@
|
||||||
$ LWORK, INFO )
|
$ LWORK, INFO )
|
||||||
END IF
|
END IF
|
||||||
*
|
*
|
||||||
WORK( 1 ) = MAX( 1, MB*M )
|
WORK( 1 ) = LWREQ
|
||||||
*
|
*
|
||||||
RETURN
|
RETURN
|
||||||
*
|
*
|
||||||
|
|
|
@ -261,7 +261,7 @@
|
||||||
TSZM = INT( TQ( 1 ) )
|
TSZM = INT( TQ( 1 ) )
|
||||||
LWM = INT( WORKQ( 1 ) )
|
LWM = INT( WORKQ( 1 ) )
|
||||||
CALL ZGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ,
|
CALL ZGEMLQ( 'L', TRANS, N, NRHS, M, A, LDA, TQ,
|
||||||
$ TSZO, B, LDB, WORKQ, -1, INFO2 )
|
$ TSZM, B, LDB, WORKQ, -1, INFO2 )
|
||||||
LWM = MAX( LWM, INT( WORKQ( 1 ) ) )
|
LWM = MAX( LWM, INT( WORKQ( 1 ) ) )
|
||||||
WSIZEO = TSZO + LWO
|
WSIZEO = TSZO + LWO
|
||||||
WSIZEM = TSZM + LWM
|
WSIZEM = TSZM + LWM
|
||||||
|
|
Loading…
Reference in New Issue