Merge pull request #2099 from martin-frbg/rela-gbtrf
Disable repeated recursion on Ab_BR in ReLAPACK xGBTRF
This commit is contained in:
commit
9c4edd38f2
|
@ -221,7 +221,9 @@ static void RELAPACK_cgbtrf_rec(
|
||||||
}
|
}
|
||||||
|
|
||||||
// recursion(Ab_BR, ipiv_B)
|
// recursion(Ab_BR, ipiv_B)
|
||||||
RELAPACK_cgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
//RELAPACK_cgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||||
|
LAPACK(cgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info);
|
||||||
|
|
||||||
if (*info)
|
if (*info)
|
||||||
*info += n1;
|
*info += n1;
|
||||||
// shift pivots
|
// shift pivots
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
#include "relapack.h"
|
#include "relapack.h"
|
||||||
#include "stdlib.h"
|
#include <stdlib.h>
|
||||||
|
#include <stdio.h>
|
||||||
static void RELAPACK_dgbtrf_rec(const blasint *, const blasint *, const blasint *,
|
static void RELAPACK_dgbtrf_rec(const blasint *, const blasint *, const blasint *,
|
||||||
const blasint *, double *, const blasint *, blasint *, double *, const blasint *, double *,
|
const blasint *, double *, const blasint *, blasint *, double *, const blasint *, double *,
|
||||||
const blasint *, blasint *);
|
const blasint *, blasint *);
|
||||||
|
@ -218,7 +219,8 @@ static void RELAPACK_dgbtrf_rec(
|
||||||
}
|
}
|
||||||
|
|
||||||
// recursion(Ab_BR, ipiv_B)
|
// recursion(Ab_BR, ipiv_B)
|
||||||
RELAPACK_dgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
// RELAPACK_dgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||||
|
LAPACK(dgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info);
|
||||||
if (*info)
|
if (*info)
|
||||||
*info += n1;
|
*info += n1;
|
||||||
// shift pivots
|
// shift pivots
|
||||||
|
|
|
@ -27,7 +27,7 @@ void RELAPACK_sgbtrf(
|
||||||
*info = -3;
|
*info = -3;
|
||||||
else if (*ku < 0)
|
else if (*ku < 0)
|
||||||
*info = -4;
|
*info = -4;
|
||||||
else if (*ldAb < 2 * *kl + *ku + 1)
|
else if (*ldAb < 2 * *kl + *ku + 1)
|
||||||
*info = -6;
|
*info = -6;
|
||||||
if (*info) {
|
if (*info) {
|
||||||
const blasint minfo = -*info;
|
const blasint minfo = -*info;
|
||||||
|
@ -55,15 +55,16 @@ void RELAPACK_sgbtrf(
|
||||||
|
|
||||||
// Allocate work space
|
// Allocate work space
|
||||||
const blasint n1 = SREC_SPLIT(*n);
|
const blasint n1 = SREC_SPLIT(*n);
|
||||||
const blasint mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv;
|
const blasint mWorkl = abs( (kv > n1) ? MAX(1, *m - *kl) : kv );
|
||||||
const blasint nWorkl = (kv > n1) ? n1 : kv;
|
const blasint nWorkl = abs( (kv > n1) ? n1 : kv );
|
||||||
const blasint mWorku = (*kl > n1) ? n1 : *kl;
|
const blasint mWorku = abs( (*kl > n1) ? n1 : *kl );
|
||||||
const blasint nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl;
|
const blasint nWorku = abs( (*kl > n1) ? MAX(0, *n - *kl) : *kl );
|
||||||
float *Workl = malloc(mWorkl * nWorkl * sizeof(float));
|
float *Workl = malloc(mWorkl * nWorkl * sizeof(float));
|
||||||
float *Worku = malloc(mWorku * nWorku * sizeof(float));
|
float *Worku = malloc(mWorku * nWorku * sizeof(float));
|
||||||
LAPACK(slaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
LAPACK(slaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
||||||
LAPACK(slaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku);
|
LAPACK(slaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku);
|
||||||
|
|
||||||
|
|
||||||
// Recursive kernel
|
// Recursive kernel
|
||||||
RELAPACK_sgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info);
|
RELAPACK_sgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info);
|
||||||
|
|
||||||
|
@ -81,6 +82,7 @@ static void RELAPACK_sgbtrf_rec(
|
||||||
blasint *info
|
blasint *info
|
||||||
) {
|
) {
|
||||||
|
|
||||||
|
|
||||||
if (*n <= MAX(CROSSOVER_SGBTRF, 1)) {
|
if (*n <= MAX(CROSSOVER_SGBTRF, 1)) {
|
||||||
// Unblocked
|
// Unblocked
|
||||||
LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info);
|
||||||
|
@ -127,7 +129,7 @@ static void RELAPACK_sgbtrf_rec(
|
||||||
float *const A_BR = A + *ldA * n1 + m1;
|
float *const A_BR = A + *ldA * n1 + m1;
|
||||||
|
|
||||||
// ipiv_T
|
// ipiv_T
|
||||||
// ipiv_B
|
// ipiv_B
|
||||||
blasint *const ipiv_T = ipiv;
|
blasint *const ipiv_T = ipiv;
|
||||||
blasint *const ipiv_B = ipiv + n1;
|
blasint *const ipiv_B = ipiv + n1;
|
||||||
|
|
||||||
|
@ -155,6 +157,7 @@ static void RELAPACK_sgbtrf_rec(
|
||||||
float *const A_BRbl = A_BR + m21;
|
float *const A_BRbl = A_BR + m21;
|
||||||
float *const A_BRbr = A_BR + *ldA * n21 + m21;
|
float *const A_BRbr = A_BR + *ldA * n21 + m21;
|
||||||
|
|
||||||
|
|
||||||
// recursion(Ab_L, ipiv_T)
|
// recursion(Ab_L, ipiv_T)
|
||||||
RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info);
|
RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info);
|
||||||
|
|
||||||
|
@ -216,8 +219,11 @@ static void RELAPACK_sgbtrf_rec(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// recursion(Ab_BR, ipiv_B)
|
// recursion(Ab_BR, ipiv_B)
|
||||||
RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
//cause of infinite recursion here ?
|
||||||
|
// RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||||
|
LAPACK(sgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info);
|
||||||
if (*info)
|
if (*info)
|
||||||
*info += n1;
|
*info += n1;
|
||||||
// shift pivots
|
// shift pivots
|
||||||
|
|
|
@ -56,10 +56,10 @@ void RELAPACK_zgbtrf(
|
||||||
|
|
||||||
// Allocate work space
|
// Allocate work space
|
||||||
const blasint n1 = ZREC_SPLIT(*n);
|
const blasint n1 = ZREC_SPLIT(*n);
|
||||||
const blasint mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv;
|
const blasint mWorkl = abs ( (kv > n1) ? MAX(1, *m - *kl) : kv);
|
||||||
const blasint nWorkl = (kv > n1) ? n1 : kv;
|
const blasint nWorkl = abs ( (kv > n1) ? n1 : kv);
|
||||||
const blasint mWorku = (*kl > n1) ? n1 : *kl;
|
const blasint mWorku = abs ( (*kl > n1) ? n1 : *kl);
|
||||||
const blasint nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl;
|
const blasint nWorku = abs ( (*kl > n1) ? MAX(0, *n - *kl) : *kl);
|
||||||
double *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(double));
|
double *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(double));
|
||||||
double *Worku = malloc(mWorku * nWorku * 2 * sizeof(double));
|
double *Worku = malloc(mWorku * nWorku * 2 * sizeof(double));
|
||||||
LAPACK(zlaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
LAPACK(zlaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl);
|
||||||
|
@ -221,7 +221,9 @@ static void RELAPACK_zgbtrf_rec(
|
||||||
}
|
}
|
||||||
|
|
||||||
// recursion(Ab_BR, ipiv_B)
|
// recursion(Ab_BR, ipiv_B)
|
||||||
RELAPACK_zgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
// RELAPACK_zgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info);
|
||||||
|
LAPACK(zgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info);
|
||||||
|
|
||||||
if (*info)
|
if (*info)
|
||||||
*info += n1;
|
*info += n1;
|
||||||
// shift pivots
|
// shift pivots
|
||||||
|
|
Loading…
Reference in New Issue