From e08743d9771c96afa6dce3d2326aab3554db6337 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Wed, 12 Jul 2023 23:02:36 +0200 Subject: [PATCH] Update to use safe scaling algorithm from Reference-LAPACK PR 527 --- interface/rotg.c | 62 ++++++++++++++++++++++++++++++++++------------- interface/zrotg.c | 40 ++++++++++++++++++++++++++---- 2 files changed, 80 insertions(+), 22 deletions(-) diff --git a/interface/rotg.c b/interface/rotg.c index 69443a5a0..3ccf4f7eb 100644 --- a/interface/rotg.c +++ b/interface/rotg.c @@ -1,9 +1,11 @@ #include +#include #include "common.h" #ifdef FUNCTION_PROFILE #include "functable.h" #endif + #ifndef CBLAS void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ @@ -14,17 +16,27 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ #endif +#ifdef DOUBLE + long double safmin = DBL_MIN; +#else + long double safmin = FLT_MIN; +#endif + #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) long double da = *DA; long double db = *DB; long double c; long double s; - long double r, roe, z; + long double r, z; + long double sigma, dascal,dbscal; long double ada = fabsl(da); long double adb = fabsl(db); - long double scale = ada + adb; + long double maxab = MAX(ada,adb); + long double safmax; + long double scale; + #ifndef CBLAS PRINT_DEBUG_NAME; @@ -32,17 +44,25 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ PRINT_DEBUG_CNAME; #endif - roe = db; - if (ada > adb) roe = da; - - if (scale == ZERO) { + if (adb == ZERO) { *C = ONE; *S = ZERO; - *DA = ZERO; *DB = ZERO; + } else if (ada == ZERO) { + *C = ZERO; + *S = ONE; + *DA = *DB; + *DB = ONE; } else { - r = sqrt(da * da + db * db); - if (roe < 0) r = -r; + safmax = 1./safmin; + scale = MIN(MAX(safmin,maxab), safmax); + if (ada > adb) + sigma = copysign(1.,da); + else + sigma = copysign(1.,db); + dascal = da / scale; + dbscal = db / scale; + r = sigma * (scale * sqrt(dascal * dascal + dbscal * dbscal)); c = da / r; s = db / r; z = ONE; @@ -65,11 +85,22 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ FLOAT db = *DB; FLOAT c = *C; FLOAT s = *S; - FLOAT r, roe, z; + FLOAT sigma; + FLOAT r, z; FLOAT ada = fabs(da); FLOAT adb = fabs(db); - FLOAT scale = ada + adb; + FLOAT maxab = MAX(ada,adb); + long double safmax ; + FLOAT scale ; + + safmax = 1./safmin; + scale = MIN(MAX(safmin,maxab), safmax); + + if (ada > adb) + sigma = sign(1.,da); + else + sigma = sign(1.,db); #ifndef CBLAS PRINT_DEBUG_NAME; @@ -77,20 +108,17 @@ void CNAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ PRINT_DEBUG_CNAME; #endif - roe = db; - if (ada > adb) roe = da; - if (scale == ZERO) { + if (adb == ZERO) { *C = ONE; *S = ZERO; - *DA = ZERO; + DA = ZERO; *DB = ZERO; } else { FLOAT aa = da / scale; FLOAT bb = db / scale; - r = scale * sqrt(aa * aa + bb * bb); - if (roe < 0) r = -r; + r = sigma * scale * sqrt(aa * aa + bb * bb); c = da / r; s = db / r; z = ONE; diff --git a/interface/zrotg.c b/interface/zrotg.c index 123f4da85..7c4e2ed08 100644 --- a/interface/zrotg.c +++ b/interface/zrotg.c @@ -1,9 +1,11 @@ #include +#include #include "common.h" #ifdef FUNCTION_PROFILE #include "functable.h" #endif + #ifndef CBLAS void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){ @@ -14,6 +16,12 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { FLOAT *S = (FLOAT*) VS; #endif /* CBLAS */ +#ifdef DOUBLE + long double safmin = DBL_MIN; +#else + long double safmin = FLT_MIN; +#endif + #if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) long double da_r = *(DA + 0); @@ -23,6 +31,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { long double r; long double ada = fabsl(da_r) + fabsl(da_i); + long double adb = sqrt(db_r * db_r + db_i * db_i); PRINT_DEBUG_NAME; @@ -38,10 +47,24 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { *(DA + 1) = db_i; } else { long double alpha_r, alpha_i; + long double safmax = 1./safmin; + long double sigma; + long double maxab = MAX(ada,adb); + long double scale = MIN(MAX(safmin,maxab), safmax); - ada = sqrt(da_r * da_r + da_i * da_i); - r = sqrt(da_r * da_r + da_i * da_i + db_r * db_r + db_i * db_i); + long double aa_r = da_r / scale; + long double aa_i = da_i / scale; + long double bb_r = db_r / scale; + long double bb_i = db_i / scale; + + if (ada > adb) + sigma = copysign(1.,da_r); + else + sigma = copysign(1.,db_r); + + r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); + alpha_r = da_r / ada; alpha_i = da_i / ada; @@ -60,7 +83,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { FLOAT r; FLOAT ada = fabs(da_r) + fabs(da_i); - FLOAT adb; + FLOAT ada = fabs(db_r) + fabs(db_i); PRINT_DEBUG_NAME; @@ -75,6 +98,7 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { *(DA + 0) = db_r; *(DA + 1) = db_i; } else { + long double safmax = 1./safmin; FLOAT scale; FLOAT aa_r, aa_i, bb_r, bb_i; FLOAT alpha_r, alpha_i; @@ -108,14 +132,20 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { scale = (bb_i / bb_r); adb = bb_r * sqrt(ONE + scale * scale); } - scale = ada + adb; + FLOAT maxab = MAX(ada,adb); + scale = MIN(MAX(safmin,maxab), safmax); aa_r = da_r / scale; aa_i = da_i / scale; bb_r = db_r / scale; bb_i = db_i / scale; - r = scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); + if (ada > adb) + sigma = copysign(1.,da_r); + else + sigma = copysign(1.,db_r); + + r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); alpha_r = da_r / ada; alpha_i = da_i / ada;