diff --git a/interface/zrotg.c b/interface/zrotg.c index dd765f05f..af6f85c1c 100644 --- a/interface/zrotg.c +++ b/interface/zrotg.c @@ -18,20 +18,26 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { #ifdef DOUBLE long double safmin = DBL_MIN; + long double rtmin = sqrt(DBL_MIN/DBL_EPSILON); #else long double safmin = FLT_MIN; + long double rtmin = sqrt(FLT_MIN/FLT_EPSILON); #endif -#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) - long double da_r = *(DA + 0); - long double da_i = *(DA + 1); - long double db_r = *(DB + 0); - long double db_i = *(DB + 1); - long double r; + FLOAT da_r = *(DA+0); + FLOAT da_i = *(DA+1); + FLOAT db_r = *(DB+0); + FLOAT db_i = *(DB+1); + //long double r; + FLOAT *r, *S1=(FLOAT *)malloc(2*sizeof(FLOAT)); + FLOAT *R=(FLOAT *)malloc(2*sizeof(FLOAT)); + long double d; - long double ada = fabsl(da_r) + fabsl(da_i); - long double adb = sqrt(db_r * db_r + db_i * db_i); + FLOAT ada = da_r * da_r + da_i * da_i; + FLOAT adb = db_r * db_r + db_i * db_i; + FLOAT adart = sqrt( da_r * da_r + da_i * da_i); + FLOAT adbrt = sqrt( db_r * db_r + db_i * db_i); PRINT_DEBUG_NAME; @@ -39,128 +45,137 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { FUNCTION_PROFILE_START(); - if (ada == ZERO) { - *C = ZERO; - *(S + 0) = ONE; + if (db_r == ZERO && db_i == ZERO) { + *C = ONE; + *(S + 0) = ZERO; *(S + 1) = ZERO; - *(DA + 0) = db_r; - *(DA + 1) = db_i; - } else { - long double alpha_r, alpha_i; - long double safmax = 1./safmin; - long double sigma; - long double maxab = MAX(ada,adb); - long double scale = MIN(MAX(safmin,maxab), safmax); - - - long double aa_r = da_r / scale; - long double aa_i = da_i / scale; - long double bb_r = db_r / scale; - long double bb_i = db_i / scale; - - if (ada > adb) - sigma = copysign(1.,da_r); - else - sigma = copysign(1.,db_r); - - r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); - - - alpha_r = da_r / ada; - alpha_i = da_i / ada; - - *(C + 0) = ada / r; - *(S + 0) = (alpha_r * db_r + alpha_i *db_i) / r; - *(S + 1) = (alpha_i * db_r - alpha_r *db_i) / r; - *(DA + 0) = alpha_r * r; - *(DA + 1) = alpha_i * r; + return; } + + long double safmax = 1./safmin; +#if defined DOUBLE + long double rtmax = safmax /DBL_EPSILON; #else - FLOAT da_r = *(DA + 0); - FLOAT da_i = *(DA + 1); - FLOAT db_r = *(DB + 0); - FLOAT db_i = *(DB + 1); - FLOAT r; - - FLOAT ada = fabs(da_r) + fabs(da_i); - FLOAT adb = fabs(db_r) + fabs(db_i); - - PRINT_DEBUG_NAME; - - IDEBUG_START; - - FUNCTION_PROFILE_START(); - - if (ada == ZERO) { - *C = ZERO; - *(S + 0) = ONE; - *(S + 1) = ZERO; - *(DA + 0) = db_r; - *(DA + 1) = db_i; - } else { - long double safmax = 1./safmin; - FLOAT scale, sigma; - FLOAT aa_r, aa_i, bb_r, bb_i; - FLOAT alpha_r, alpha_i; - - aa_r = fabs(da_r); - aa_i = fabs(da_i); - - if (aa_i > aa_r) { - aa_r = fabs(da_i); - aa_i = fabs(da_r); - } - - if (aa_r == ZERO) { - ada = 0.; - } else { - scale = (aa_i / aa_r); - ada = aa_r * sqrt(ONE + scale * scale); - } - - bb_r = fabs(db_r); - bb_i = fabs(db_i); - - if (bb_i > bb_r) { - bb_r = fabs(bb_i); - bb_i = fabs(bb_r); - } - - if (bb_r == ZERO) { - adb = 0.; - } else { - scale = (bb_i / bb_r); - adb = bb_r * sqrt(ONE + scale * scale); - } - FLOAT maxab = MAX(ada,adb); - scale = MIN(MAX(safmin,maxab), safmax); - - aa_r = da_r / scale; - aa_i = da_i / scale; - bb_r = db_r / scale; - bb_i = db_i / scale; - - if (ada > adb) - sigma = copysign(1.,da_r); - else - sigma = copysign(1.,db_r); - - r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); - - alpha_r = da_r / ada; - alpha_i = da_i / ada; - - *(C + 0) = ada / r; - *(S + 0) = (alpha_r * db_r + alpha_i *db_i) / r; - *(S + 1) = (alpha_i * db_r - alpha_r *db_i) / r; - *(DA + 0) = alpha_r * r; - *(DA + 1) = alpha_i * r; - } + long double rtmax = safmax /FLT_EPSILON; #endif - - FUNCTION_PROFILE_END(4, 4, 4); - - IDEBUG_END; - - return; + *(S1 + 0) = *(DB + 0); + *(S1 + 1) = *(DB + 1) *-1; + if (da_r == ZERO && da_i == ZERO) { + *C = ZERO; + if (db_r == ZERO) { + (*DA) = fabsl(db_i); + *S = *S1 /da_r; + *(S+1) = *(S1+1) /da_r; + return; + } else if ( db_i == ZERO) { + *DA = fabsl(db_r); + *S = *S1 /da_r; + *(S+1) = *(S1+1) /da_r; + return; + } else { + long double g1 = MAX( fabsl(db_r), fabsl(db_i)); + rtmax =sqrt(safmax/2.); + if (g1 > rtmin && g1 < rtmax) { // unscaled + d = sqrt(adb); + *S = *S1 /d; + *(S+1) = *(S1+1) /d; + *DA = d ; + *(DA+1) = ZERO; + return; + } else { // scaled algorithm + long double u = MIN ( safmax, MAX ( safmin, g1)); + FLOAT gs_r = db_r/u; + FLOAT gs_i = db_i/u; + d = sqrt ( gs_r*gs_r + gs_i*gs_i); + *S = gs_r / d; + *(S + 1) = (gs_i * -1) / d; + *DA = d * u; + *(DA+1) = ZERO; + return; + } + } + } else { + FLOAT f1 = MAX ( fabsl(da_r), fabsl(da_i)); + FLOAT g1 = MAX ( fabsl(db_r), fabsl(db_i)); + rtmax = sqrt(safmax / 4.); + if ( f1 > rtmin && f1 < rtmax && g1 > rtmin && g1 < rtmax) { //unscaled + long double h = ada + adb; + double adahsq = sqrt(ada * h); + if (ada >= h *safmin) { + *C = sqrt(ada/h); + *R = *DA / *C; + *(R+1) = *(DA+1) / *(C+1); + rtmax *= 2.; + if ( ada > rtmin && h < rtmax) { // no risk of intermediate overflow + *S = *S1 * (*DA / adahsq) - *(S1+1)* (*(DA+1)/adahsq); + *(S+1) = *S1 * (*(DA+1) / adahsq) + *(S1+1) * (*DA/adahsq); + } else { + *S = *S1 * (*R/h) - *(S1+1) * (*(R+1)/h); + *(S+1) = *S1 * (*(R+1)/h) + *(S1+1) * (*(R)/h); + } + } else { + *C = ada / adahsq; + if (*C >= safmin) + *R = *DA / *C; + else + *R = *DA * (h / adahsq); + *S = *S1 * ada / adahsq; + *(S+1) = *(S1+1) * ada / adahsq; + } + *DA=*R; + *(DA+1)=*(R+1); + return; + } else { // scaled + FLOAT fs_r, fs_i, gs_r, gs_i; + long double v,w,f2,g2,h; + long double u = MIN ( safmax, MAX ( safmin, MAX(f1,g1))); + gs_r = db_r/u; + gs_i = db_i/u; + g2 = sqrt ( gs_r*gs_r + gs_i*gs_i); + if (f1 /u < rtmin) { + v = MIN (safmax, MAX (safmin, f1)); + w = v / u; + fs_r = *DA/ v; + fs_i = *(DA+1) / v; + f2 = sqrt ( fs_r*fs_r + fs_i*fs_i); + h = f2 * w * w + g2; + } else { // use same scaling for both + w = 1.; + fs_r = *DA/ u; + fs_i = *(DA+1) / u; + f2 = sqrt ( fs_r*fs_r + fs_i*fs_i); + h = f2 + g2; + } + if ( f2 >= h * safmin) { + *C = sqrt ( f2 / h ); + *DA = fs_r / *C; + *(DA+1) = fs_i / *C; + rtmax *= 2; + if ( f2 > rtmin && h < rtmax) { + *S = gs_r * (fs_r /sqrt(f2*h)) - gs_i * (fs_i / sqrt(f2*h)); + *(S+1) = gs_r * (fs_i /sqrt(f2*h)) + gs_i * -1. * (fs_r / sqrt(f2*h)); + } else { + *S = gs_r * (*DA/h) - gs_i * (*(DA+1) / h); + *(S+1) = gs_r * (*(DA+1) /h) + gs_i * -1. * (*DA / h); + } + } else { // intermediates might overflow + d = sqrt ( f2 * h); + *C = f2 /d; + if (*C >= safmin) { + *DA = fs_r / *C; + *(DA+1) = fs_i / *C; + } else { + *DA = fs_r * (h / d); + *(DA+1) = fs_i / (h / d); + } + *S = gs_r * (fs_r /d) - gs_i * (fs_i / d); + *(S+1) = gs_r * (fs_i /d) + gs_i * -1. * (fs_r / d); + } + *C *= w; + *DA *= u; + *(DA+1) *= u; + return; + } + } } + \ No newline at end of file