diff --git a/interface/rotmg.c b/interface/rotmg.c index acf7399e1..ce3b146c1 100644 --- a/interface/rotmg.c +++ b/interface/rotmg.c @@ -64,6 +64,13 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ FLOAT du, dp1, dp2, dq2, dq1, dh11=ZERO, dh21=ZERO, dh12=ZERO, dh22=ZERO, dflag=-ONE, dtemp; + if (*dd2 == ZERO || dy1 == ZERO) + { + dflag = -TWO; + dparam[0] = dflag; + return; + } + if(*dd1 < ZERO) { dflag = -ONE; @@ -76,6 +83,16 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ *dd2 = ZERO; *dx1 = ZERO; } + else if ((*dd1 == ZERO || *dx1 == ZERO) && *dd2 > ZERO) + { + dflag = ONE; + dh12 = 1; + dh21 = -1; + *dx1 = dy1; + dtemp = *dd1; + *dd1 = *dd2; + *dd2 = dtemp; + } else { dp2 = *dd2 * dy1; @@ -90,6 +107,9 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ dq1 = dp1 * *dx1; if(ABS(dq1) > ABS(dq2)) { + dflag = ZERO; + dh11 = ONE; + dh22 = ONE; dh21 = - dy1 / *dx1; dh12 = dp2 / dp1; @@ -100,8 +120,19 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ *dd1 = *dd1 / du; *dd2 = *dd2 / du; *dx1 = *dx1 * du; + } else { + dflag = -ONE; + dh11 = ZERO; + dh12 = ZERO; + dh21 = ZERO; + dh22 = ZERO; + + *dd1 = ZERO; + *dd2 = ZERO; + *dx1 = ZERO; } + } else { @@ -120,7 +151,9 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ } else { - dflag = ONE; + dflag = ONE; + dh21 = -ONE; + dh12 = ONE; dh11 = dp1 / dp2; dh22 = *dx1 / dy1; @@ -134,76 +167,33 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ } - if(*dd1 != ZERO) + while ( *dd1 <= RGAMSQ && *dd1 != ZERO) { - if( (*dd1 <= RGAMSQ) || (*dd1 >= GAMSQ) ) - { - if(dflag == ZERO) - { - dh11 = ONE; - dh22 = ONE; - dflag = -ONE; - } - else - { - dh21 = -ONE; - dh12 = ONE; - dflag = -ONE; - } - if( *dd1 <= RGAMSQ ) - { - while (ABS(*dd1) <= RGAMSQ) { - *dd1 = *dd1 * (GAM * GAM); - *dx1 = *dx1 / GAM; - dh11 = dh11 / GAM; - dh12 = dh12 / GAM; - } - } - else - { - while (ABS(*dd1) >= GAMSQ) { - *dd1 = *dd1 / (GAM * GAM); - *dx1 = *dx1 * GAM; - dh11 = dh11 * GAM; - dh12 = dh12 * GAM; - } - } - } + dflag = -ONE; + *dd1 = *dd1 * (GAM * GAM); + *dx1 = *dx1 / GAM; + dh11 = dh11 / GAM; + dh12 = dh12 / GAM; + } + while (ABS(*dd1) > GAMSQ) { + dflag = -ONE; + *dd1 = *dd1 / (GAM * GAM); + *dx1 = *dx1 * GAM; + dh11 = dh11 * GAM; + dh12 = dh12 * GAM; } - if(*dd2 != ZERO) - { - if( (ABS(*dd2) <= RGAMSQ) || (ABS(*dd2) >= GAMSQ) ) - { - if(dflag == ZERO) - { - dh11 = ONE; - dh22 = ONE; - dflag = -ONE; - } - else - { - dh21 = -ONE; - dh12 = ONE; - dflag = -ONE; - } - if( ABS(*dd2) <= RGAMSQ ) - { - while (ABS(*dd2) <= RGAMSQ) { - *dd2 = *dd2 * (GAM * GAM); - dh21 = dh21 / GAM; - dh22 = dh22 / GAM; - } - } - else - { - while (ABS(*dd2) >= GAMSQ) { - *dd2 = *dd2 / (GAM * GAM); - dh21 = dh21 * GAM; - dh22 = dh22 * GAM; - } - } - } + while (ABS(*dd2) <= RGAMSQ && *dd2 != ZERO) { + dflag = -ONE; + *dd2 = *dd2 * (GAM * GAM); + dh21 = dh21 / GAM; + dh22 = dh22 / GAM; + } + while (ABS(*dd2) > GAMSQ) { + dflag = -ONE; + *dd2 = *dd2 / (GAM * GAM); + dh21 = dh21 * GAM; + dh22 = dh22 * GAM; } } diff --git a/utest/test_rotmg.c b/utest/test_rotmg.c index 37aba84b3..e5ec78983 100644 --- a/utest/test_rotmg.c +++ b/utest/test_rotmg.c @@ -53,7 +53,7 @@ CTEST (drotmg,rotmg) te_param[i]=tr_param[i]=0.0; } - //reference values as calulated by netlib blas + //reference values as calculated by netlib blas tr_d1= 0.1732048; tr_d2= 0.03840234; @@ -71,13 +71,13 @@ CTEST (drotmg,rotmg) tr_param[4]= 0.0; BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); - ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS); for(i=0; i<5; i++){ - ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS); } } @@ -91,7 +91,7 @@ CTEST (drotmg,rotmg_issue1452) double tr_param[5]; int i=0; - // from issue #1452, buggy version returned 0.000244 for param[3] + // from issue #1452 te_d1 = 5.9e-8; te_d2 = 5.960464e-8; te_x1 = 1.0; @@ -100,8 +100,8 @@ CTEST (drotmg,rotmg_issue1452) for(i=0; i<5; i++){ te_param[i]=tr_param[i]=0.0; } - - //reference values as calulated by netlib blas + te_param[3]=1./4096.; + //reference values as calculated by gonum blas with rotmg rewritten to Hopkins' algorithm tr_d1= 0.99995592822897; tr_d2= 0.98981219860583; tr_x1= 0.03662270484346; @@ -110,19 +110,19 @@ CTEST (drotmg,rotmg_issue1452) tr_param[0]= -1.0; tr_param[1]= 0.00000161109346; tr_param[2]= -0.00024414062500; - tr_param[3]= 1.0; + tr_param[3]= 0.00024414062500; tr_param[4]= 0.00000162760417; //OpenBLAS BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); - ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS); for(i=0; i<5; i++){ - ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS); } } @@ -145,7 +145,7 @@ CTEST(drotmg, rotmg_D1eqD2_X1eqX2) te_param[i]=tr_param[i]=0.0; } - //reference values as calulated by netlib blas + //reference values as calculated by netlib blas tr_d1= 1.0; tr_d2= 1.0; tr_x1= 16.0; @@ -160,12 +160,47 @@ CTEST(drotmg, rotmg_D1eqD2_X1eqX2) //OpenBLAS BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); - ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS); for(i=0; i<5; i++){ - ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS); + } +} + +CTEST(drotmg, drotmg_D1_big_D2_big_flag_zero) +{ + double te_d1, tr_d1; + double te_d2, tr_d2; + double te_x1, tr_x1; + double te_y1, tr_y1; + double te_param[5]={1.,4096.,-4096.,1.,4096.}; + double tr_param[5]={-1.,4096.,-3584.,1792.,4096.}; + int i=0; + te_d1= tr_d1=1600000000.; + te_d2= tr_d2=800000000.; + te_x1= tr_x1=8.; + te_y1= tr_y1=7.; + + + //reference values as calculated by gonum + tr_d1= 68.96627824858757; + tr_d2= 34.483139124293785; + tr_x1= 45312.; + tr_y1= 7.0; + + + //OpenBLAS + BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); + + ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS); + + for(i=0; i<5; i++){ + ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS); } } diff --git a/utest/utest_main2.c b/utest/utest_main2.c index bcaa43ec0..aa95a5a3f 100644 --- a/utest/utest_main2.c +++ b/utest/utest_main2.c @@ -50,14 +50,15 @@ CTEST(amax, samax){ ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS); } -CTEST (drotmg,rotmg){ +CTEST (drotmg,rotmg) +{ double te_d1, tr_d1; double te_d2, tr_d2; double te_x1, tr_x1; double te_y1, tr_y1; double te_param[5]; double tr_param[5]; - blasint i=0; + int i=0; // original test case for libGoto bug fixed by feb2014 rewrite te_d1= 0.21149573940783739; te_d2= 0.046892057172954082; @@ -69,7 +70,7 @@ CTEST (drotmg,rotmg){ te_param[i]=tr_param[i]=0.0; } - //reference values as calulated by netlib blas + //reference values as calculated by netlib blas tr_d1= 0.1732048; tr_d2= 0.03840234; @@ -87,26 +88,27 @@ CTEST (drotmg,rotmg){ tr_param[4]= 0.0; BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); - ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS); for(i=0; i<5; i++){ - ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS); } } -CTEST (drotmg,rotmg_issue1452){ +CTEST (drotmg,rotmg_issue1452) +{ double te_d1, tr_d1; double te_d2, tr_d2; double te_x1, tr_x1; double te_y1, tr_y1; double te_param[5]; double tr_param[5]; - blasint i=0; + int i=0; - // from issue #1452, buggy version returned 0.000244 for param[3] + // from issue #1452 te_d1 = 5.9e-8; te_d2 = 5.960464e-8; te_x1 = 1.0; @@ -115,8 +117,8 @@ CTEST (drotmg,rotmg_issue1452){ for(i=0; i<5; i++){ te_param[i]=tr_param[i]=0.0; } - - //reference values as calulated by netlib blas + te_param[3]=1./4096.; + //reference values as calculated by gonum blas with rotmg rewritten to Hopkins' algorithm tr_d1= 0.99995592822897; tr_d2= 0.98981219860583; tr_x1= 0.03662270484346; @@ -125,31 +127,32 @@ CTEST (drotmg,rotmg_issue1452){ tr_param[0]= -1.0; tr_param[1]= 0.00000161109346; tr_param[2]= -0.00024414062500; - tr_param[3]= 1.0; + tr_param[3]= 0.00024414062500; tr_param[4]= 0.00000162760417; //OpenBLAS BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); - ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS); for(i=0; i<5; i++){ - ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS); } } -CTEST(drotmg, rotmg_D1eqD2_X1eqX2){ +CTEST(drotmg, rotmg_D1eqD2_X1eqX2) +{ double te_d1, tr_d1; double te_d2, tr_d2; double te_x1, tr_x1; double te_y1, tr_y1; double te_param[5]; double tr_param[5]; - blasint i=0; + int i=0; te_d1= tr_d1=2.; te_d2= tr_d2=2.; te_x1= tr_x1=8.; @@ -159,7 +162,7 @@ CTEST(drotmg, rotmg_D1eqD2_X1eqX2){ te_param[i]=tr_param[i]=0.0; } - //reference values as calulated by netlib blas + //reference values as calculated by netlib blas tr_d1= 1.0; tr_d2= 1.0; tr_x1= 16.0; @@ -174,13 +177,48 @@ CTEST(drotmg, rotmg_D1eqD2_X1eqX2){ //OpenBLAS BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); - ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS); - ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS); for(i=0; i<5; i++){ - ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS); + } +} + +CTEST(drotmg, drotmg_D1_big_D2_big_flag_zero) +{ + double te_d1, tr_d1; + double te_d2, tr_d2; + double te_x1, tr_x1; + double te_y1, tr_y1; + double te_param[5]={1.,4096.,-4096.,1.,4096.}; + double tr_param[5]={-1.,4096.,-3584.,1792.,4096.}; + int i=0; + te_d1= tr_d1=1600000000.; + te_d2= tr_d2=800000000.; + te_x1= tr_x1=8.; + te_y1= tr_y1=7.; + + + //reference values as calculated by gonum + tr_d1= 68.96627824858757; + tr_d2= 34.483139124293785; + tr_x1= 45312.; + tr_y1= 7.0; + + + //OpenBLAS + BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); + + ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS); + ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS); + + for(i=0; i<5; i++){ + ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS); } } @@ -199,8 +237,8 @@ CTEST(axpy,daxpy_inc_0) BLASFUNC(daxpy)(&N,&a,x1,&incX,y1,&incY); for(i=0; i