Merge pull request #1454 from martin-frbg/issue1452

Keep the flag handling separate from the scaling loops in rotmg
This commit is contained in:
Martin Kroeker 2018-02-11 20:48:04 +01:00 committed by GitHub
commit 6940c59a88
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 255 additions and 47 deletions

View File

@ -136,7 +136,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
if(*dd1 != ZERO) if(*dd1 != ZERO)
{ {
while( (*dd1 <= RGAMSQ) || (*dd1 >= GAMSQ) ) if( (*dd1 <= RGAMSQ) || (*dd1 >= GAMSQ) )
{ {
if(dflag == ZERO) if(dflag == ZERO)
{ {
@ -146,33 +146,34 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
} }
else else
{ {
if(dflag == ONE)
{
dh21 = -ONE; dh21 = -ONE;
dh12 = ONE; dh12 = ONE;
dflag = -ONE; dflag = -ONE;
}
} }
if( *dd1 <= RGAMSQ ) if( *dd1 <= RGAMSQ )
{ {
*dd1 = *dd1 * (GAM * GAM); while (ABS(*dd1) <= RGAMSQ) {
*dx1 = *dx1 / GAM; *dd1 = *dd1 * (GAM * GAM);
dh11 = dh11 / GAM; *dx1 = *dx1 / GAM;
dh12 = dh12 / GAM; dh11 = dh11 / GAM;
dh12 = dh12 / GAM;
}
} }
else else
{ {
*dd1 = *dd1 / (GAM * GAM); while (ABS(*dd1) >= GAMSQ) {
*dx1 = *dx1 * GAM; *dd1 = *dd1 / (GAM * GAM);
dh11 = dh11 * GAM; *dx1 = *dx1 * GAM;
dh12 = dh12 * GAM; dh11 = dh11 * GAM;
dh12 = dh12 * GAM;
}
} }
} }
} }
if(*dd2 != ZERO) if(*dd2 != ZERO)
{ {
while( (ABS(*dd2) <= RGAMSQ) || (ABS(*dd2) >= GAMSQ) ) if( (ABS(*dd2) <= RGAMSQ) || (ABS(*dd2) >= GAMSQ) )
{ {
if(dflag == ZERO) if(dflag == ZERO)
{ {
@ -182,24 +183,25 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
} }
else else
{ {
if(dflag == ONE)
{
dh21 = -ONE; dh21 = -ONE;
dh12 = ONE; dh12 = ONE;
dflag = -ONE; dflag = -ONE;
}
} }
if( ABS(*dd2) <= RGAMSQ ) if( ABS(*dd2) <= RGAMSQ )
{ {
*dd2 = *dd2 * (GAM * GAM); while (ABS(*dd2) <= RGAMSQ) {
dh21 = dh21 / GAM; *dd2 = *dd2 * (GAM * GAM);
dh22 = dh22 / GAM; dh21 = dh21 / GAM;
dh22 = dh22 / GAM;
}
} }
else else
{ {
*dd2 = *dd2 / (GAM * GAM); while (ABS(*dd2) >= GAMSQ) {
dh21 = dh21 * GAM; *dd2 = *dd2 / (GAM * GAM);
dh22 = dh22 * GAM; dh21 = dh21 * GAM;
dh22 = dh22 * GAM;
}
} }
} }
} }

View File

@ -7,6 +7,7 @@ else ()
set(OpenBLAS_utest_src set(OpenBLAS_utest_src
utest_main.c utest_main.c
test_amax.c test_amax.c
test_rotmg.c
) )
endif () endif ()

View File

@ -8,8 +8,8 @@ UTESTBIN=openblas_utest
include $(TOPDIR)/Makefile.system include $(TOPDIR)/Makefile.system
OBJS=utest_main.o test_amax.o OBJS=utest_main.o test_amax.o test_rotmg.o
#test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_fork.o #test_rot.o test_swap.o test_axpy.o test_dotu.o test_dsdot.o test_fork.o
ifneq ($(NO_LAPACK), 1) ifneq ($(NO_LAPACK), 1)
#OBJS += test_potrs.o #OBJS += test_potrs.o

View File

@ -31,9 +31,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/ **********************************************************************************/
#include "common_utest.h" #include "openblas_utest.h"
void test_drotmg() CTEST (drotmg,rotmg)
{ {
double te_d1, tr_d1; double te_d1, tr_d1;
double te_d2, tr_d2; double te_d2, tr_d2;
@ -42,31 +42,92 @@ void test_drotmg()
double te_param[5]; double te_param[5];
double tr_param[5]; double tr_param[5];
int i=0; int i=0;
te_d1= tr_d1=0.21149573940783739; // original test case for libGoto bug fixed by feb2014 rewrite
te_d2= tr_d2=0.046892057172954082; te_d1= 0.21149573940783739;
te_x1= tr_x1=-0.42272687517106533; te_d2= 0.046892057172954082;
te_y1= tr_y1=0.42211309121921659; te_x1= -0.42272687517106533;
te_y1= 0.42211309121921659;
for(i=0; i<5; i++){ for(i=0; i<5; i++){
te_param[i]=tr_param[i]=0.0; te_param[i]=tr_param[i]=0.0;
} }
//OpenBLAS //reference values as calculated by netlib blas
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
//reference
BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param);
CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS); tr_d1= 0.1732048;
CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS); tr_d2= 0.03840234;
CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS); tr_x1= -0.516180;
CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS); tr_y1= 0.422113;
tr_d1= 0.17320483687975;
tr_d2= 0.03840233915037;
tr_x1= -0.51618034832329;
tr_y1= 0.42211309121922;
tr_param[0]= 0.0;
tr_param[1]= 0.0;
tr_param[2]= 0.99854803659786;
tr_param[3]= -0.22139439665872;
tr_param[4]= 0.0;
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS);
ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS);
ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS);
ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS);
for(i=0; i<5; i++){ for(i=0; i<5; i++){
CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS); ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS);
} }
} }
void test_drotmg_D1eqD2_X1eqX2() CTEST (drotmg,rotmg_issue1452)
{
double te_d1, tr_d1;
double te_d2, tr_d2;
double te_x1, tr_x1;
double te_y1, tr_y1;
double te_param[5];
double tr_param[5];
int i=0;
// from issue #1452, buggy version returned 0.000244 for param[3]
te_d1 = 5.9e-8;
te_d2 = 5.960464e-8;
te_x1 = 1.0;
te_y1 = 150.0;
for(i=0; i<5; i++){
te_param[i]=tr_param[i]=0.0;
}
//reference values as calculated by netlib blas
tr_d1= 0.99995592822897;
tr_d2= 0.98981219860583;
tr_x1= 0.03662270484346;
tr_y1= 150.000000000000;
tr_param[0]= -1.0;
tr_param[1]= 0.00000161109346;
tr_param[2]= -0.00024414062500;
tr_param[3]= 1.0;
tr_param[4]= 0.00000162760417;
//OpenBLAS
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS);
ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS);
ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS);
ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS);
for(i=0; i<5; i++){
ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS);
}
}
CTEST(drotmg, rotmg_D1eqD2_X1eqX2)
{ {
double te_d1, tr_d1; double te_d1, tr_d1;
double te_d2, tr_d2; double te_d2, tr_d2;
@ -84,17 +145,27 @@ void test_drotmg_D1eqD2_X1eqX2()
te_param[i]=tr_param[i]=0.0; te_param[i]=tr_param[i]=0.0;
} }
//reference values as calculated by netlib blas
tr_d1= 1.0;
tr_d2= 1.0;
tr_x1= 16.0;
tr_y1= 8.0;
tr_param[0]=1.0;
tr_param[1]=1.0;
tr_param[2]=0.0;
tr_param[3]=0.0;
tr_param[4]=1.0;
//OpenBLAS //OpenBLAS
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);
//reference
BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param);
CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS); ASSERT_DBL_NEAR_TOL(te_d1, tr_d1, DOUBLE_EPS);
CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS); ASSERT_DBL_NEAR_TOL(te_d2, tr_d2, DOUBLE_EPS);
CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS); ASSERT_DBL_NEAR_TOL(te_x1, tr_x1, DOUBLE_EPS);
CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS); ASSERT_DBL_NEAR_TOL(te_y1, tr_y1, DOUBLE_EPS);
for(i=0; i<5; i++){ for(i=0; i<5; i++){
CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS); ASSERT_DBL_NEAR_TOL(te_param[i], tr_param[i], DOUBLE_EPS);
} }
} }

View File

@ -49,6 +49,140 @@ CTEST(amax, samax){
ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS); ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS);
} }
// drotmg check using the input of the original libGoto bug report.
// The te_* variables are passed to the routine under test and hold its
// results afterwards; the tr_* variables hold fixed reference results.
CTEST (drotmg,rotmg){
	double te_d1, tr_d1;
	double te_d2, tr_d2;
	double te_x1, tr_x1;
	double te_y1, tr_y1;
	double te_param[5];
	double tr_param[5];
	int i=0;

	// original test case for libGoto bug fixed by feb2014 rewrite
	te_d1= 0.21149573940783739;
	te_d2= 0.046892057172954082;
	te_x1= -0.42272687517106533;
	te_y1= 0.42211309121921659;

	// hand the routine an all-zero param array
	for(i=0; i<5; i++){
		te_param[i]=0.0;
	}

	//reference values as calculated by netlib blas
	tr_d1= 0.17320483687975;
	tr_d2= 0.03840233915037;
	tr_x1= -0.51618034832329;
	tr_y1= 0.42211309121922;

	tr_param[0]= 0.0;
	tr_param[1]= 0.0;
	tr_param[2]= 0.99854803659786;
	tr_param[3]= -0.22139439665872;
	tr_param[4]= 0.0;

	//OpenBLAS
	BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);

	// reference value first, the same order the amax test in this file uses
	ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS);
	ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS);
	ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS);
	ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS);

	for(i=0; i<5; i++){
		ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS);
	}
}
// drotmg regression check for issue #1452.
// The te_* variables are passed to the routine under test and hold its
// results afterwards; the tr_* variables hold fixed reference results.
CTEST (drotmg,rotmg_issue1452){
	double te_d1, tr_d1;
	double te_d2, tr_d2;
	double te_x1, tr_x1;
	double te_y1, tr_y1;
	double te_param[5];
	double tr_param[5];
	int i=0;

	// from issue #1452, buggy version returned 0.000244 for param[3]
	te_d1 = 5.9e-8;
	te_d2 = 5.960464e-8;
	te_x1 = 1.0;
	te_y1 = 150.0;

	// hand the routine an all-zero param array
	for(i=0; i<5; i++){
		te_param[i]=0.0;
	}

	//reference values as calculated by netlib blas
	tr_d1= 0.99995592822897;
	tr_d2= 0.98981219860583;
	tr_x1= 0.03662270484346;
	tr_y1= 150.000000000000;

	tr_param[0]= -1.0;
	tr_param[1]= 0.00000161109346;
	tr_param[2]= -0.00024414062500;
	tr_param[3]= 1.0;
	tr_param[4]= 0.00000162760417;

	//OpenBLAS
	BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);

	// reference value first, the same order the amax test in this file uses
	ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS);
	ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS);
	ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS);
	ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS);

	for(i=0; i<5; i++){
		ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS);
	}
}
// drotmg check for equal inputs: d1 == d2 and x1 == y1.
// The te_* variables are passed to the routine under test and hold its
// results afterwards; the tr_* variables hold fixed reference results.
CTEST(drotmg, rotmg_D1eqD2_X1eqX2){
	double te_d1, tr_d1;
	double te_d2, tr_d2;
	double te_x1, tr_x1;
	double te_y1, tr_y1;
	double te_param[5];
	double tr_param[5];
	int i=0;

	// inputs: both scale factors equal, both coordinates equal
	te_d1= 2.;
	te_d2= 2.;
	te_x1= 8.;
	te_y1= 8.;

	// hand the routine an all-zero param array
	for(i=0; i<5; i++){
		te_param[i]=0.0;
	}

	//reference values as calculated by netlib blas
	tr_d1= 1.0;
	tr_d2= 1.0;
	tr_x1= 16.0;
	tr_y1= 8.0;

	tr_param[0]=1.0;
	tr_param[1]=1.0;
	tr_param[2]=0.0;
	tr_param[3]=0.0;
	tr_param[4]=1.0;

	//OpenBLAS
	BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param);

	// reference value first, the same order the amax test in this file uses
	ASSERT_DBL_NEAR_TOL(tr_d1, te_d1, DOUBLE_EPS);
	ASSERT_DBL_NEAR_TOL(tr_d2, te_d2, DOUBLE_EPS);
	ASSERT_DBL_NEAR_TOL(tr_x1, te_x1, DOUBLE_EPS);
	ASSERT_DBL_NEAR_TOL(tr_y1, te_y1, DOUBLE_EPS);

	for(i=0; i<5; i++){
		ASSERT_DBL_NEAR_TOL(tr_param[i], te_param[i], DOUBLE_EPS);
	}
}
int main(int argc, const char ** argv){ int main(int argc, const char ** argv){
CTEST_ADD(amax, samax); CTEST_ADD(amax, samax);