MIPS64: Fixed failed utest dsdot:dsdot_n_1 when TARGET=I6500

This commit is contained in:
gxw 2022-09-17 16:39:30 +08:00
parent 548a11b9d9
commit edea1bcfaf
1 changed files with 151 additions and 0 deletions

View File

@ -39,10 +39,19 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
FLOAT x0, x1, x2, x3, y0, y1, y2, y3;
v4f32 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;
v4f32 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;
#if defined(DSDOT)
v2f64 dvx0, dvx1, dvx2, dvx3, dvx4, dvx5, dvx6, dvx7;
v2f64 dvy0, dvy1, dvy2, dvy3, dvy4, dvy5, dvy6, dvy7;
v2f64 dot0 = {0, 0};
v2f64 dot1 = {0, 0};
v2f64 dot2 = {0, 0};
v2f64 dot3 = {0, 0};
#else
v4f32 dot0 = {0, 0, 0, 0};
v4f32 dot1 = {0, 0, 0, 0};
v4f32 dot2 = {0, 0, 0, 0};
v4f32 dot3 = {0, 0, 0, 0};
#endif
if (n < 1) return (dot);
@ -83,6 +92,61 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
x_pref += 32;
y_pref += 32;
#if defined(DSDOT)
/* Extend single precision to double precision */
dvy0 = __msa_fexupr_d(vy0);
dvy1 = __msa_fexupr_d(vy1);
dvy2 = __msa_fexupr_d(vy2);
dvy3 = __msa_fexupr_d(vy3);
dvy4 = __msa_fexupr_d(vy4);
dvy5 = __msa_fexupr_d(vy5);
dvy6 = __msa_fexupr_d(vy6);
dvy7 = __msa_fexupr_d(vy7);
vy0 = (v4f32)__msa_fexupl_d(vy0);
vy1 = (v4f32)__msa_fexupl_d(vy1);
vy2 = (v4f32)__msa_fexupl_d(vy2);
vy3 = (v4f32)__msa_fexupl_d(vy3);
vy4 = (v4f32)__msa_fexupl_d(vy4);
vy5 = (v4f32)__msa_fexupl_d(vy5);
vy6 = (v4f32)__msa_fexupl_d(vy6);
vy7 = (v4f32)__msa_fexupl_d(vy7);
dvx0 = __msa_fexupr_d(vx0);
dvx1 = __msa_fexupr_d(vx1);
dvx2 = __msa_fexupr_d(vx2);
dvx3 = __msa_fexupr_d(vx3);
dvx4 = __msa_fexupr_d(vx4);
dvx5 = __msa_fexupr_d(vx5);
dvx6 = __msa_fexupr_d(vx6);
dvx7 = __msa_fexupr_d(vx7);
vx0 = (v4f32)__msa_fexupl_d(vx0);
vx1 = (v4f32)__msa_fexupl_d(vx1);
vx2 = (v4f32)__msa_fexupl_d(vx2);
vx3 = (v4f32)__msa_fexupl_d(vx3);
vx4 = (v4f32)__msa_fexupl_d(vx4);
vx5 = (v4f32)__msa_fexupl_d(vx5);
vx6 = (v4f32)__msa_fexupl_d(vx6);
vx7 = (v4f32)__msa_fexupl_d(vx7);
dot0 += (dvy0 * dvx0);
dot1 += (dvy1 * dvx1);
dot2 += (dvy2 * dvx2);
dot3 += (dvy3 * dvx3);
dot0 += (dvy4 * dvx4);
dot1 += (dvy5 * dvx5);
dot2 += (dvy6 * dvx6);
dot3 += (dvy7 * dvx7);
dot0 += ((v2f64)vy0 * (v2f64)vx0);
dot1 += ((v2f64)vy1 * (v2f64)vx1);
dot2 += ((v2f64)vy2 * (v2f64)vx2);
dot3 += ((v2f64)vy3 * (v2f64)vx3);
dot0 += ((v2f64)vy4 * (v2f64)vx4);
dot1 += ((v2f64)vy5 * (v2f64)vx5);
dot2 += ((v2f64)vy6 * (v2f64)vx6);
dot3 += ((v2f64)vy7 * (v2f64)vx7);
#else
dot0 += (vy0 * vx0);
dot1 += (vy1 * vx1);
dot2 += (vy2 * vx2);
@ -91,6 +155,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
dot1 += (vy5 * vx5);
dot2 += (vy6 * vx6);
dot3 += (vy7 * vx7);
#endif
}
if (n & 31)
@ -100,10 +165,41 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
LD_SP4_INC(x, 4, vx0, vx1, vx2, vx3);
LD_SP4_INC(y, 4, vy0, vy1, vy2, vy3);
#if defined(DSDOT)
dvy0 = __msa_fexupr_d(vy0);
dvy1 = __msa_fexupr_d(vy1);
dvy2 = __msa_fexupr_d(vy2);
dvy3 = __msa_fexupr_d(vy3);
vy0 = (v4f32)__msa_fexupl_d(vy0);
vy1 = (v4f32)__msa_fexupl_d(vy1);
vy2 = (v4f32)__msa_fexupl_d(vy2);
vy3 = (v4f32)__msa_fexupl_d(vy3);
dvx0 = __msa_fexupr_d(vx0);
dvx1 = __msa_fexupr_d(vx1);
dvx2 = __msa_fexupr_d(vx2);
dvx3 = __msa_fexupr_d(vx3);
vx0 = (v4f32)__msa_fexupl_d(vx0);
vx1 = (v4f32)__msa_fexupl_d(vx1);
vx2 = (v4f32)__msa_fexupl_d(vx2);
vx3 = (v4f32)__msa_fexupl_d(vx3);
dot0 += (dvy0 * dvx0);
dot1 += (dvy1 * dvx1);
dot2 += (dvy2 * dvx2);
dot3 += (dvy3 * dvx3);
dot0 += ((v2f64)vy0 * (v2f64)vx0);
dot1 += ((v2f64)vy1 * (v2f64)vx1);
dot2 += ((v2f64)vy2 * (v2f64)vx2);
dot3 += ((v2f64)vy3 * (v2f64)vx3);
#else
dot0 += (vy0 * vx0);
dot1 += (vy1 * vx1);
dot2 += (vy2 * vx2);
dot3 += (vy3 * vx3);
#endif
}
if (n & 8)
@ -111,8 +207,27 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
LD_SP2_INC(x, 4, vx0, vx1);
LD_SP2_INC(y, 4, vy0, vy1);
#if defined(DSDOT)
dvy0 = __msa_fexupr_d(vy0);
dvy1 = __msa_fexupr_d(vy1);
vy0 = (v4f32)__msa_fexupl_d(vy0);
vy1 = (v4f32)__msa_fexupl_d(vy1);
dvx0 = __msa_fexupr_d(vx0);
dvx1 = __msa_fexupr_d(vx1);
vx0 = (v4f32)__msa_fexupl_d(vx0);
vx1 = (v4f32)__msa_fexupl_d(vx1);
dot0 += (dvy0 * dvx0);
dot1 += (dvy1 * dvx1);
dot0 += ((v2f64)vy0 * (v2f64)vx0);
dot1 += ((v2f64)vy1 * (v2f64)vx1);
#else
dot0 += (vy0 * vx0);
dot1 += (vy1 * vx1);
#endif
}
if (n & 4)
@ -120,7 +235,16 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
vx0 = LD_SP(x); x += 4;
vy0 = LD_SP(y); y += 4;
#if defined(DSDOT)
dvy0 = __msa_fexupr_d(vy0);
vy0 = (v4f32)__msa_fexupl_d(vy0);
dvx0 = __msa_fexupr_d(vx0);
vx0 = (v4f32)__msa_fexupl_d(vx0);
dot0 += (dvy0 * dvx0);
dot0 += ((v2f64)vy0 * (v2f64)vx0);
#else
dot0 += (vy0 * vx0);
#endif
}
if (n & 2)
@ -128,8 +252,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
LD_GP2_INC(x, 1, x0, x1);
LD_GP2_INC(y, 1, y0, y1);
#if defined(DSDOT)
dot += ((double)y0 * (double)x0);
dot += ((double)y1 * (double)x1);
#else
dot += (y0 * x0);
dot += (y1 * x1);
#endif
}
if (n & 1)
@ -137,7 +266,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
x0 = *x;
y0 = *y;
#if defined(DSDOT)
dot += ((double)y0 * (double)x0);
#else
dot += (y0 * x0);
#endif
}
}
@ -145,8 +278,10 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
dot += dot0[0];
dot += dot0[1];
#if !defined(DSDOT)
dot += dot0[2];
dot += dot0[3];
#endif
}
else
{
@ -155,10 +290,17 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
LD_GP4_INC(x, inc_x, x0, x1, x2, x3);
LD_GP4_INC(y, inc_y, y0, y1, y2, y3);
#if defined(DSDOT)
dot += ((double)y0 * (double)x0);
dot += ((double)y1 * (double)x1);
dot += ((double)y2 * (double)x2);
dot += ((double)y3 * (double)x3);
#else
dot += (y0 * x0);
dot += (y1 * x1);
dot += (y2 * x2);
dot += (y3 * x3);
#endif
}
if (n & 2)
@ -166,8 +308,13 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
LD_GP2_INC(x, inc_x, x0, x1);
LD_GP2_INC(y, inc_y, y0, y1);
#if defined(DSDOT)
dot += ((double)y0 * (double)x0);
dot += ((double)y1 * (double)x1);
#else
dot += (y0 * x0);
dot += (y1 * x1);
#endif
}
if (n & 1)
@ -175,7 +322,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
x0 = *x;
y0 = *y;
#if defined(DSDOT)
dot += ((double)y0 * (double)x0);
#else
dot += (y0 * x0);
#endif
}
}