Bug fix for sgemv_n_4.c: block columns by 8 only when inc_x == 1, otherwise by 4

This commit is contained in:
wernsaar 2014-09-04 18:55:52 +02:00
parent 7f910010a0
commit 53de943690
2 changed files with 13 additions and 30 deletions

View File

@ -10,7 +10,7 @@ DSYMV_L_KERNEL = dsymv_L.c
SSYMV_U_KERNEL = ssymv_U.c
SSYMV_L_KERNEL = ssymv_L.c
SGEMVNKERNEL = sgemv_n.c
SGEMVNKERNEL = sgemv_n_4.c
SGEMVTKERNEL = sgemv_t_4.c
ZGEMVNKERNEL = zgemv_n_dup.S

View File

@ -185,8 +185,17 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
ybuffer = buffer;
if ( inc_x == 1 )
{
n1 = n >> 3 ;
n2 = n & 7 ;
}
else
{
n1 = n >> 2 ;
n2 = n & 3 ;
}
m3 = m & 3 ;
m1 = m & -4 ;
@ -258,32 +267,6 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
{
for( i = 0; i < n1 ; i++)
{
xbuffer[0] = x_ptr[0];
x_ptr += inc_x;
xbuffer[1] = x_ptr[0];
x_ptr += inc_x;
xbuffer[2] = x_ptr[0];
x_ptr += inc_x;
xbuffer[3] = x_ptr[0];
x_ptr += inc_x;
xbuffer[4] = x_ptr[0];
x_ptr += inc_x;
xbuffer[5] = x_ptr[0];
x_ptr += inc_x;
xbuffer[6] = x_ptr[0];
x_ptr += inc_x;
xbuffer[7] = x_ptr[0];
x_ptr += inc_x;
sgemv_kernel_4x8(NB,ap,x_ptr,ybuffer,lda4);
ap[0] += lda8;
ap[1] += lda8;
ap[2] += lda8;
ap[3] += lda8;
a_ptr += lda8;
}
if ( n2 & 4 )
{
xbuffer[0] = x_ptr[0];
x_ptr += inc_x;
@ -301,7 +284,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO
a_ptr += lda4;
}
for( i = 0; i < ( n2 & 3) ; i++)
for( i = 0; i < n2 ; i++)
{
xbuffer[0] = x_ptr[0];
x_ptr += inc_x;