diff --git a/kernel/x86_64/sgemv_t_4.c b/kernel/x86_64/sgemv_t_4.c index 2d0648a6c..fb0ba9741 100644 --- a/kernel/x86_64/sgemv_t_4.c +++ b/kernel/x86_64/sgemv_t_4.c @@ -232,6 +232,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO FLOAT *a_ptr; FLOAT *x_ptr; FLOAT *y_ptr; + BLASLONG n0; BLASLONG n1; BLASLONG m1; BLASLONG m2; @@ -246,7 +247,8 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO xbuffer = buffer; ytemp = buffer + NBMAX; - n1 = n >> 2 ; + n0 = n / NBMAX; + n1 = (n % NBMAX) >> 2 ; n2 = n & 3 ; m3 = m & 3 ; @@ -283,6 +285,32 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *a, BLASLO ap[1] = a_ptr + lda; ap[2] = ap[1] + lda; ap[3] = ap[2] + lda; + + if ( n0 > 0 ) + { + BLASLONG nb1 = NBMAX / 4; + for( j=0; j