diff --git a/kernel/x86/gemv_t_sse.S b/kernel/x86/gemv_t_sse.S index fa6cfc50b..326584bbc 100644 --- a/kernel/x86/gemv_t_sse.S +++ b/kernel/x86/gemv_t_sse.S @@ -142,7 +142,9 @@ .L0t: xorl J,J addl $1,J - sall $21,J # J=2^22 + sall $22,J # J=2^22 floats; 2^22*sizeof(float)=buffer size(16MB) + subl $8, J # Don't use the last 8 floats in the buffer. + # Now, split M into blocks of size J subl J,MMM # MMM=MMM-J movl J,M jge .L00t diff --git a/kernel/x86/gemv_t_sse2.S b/kernel/x86/gemv_t_sse2.S index d46d7e43e..60d6ef270 100644 --- a/kernel/x86/gemv_t_sse2.S +++ b/kernel/x86/gemv_t_sse2.S @@ -128,7 +128,9 @@ .L0t: xorl J,J addl $1,J - sall $22,J # J=2^22 + sall $21,J # J=2^21 doubles; 2^21*sizeof(double)=buffer size(16MB) + subl $4, J # Don't use the last 4 doubles in the buffer. + # Now, split M into blocks of size J subl J,MMM # MMM=MMM-J movl J,M jge .L00t