From 0273966abb890c1a9ec8bcf06d8b07d60be564fc Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Fri, 24 Apr 2015 11:39:17 +0200 Subject: [PATCH] optimized daxpy kernel for increments != 1 --- kernel/x86_64/daxpy.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/kernel/x86_64/daxpy.c b/kernel/x86_64/daxpy.c index 65955f33d..10cc573db 100644 --- a/kernel/x86_64/daxpy.c +++ b/kernel/x86_64/daxpy.c @@ -101,6 +101,27 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS } + BLASLONG n1 = n & -4; + + while(i < n1) + { + + FLOAT m1 = da * x[ix] ; + FLOAT m2 = da * x[ix+inc_x] ; + FLOAT m3 = da * x[ix+2*inc_x] ; + FLOAT m4 = da * x[ix+3*inc_x] ; + + y[iy] += m1 ; + y[iy+inc_y] += m2 ; + y[iy+2*inc_y] += m3 ; + y[iy+3*inc_y] += m4 ; + + ix += inc_x*4 ; + iy += inc_y*4 ; + i+=4 ; + + } + while(i < n) {