diff --git a/kernel/x86_64/cgemv_n_4.c b/kernel/x86_64/cgemv_n_4.c index 162b7d99f..2a5afd5b3 100644 --- a/kernel/x86_64/cgemv_n_4.c +++ b/kernel/x86_64/cgemv_n_4.c @@ -30,7 +30,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "common.h" #if defined(HASWELL) -#include "cgemv_n_microk_haswell-2.c" +#include "cgemv_n_microk_haswell-4.c" #endif @@ -73,6 +73,41 @@ static void cgemv_kernel_4x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) #endif + + +#ifndef HAVE_KERNEL_4x2 + +static void cgemv_kernel_4x2(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) +{ + BLASLONG i; + FLOAT *a0,*a1; + a0 = ap[0]; + a1 = ap[1]; + + for ( i=0; i< 2*n; i+=2 ) + { +#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) ) + y[i] += a0[i]*x[0] - a0[i+1] * x[1]; + y[i+1] += a0[i]*x[1] + a0[i+1] * x[0]; + y[i] += a1[i]*x[2] - a1[i+1] * x[3]; + y[i+1] += a1[i]*x[3] + a1[i+1] * x[2]; +#else + y[i] += a0[i]*x[0] + a0[i+1] * x[1]; + y[i+1] += a0[i]*x[1] - a0[i+1] * x[0]; + y[i] += a1[i]*x[2] + a1[i+1] * x[3]; + y[i+1] += a1[i]*x[3] - a1[i+1] * x[2]; +#endif + } +} + +#endif + + + + +#ifndef HAVE_KERNEL_4x1 + + static void cgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) { BLASLONG i; @@ -91,41 +126,85 @@ static void cgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) } } - - -static void zero_y(BLASLONG n, FLOAT *dest) -{ - BLASLONG i; - for ( i=0; i<2*n; i++ ) - { - *dest = 0.0; - dest++; - } -} +#endif + + +static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT alpha_r, FLOAT alpha_i) __attribute__ ((noinline)); static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT alpha_r, FLOAT alpha_i) { BLASLONG i; - FLOAT temp_r; - FLOAT temp_i; - for ( i=0; i