diff --git a/kernel/x86_64/KERNEL.HASWELL b/kernel/x86_64/KERNEL.HASWELL index f2e1374d3..848de38df 100644 --- a/kernel/x86_64/KERNEL.HASWELL +++ b/kernel/x86_64/KERNEL.HASWELL @@ -24,6 +24,8 @@ DDOTKERNEL = ddot.c CDOTKERNEL = cdot.c ZDOTKERNEL = zdot.c +DSDOTKERNEL = sdot.c + SAXPYKERNEL = saxpy.c DAXPYKERNEL = daxpy.c CAXPYKERNEL = caxpy.c diff --git a/kernel/x86_64/sdot.c b/kernel/x86_64/sdot.c index 389252f8b..b6f3c21af 100644 --- a/kernel/x86_64/sdot.c +++ b/kernel/x86_64/sdot.c @@ -68,13 +68,22 @@ static void sdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d) #endif +#if defined (DSDOT) +double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#else FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) +#endif { BLASLONG i=0; BLASLONG ix=0,iy=0; double dot = 0.0 ; +#if defined (DSDOT) + double mydot = 0.0; + FLOAT asmdot = 0.0; +#else FLOAT mydot=0.0; +#endif BLASLONG n1; if ( n <= 0 ) return(dot); @@ -85,17 +94,35 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) n1 = n & (BLASLONG)(-32); if ( n1 ) +#if defined(DSDOT) + { + FLOAT *x1=x; + FLOAT *y1=y; + BLASLONG n2 = 32; + while (i