From 5e3e91d0fc5562782ddac9c01d6765cb24f171a6 Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 22 Oct 2017 18:18:51 +0200 Subject: [PATCH] Split the microkernel workload into chunks of 32 floats for dsdot mode to limit loss of precision --- kernel/x86_64/sdot.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/kernel/x86_64/sdot.c b/kernel/x86_64/sdot.c index f786d1895..b6f3c21af 100644 --- a/kernel/x86_64/sdot.c +++ b/kernel/x86_64/sdot.c @@ -78,7 +78,12 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) BLASLONG ix=0,iy=0; double dot = 0.0 ; +#if defined (DSDOT) + double mydot = 0.0; + FLOAT asmdot = 0.0; +#else FLOAT mydot=0.0; +#endif BLASLONG n1; if ( n <= 0 ) return(dot); @@ -89,9 +94,23 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) n1 = n & (BLASLONG)(-32); if ( n1 ) +#if defined(DSDOT) + { + FLOAT *x1=x; + FLOAT *y1=y; + BLASLONG n2 = 32; + while (i