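Add trivially optimized DSDOT for POWER8 (reuses sdot.c: runs the SDOT kernel on 32-element chunks and accumulates the partial sums in double)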

This commit is contained in:
martin 2017-11-28 18:38:07 +01:00
parent 5056a044b2
commit 7a4b3cfbf8
2 changed files with 47 additions and 9 deletions

View File

@ -122,6 +122,7 @@ ZCOPYKERNEL = zcopy.c
# #
SDOTKERNEL = sdot.c SDOTKERNEL = sdot.c
DDOTKERNEL = ddot.c DDOTKERNEL = ddot.c
DSDOTKERNEL = sdot.c
#CDOTKERNEL = ../arm/zdot.c #CDOTKERNEL = ../arm/zdot.c
ZDOTKERNEL = zdot.c ZDOTKERNEL = zdot.c
# #

View File

@ -1,5 +1,5 @@
/*************************************************************************** /***************************************************************************
Copyright (c) 2013-2016, The OpenBLAS Project Copyright (c) 2013-2017, The OpenBLAS Project
All rights reserved. All rights reserved.
Redistribution and use in source and binary forms, with or without Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are modification, are permitted provided that the following conditions are
@ -66,42 +66,76 @@ static FLOAT sdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y)
#endif #endif
#if defined (DSDOT)
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
{ {
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
double dot = 0.0 ;
FLOAT dot = 0.0 ; #if defined (DSDOT)
double mydot = 0.0;
FLOAT asmdot = 0.0;
#else
FLOAT mydot=0.0;
#endif
BLASLONG n1;
if ( n <= 0 ) return(dot); if ( n <= 0 ) return(dot);
if ( (inc_x == 1) && (inc_y == 1) ) if ( (inc_x == 1) && (inc_y == 1) )
{ {
BLASLONG n1 = n & -32; n1 = n & (BLASLONG)(-32);
if ( n1 ) if ( n1 )
dot = sdot_kernel_16(n1, x, y); #if defined(DSDOT)
{
FLOAT *x1=x;
FLOAT *y1=y;
BLASLONG n2 = 32;
while (i<n1) {
asmdot = sdot_kernel_16(n2, x1, y1);
mydot += (double)asmdot;
asmdot=0.;
x1+=32;
y1+=32;
i+=32;
}
}
#else
mydot = sdot_kernel_16(n1, x, y);
#endif
i = n1; i = n1;
while(i < n) while(i < n)
{ {
#if defined(DSDOT)
dot += (double)y[i] * (double)x[i] ;
#else
dot += y[i] * x[i] ; dot += y[i] * x[i] ;
#endif
i++ ; i++ ;
} }
dot+=mydot;
return(dot); return(dot);
} }
BLASLONG n1 = n & -2; n1 = n & (BLASLONG)(-2);
while(i < n1) while(i < n1)
{ {
#if defined (DSDOT)
dot += (double)y[iy] * (double)x[ix] + (double)y[iy+inc_y] * (double)x[ix+inc_x];
#else
dot += y[iy] * x[ix] + y[iy+inc_y] * x[ix+inc_x]; dot += y[iy] * x[ix] + y[iy+inc_y] * x[ix+inc_x];
#endif
ix += inc_x*2 ; ix += inc_x*2 ;
iy += inc_y*2 ; iy += inc_y*2 ;
i+=2 ; i+=2 ;
@ -110,8 +144,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
while(i < n) while(i < n)
{ {
#if defined (DSDOT)
dot += (double)y[iy] * (double)x[ix] ;
#else
dot += y[iy] * x[ix] ; dot += y[iy] * x[ix] ;
#endif
ix += inc_x ; ix += inc_x ;
iy += inc_y ; iy += inc_y ;
i++ ; i++ ;