Adapt to ARM architecture
This commit is contained in:
parent
1b1a757f5f
commit
60e6c68e38
|
@ -97,7 +97,7 @@ CNRM2KERNEL = znrm2.S
|
||||||
ZNRM2KERNEL = znrm2.S
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
DDOTKERNEL = dot.S
|
DDOTKERNEL = dot.S
|
||||||
SDOTKERNEL = dot.S
|
SDOTKERNEL = ../generic/dot.c
|
||||||
CDOTKERNEL = zdot.S
|
CDOTKERNEL = zdot.S
|
||||||
ZDOTKERNEL = zdot.S
|
ZDOTKERNEL = zdot.S
|
||||||
DSDOTKERNEL = dot.S
|
DSDOTKERNEL = dot.S
|
||||||
|
|
|
@ -97,7 +97,7 @@ CNRM2KERNEL = znrm2.S
|
||||||
ZNRM2KERNEL = znrm2.S
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
DDOTKERNEL = dot.S
|
DDOTKERNEL = dot.S
|
||||||
SDOTKERNEL = dot.S
|
SDOTKERNEL = ../generic/dot.c
|
||||||
CDOTKERNEL = zdot.S
|
CDOTKERNEL = zdot.S
|
||||||
ZDOTKERNEL = zdot.S
|
ZDOTKERNEL = zdot.S
|
||||||
DSDOTKERNEL = dot.S
|
DSDOTKERNEL = dot.S
|
||||||
|
|
|
@ -70,7 +70,7 @@ DCOPYKERNEL = copy.S
|
||||||
CCOPYKERNEL = copy.S
|
CCOPYKERNEL = copy.S
|
||||||
ZCOPYKERNEL = copy.S
|
ZCOPYKERNEL = copy.S
|
||||||
|
|
||||||
SDOTKERNEL = dot.S
|
SDOTKERNEL = ../generic/dot.c
|
||||||
DDOTKERNEL = dot.S
|
DDOTKERNEL = dot.S
|
||||||
CDOTKERNEL = zdot.S
|
CDOTKERNEL = zdot.S
|
||||||
ZDOTKERNEL = zdot.S
|
ZDOTKERNEL = zdot.S
|
||||||
|
|
|
@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include "../simd/intrin.h"
|
||||||
#if defined(DSDOT)
|
#if defined(DSDOT)
|
||||||
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||||
#else
|
#else
|
||||||
|
@ -47,9 +47,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
|
||||||
|
|
||||||
if ( (inc_x == 1) && (inc_y == 1) )
|
if ( (inc_x == 1) && (inc_y == 1) )
|
||||||
{
|
{
|
||||||
int n1 = n & -4;
|
int n1 = n & -4;
|
||||||
#if V_SIMD && !defined(DSDOT)
|
#if V_SIMD && !defined(DSDOT)
|
||||||
const int vstep = v_nlanes_f32;
|
const int vstep = v_nlanes_f32;
|
||||||
const int unrollx4 = n & (-vstep * 4);
|
const int unrollx4 = n & (-vstep * 4);
|
||||||
const int unrollx = n & -vstep;
|
const int unrollx = n & -vstep;
|
||||||
v_f32 vsum0 = v_zero_f32();
|
v_f32 vsum0 = v_zero_f32();
|
||||||
|
|
Loading…
Reference in New Issue