diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index 9d0080ae7..3508753ee 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -377,7 +377,7 @@ SGEMVNKERNEL = sgemv_n.c endif ifndef SGEMVTKERNEL -SGEMVTKERNEL = ../arm/gemv_t.c +SGEMVTKERNEL = sgemv_t.c endif ifndef DGEMVNKERNEL diff --git a/kernel/x86_64/KERNEL.BULLDOZER b/kernel/x86_64/KERNEL.BULLDOZER index 21fc94701..accdddf0e 100644 --- a/kernel/x86_64/KERNEL.BULLDOZER +++ b/kernel/x86_64/KERNEL.BULLDOZER @@ -3,7 +3,7 @@ SGEMVNKERNEL = sgemv_n.c SGEMVTKERNEL = ../arm/gemv_t.c else SGEMVNKERNEL = sgemv_n.c -SGEMVTKERNEL = sgemv_t_avx.c +SGEMVTKERNEL = sgemv_t.c endif diff --git a/kernel/x86_64/KERNEL.HASWELL b/kernel/x86_64/KERNEL.HASWELL index 3e20fcfc7..878a56b04 100644 --- a/kernel/x86_64/KERNEL.HASWELL +++ b/kernel/x86_64/KERNEL.HASWELL @@ -3,7 +3,7 @@ SGEMVNKERNEL = sgemv_n.c SGEMVTKERNEL = ../arm/gemv_t.c else SGEMVNKERNEL = sgemv_n.c -SGEMVTKERNEL = sgemv_t_avx.c +SGEMVTKERNEL = sgemv_t.c endif diff --git a/kernel/x86_64/KERNEL.NEHALEM b/kernel/x86_64/KERNEL.NEHALEM index 04efa391a..8276150c6 100644 --- a/kernel/x86_64/KERNEL.NEHALEM +++ b/kernel/x86_64/KERNEL.NEHALEM @@ -3,7 +3,7 @@ SGEMVNKERNEL = sgemv_n.c SGEMVTKERNEL = ../arm/gemv_t.c else SGEMVNKERNEL = sgemv_n.c -SGEMVTKERNEL = ../arm/gemv_t.c +SGEMVTKERNEL = sgemv_t.c endif diff --git a/kernel/x86_64/KERNEL.PILEDRIVER b/kernel/x86_64/KERNEL.PILEDRIVER index b7565edeb..7b3c9a7b8 100644 --- a/kernel/x86_64/KERNEL.PILEDRIVER +++ b/kernel/x86_64/KERNEL.PILEDRIVER @@ -3,7 +3,7 @@ SGEMVNKERNEL = sgemv_n.c SGEMVTKERNEL = ../arm/gemv_t.c else SGEMVNKERNEL = sgemv_n.c -SGEMVTKERNEL = sgemv_t_avx.c +SGEMVTKERNEL = sgemv_t.c endif diff --git a/kernel/x86_64/KERNEL.SANDYBRIDGE b/kernel/x86_64/KERNEL.SANDYBRIDGE index 9dae3a41d..26706b61d 100644 --- a/kernel/x86_64/KERNEL.SANDYBRIDGE +++ b/kernel/x86_64/KERNEL.SANDYBRIDGE @@ -3,7 +3,7 @@ SGEMVNKERNEL = sgemv_n.c SGEMVTKERNEL = ../arm/gemv_t.c else SGEMVNKERNEL = sgemv_n.c -SGEMVTKERNEL = 
sgemv_t_avx.c +SGEMVTKERNEL = sgemv_t.c endif diff --git a/kernel/x86_64/sgemv_t.c b/kernel/x86_64/sgemv_t.c new file mode 100644 index 000000000..89254c256 --- /dev/null +++ b/kernel/x86_64/sgemv_t.c @@ -0,0 +1,200 @@ +/*************************************************************************** +Copyright (c) 2014, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*****************************************************************************/ + + +#include "common.h" + +#if defined(BULLDOZER) || defined(PILEDRIVER) +#include "sgemv_t_microk_bulldozer-2.c" +#endif + +/* +#if defined(BULLDOZER) || defined(PILEDRIVER) +#include "sgemv_n_microk_bulldozer-2.c" +#elif defined(HASWELL) +#include "sgemv_n_microk_haswell-2.c" +#elif defined(SANDYBRIDGE) +#include "sgemv_n_microk_sandy-2.c" +#elif defined(NEHALEM) +#include "sgemv_n_microk_nehalem-2.c" +#endif +*/ + +#define NBMAX 4096 + +#ifndef HAVE_KERNEL_16x4 +/* Plain C version of the 4-vector kernel, compiled only when HAVE_KERNEL_16x4 is not defined (presumably the micro-kernel file included above defines it; confirm there). Computes four dot products against the same x: y[k] = sum of ap[k][i]*x[i] for i in [0,n), k = 0..3. y[0..3] are overwritten, not accumulated into. */ +static void sgemv_kernel_16x4(BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y) +{ + BLASLONG i; + FLOAT *a0,*a1,*a2,*a3; + a0 = ap[0]; + a1 = ap[1]; + a2 = ap[2]; + a3 = ap[3]; + FLOAT temp0 = 0.0; + FLOAT temp1 = 0.0; + FLOAT temp2 = 0.0; + FLOAT temp3 = 0.0; + + for ( i=0; i< n; i+=4 ) /* unrolled by 4: every pass reads elements i..i+3 of x and of each ap[k], so n has to be a multiple of 4 or the reads run past index n-1. NOTE(review): the 16 in the name suggests callers pass multiples of 16; confirm against the caller. */ + { + temp0 += a0[i]*x[i] + a0[i+1]*x[i+1] + a0[i+2]*x[i+2] + a0[i+3]*x[i+3]; + temp1 += a1[i]*x[i] + a1[i+1]*x[i+1] + a1[i+2]*x[i+2] + a1[i+3]*x[i+3]; + temp2 += a2[i]*x[i] + a2[i+1]*x[i+1] + a2[i+2]*x[i+2] + a2[i+3]*x[i+3]; + temp3 += a3[i]*x[i] + a3[i+1]*x[i+1] + a3[i+2]*x[i+2] + a3[i+3]*x[i+3]; + } + y[0] = temp0; + y[1] = temp1; + y[2] = temp2; + y[3] = temp3; +} + +#endif +/* Single-vector counterpart of sgemv_kernel_16x4: stores the dot product of ap[0..n-1] and x[0..n-1] into *y, overwriting whatever was there. Always compiled; there is no preprocessor guard around it. */ +static void sgemv_kernel_16x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y) +{ + BLASLONG i; + FLOAT *a0; + a0 = ap; + FLOAT temp = 0.0; + + for ( i=0; i< n; i+=4 ) /* unrolled by 4: reads elements i..i+3 each pass, so n has to be a multiple of 4 */ + { + temp += a0[i]*x[i] + a0[i+1]*x[i+1] + a0[i+2]*x[i+2] + a0[i+3]*x[i+3]; + } + *y = temp; +} + +static void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) +{ + BLASLONG i; + for ( i=0; i