optimized dgemv_t kernel for haswell

This commit is contained in:
wernsaar 2014-09-09 14:38:08 +02:00
parent debc6d1a05
commit 8109d8232c
2 changed files with 12 additions and 20 deletions

View File

@@ -28,17 +28,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "common.h"
/*
#if defined(NEHALEM)
#include "dgemv_t_microk_nehalem-4.c"
#elif defined(BULLDOZER) || defined(PILEDRIVER)
#include "dgemv_t_microk_bulldozer-4.c"
#elif defined(SANDYBRIDGE)
#include "dgemv_t_microk_sandy-4.c"
#elif defined(HASWELL)
#if defined(HASWELL)
#include "dgemv_t_microk_haswell-4.c"
#endif
*/
#define NBMAX 2048

View File

@@ -61,25 +61,25 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
".align 16 \n\t"
".L01LOOP%=: \n\t"
"prefetcht0 384(%2,%0,8) \n\t"
// "prefetcht0 384(%2,%0,8) \n\t"
"vmovups (%2,%0,8), %%ymm12 \n\t" // 4 * x
"vmovups 32(%2,%0,8), %%ymm13 \n\t" // 4 * x
"prefetcht0 384(%4,%0,8) \n\t"
// "prefetcht0 384(%4,%0,8) \n\t"
"vfmadd231pd (%4,%0,8), %%ymm12, %%ymm4 \n\t"
"vfmadd231pd (%5,%0,8), %%ymm12, %%ymm5 \n\t"
"prefetcht0 384(%5,%0,8) \n\t"
"vfmadd231pd 32(%4,%0,8), %%ymm13, %%ymm4 \n\t"
"vfmadd231pd 32(%5,%0,8), %%ymm13, %%ymm5 \n\t"
"prefetcht0 384(%6,%0,8) \n\t"
// "prefetcht0 384(%5,%0,8) \n\t"
"vfmadd231pd (%6,%0,8), %%ymm12, %%ymm6 \n\t"
"vfmadd231pd (%7,%0,8), %%ymm12, %%ymm7 \n\t"
"prefetcht0 384(%7,%0,8) \n\t"
"vfmadd231pd 32(%6,%0,8), %%ymm13, %%ymm6 \n\t"
"vfmadd231pd 32(%7,%0,8), %%ymm13, %%ymm7 \n\t"
// "prefetcht0 384(%6,%0,8) \n\t"
"vfmadd231pd 32(%4,%0,8), %%ymm13, %%ymm4 \n\t"
"vfmadd231pd 32(%5,%0,8), %%ymm13, %%ymm5 \n\t"
"addq $8 , %0 \n\t"
// "prefetcht0 384(%7,%0,8) \n\t"
"vfmadd231pd -32(%6,%0,8), %%ymm13, %%ymm6 \n\t"
"subq $8 , %1 \n\t"
"vfmadd231pd -32(%7,%0,8), %%ymm13, %%ymm7 \n\t"
"addq $8 , %0 \n\t"
"subq $8 , %1 \n\t"
"jnz .L01LOOP%= \n\t"
".L16END%=: \n\t"