corrected and tested FMA3 Code

This commit is contained in:
wernsaar 2013-10-19 10:52:20 +02:00
parent f51a849d91
commit f6b50057e2
4 changed files with 83 additions and 28 deletions

View File

@ -37,6 +37,11 @@
/*********************************************************************/ /*********************************************************************/
/********************************************************************* /*********************************************************************
* 2013/10/19 Saar
* BLASTEST :
* CTEST : OK
* TEST : OK
*
* 2013/08/16 Saar * 2013/08/16 Saar
* Parameter: * Parameter:
* CGEMM_DEFAULT_UNROLL_N 2 * CGEMM_DEFAULT_UNROLL_N 2
@ -139,7 +144,7 @@
#endif #endif
#if defined(BULLDOZER) || defined(PILEDRIVER) #if defined(BULLDOZER)
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) #if defined(NN) || defined(NT) || defined(TN) || defined(TT)
@ -188,41 +193,41 @@
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) #if defined(NN) || defined(NT) || defined(TN) || defined(TT)
.macro VFMADDPS_R y0,y1,y2 .macro VFMADDPS_R y0,y1,y2
vfmadd231ps \y0,\y1,\y2 vfmadd231ps \y1,\y2,\y0
.endm .endm
.macro VFMADDPS_I y0,y1,y2 .macro VFMADDPS_I y0,y1,y2
vfmadd231ps \y0,\y1,\y2 vfmadd231ps \y1,\y2,\y0
.endm .endm
#elif defined(RN) || defined(RT) || defined(CN) || defined(CT) #elif defined(RN) || defined(RT) || defined(CN) || defined(CT)
.macro VFMADDPS_R y0,y1,y2 .macro VFMADDPS_R y0,y1,y2
vfnmadd231ps \y0,\y1,\y2 vfnmadd231ps \y1,\y2,\y0
.endm .endm
.macro VFMADDPS_I y0,y1,y2 .macro VFMADDPS_I y0,y1,y2
vfmadd231ps \y0,\y1,\y2 vfmadd231ps \y1,\y2,\y0
.endm .endm
#elif defined(NR) || defined(NC) || defined(TR) || defined(TC) #elif defined(NR) || defined(NC) || defined(TR) || defined(TC)
.macro VFMADDPS_R y0,y1,y2 .macro VFMADDPS_R y0,y1,y2
vfmadd231ps \y0,\y1,\y2 vfmadd231ps \y1,\y2,\y0
.endm .endm
.macro VFMADDPS_I y0,y1,y2 .macro VFMADDPS_I y0,y1,y2
vfnmadd231ps \y0,\y1,\y2 vfnmadd231ps \y1,\y2,\y0
.endm .endm
#else #else
.macro VFMADDPS_R y0,y1,y2 .macro VFMADDPS_R y0,y1,y2
vfnmadd231ps \y0,\y1,\y2 vfnmadd231ps \y1,\y2,\y0
.endm .endm
.macro VFMADDPS_I y0,y1,y2 .macro VFMADDPS_I y0,y1,y2
vfnmadd231ps \y0,\y1,\y2 vfnmadd231ps \y1,\y2,\y0
.endm .endm
#endif #endif

View File

@ -36,6 +36,31 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
/*********************************************************************
* 2013/10/19 Saar
* BLASTEST :
* CTEST : OK
* TEST : OK
*
*
* 2013/08/15 Saar
* Parameter:
* SGEMM_DEFAULT_UNROLL_N 2
* SGEMM_DEFAULT_UNROLL_M 16
* SGEMM_DEFAULT_P 384
* SGEMM_DEFAULT_Q 168
*
* BLASTEST: OK
*
* Performance:
* 1 thread: 2.31 times faster than sandybridge
* 4 threads: 2.26 times faster than sandybridge
*
* Compile for FMA3: OK
*
*********************************************************************/
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
@ -130,11 +155,11 @@
#else #else
.macro VFMADD231PD_ y0,y1,y2 .macro VFMADD231PD_ y0,y1,y2
vfmadd231pd \y0,\y1,\y2 vfmadd231pd \y2,\y1,\y0
.endm .endm
.macro VFMADD231SD_ x0,x1,x2 .macro VFMADD231SD_ x0,x1,x2
vfmadd231sd \x0,\x1,\x2 vfmadd231sd \x2,\x1,\x0
.endm .endm
#endif #endif

View File

@ -36,6 +36,28 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/
/*********************************************************************
* 2013/10/19 Saar
* BLASTEST :
* CTEST : OK
* TEST : OK
*
* 2013/08/15 Saar
* Parameter:
* SGEMM_DEFAULT_UNROLL_N 4
* SGEMM_DEFAULT_UNROLL_M 16
* SGEMM_DEFAULT_P 768
* SGEMM_DEFAULT_Q 168
*
* BLASTEST: OK
*
* Performance:
* 1 thread: 2.22 times faster than sandybridge
* 4 threads: 2.26 times faster than sandybridge
*
* Compile for FMA3: OK
*
*********************************************************************/
#define ASSEMBLER #define ASSEMBLER
#include "common.h" #include "common.h"
@ -60,7 +82,6 @@
#define SP %rbx #define SP %rbx
#define BO1 %rdi #define BO1 %rdi
#define BO2 %r15
#define CO2 %rdx #define CO2 %rdx
#ifndef WINDOWS_ABI #ifndef WINDOWS_ABI
@ -131,11 +152,11 @@
#else #else
.macro VFMADD231PS_ y0,y1,y2 .macro VFMADD231PS_ y0,y1,y2
vfmadd231ps \y0,\y1,\y2 vfmadd231ps \y1,\y2,\y0
.endm .endm
.macro VFMADD231SS_ x0,x1,x2 .macro VFMADD231SS_ x0,x1,x2
vfmadd231ss \x0,\x1,\x2 vfmadd231ss \x1,\x2,\x0
.endm .endm
#endif #endif
@ -791,7 +812,7 @@
movq OLD_C, C movq OLD_C, C
movq OLD_LDC, LDC movq OLD_LDC, LDC
#ifdef TRMMKERNEL #ifdef TRMMKERNEL
movsd OLD_OFFSET, %xmm12 vmovsd OLD_OFFSET, %xmm12
#endif #endif
vmovaps %xmm3, %xmm0 vmovaps %xmm3, %xmm0
@ -836,8 +857,8 @@
#ifdef TRMMKERNEL #ifdef TRMMKERNEL
vmovss %xmm12, OFFSET vmovsd %xmm12, OFFSET
vmovss %xmm12, KK vmovsd %xmm12, KK
#ifndef LEFT #ifndef LEFT
negq KK negq KK
#endif #endif
@ -1629,7 +1650,7 @@
.L4_60: .L4_60:
#if defined(TRMMKERNEL) && !defined(LEFT) #if defined(TRMMKERNEL) && !defined(LEFT)
addq $2, KK addq $4, KK
#endif #endif
decq J // j -- decq J // j --

View File

@ -37,6 +37,11 @@
/*********************************************************************/ /*********************************************************************/
/********************************************************************* /*********************************************************************
* 2013/10/19 Saar
* BLASTEST :
* CTEST : OK
* TEST : OK
*
* 2013/08/16 Saar * 2013/08/16 Saar
* Parameter: * Parameter:
* ZGEMM_DEFAULT_UNROLL_N 2 * ZGEMM_DEFAULT_UNROLL_N 2
@ -44,7 +49,6 @@
* ZGEMM_DEFAULT_P 112 * ZGEMM_DEFAULT_P 112
* ZGEMM_DEFAULT_Q 224 * ZGEMM_DEFAULT_Q 224
* *
* BLASTEST: OK
* *
* Performance: * Performance:
* 1 thread: 1.80 times faster than sandybridge * 1 thread: 1.80 times faster than sandybridge
@ -138,7 +142,7 @@
#endif #endif
#if defined(BULLDOZER) || defined(PILEDRIVER) #if defined(BULLDOZER)
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) #if defined(NN) || defined(NT) || defined(TN) || defined(TT)
@ -187,41 +191,41 @@
#if defined(NN) || defined(NT) || defined(TN) || defined(TT) #if defined(NN) || defined(NT) || defined(TN) || defined(TT)
.macro VFMADDPD_R y0,y1,y2 .macro VFMADDPD_R y0,y1,y2
vfmadd231pd \y0,\y1,\y2 vfmadd231pd \y1,\y2,\y0
.endm .endm
.macro VFMADDPD_I y0,y1,y2 .macro VFMADDPD_I y0,y1,y2
vfmadd231pd \y0,\y1,\y2 vfmadd231pd \y1,\y2,\y0
.endm .endm
#elif defined(RN) || defined(RT) || defined(CN) || defined(CT) #elif defined(RN) || defined(RT) || defined(CN) || defined(CT)
.macro VFMADDPD_R y0,y1,y2 .macro VFMADDPD_R y0,y1,y2
vfnmadd231pd \y0,\y1,\y2 vfnmadd231pd \y1,\y2,\y0
.endm .endm
.macro VFMADDPD_I y0,y1,y2 .macro VFMADDPD_I y0,y1,y2
vfmadd231pd \y0,\y1,\y2 vfmadd231pd \y1,\y2,\y0
.endm .endm
#elif defined(NR) || defined(NC) || defined(TR) || defined(TC) #elif defined(NR) || defined(NC) || defined(TR) || defined(TC)
.macro VFMADDPD_R y0,y1,y2 .macro VFMADDPD_R y0,y1,y2
vfmadd231pd \y0,\y1,\y2 vfmadd231pd \y1,\y2,\y0
.endm .endm
.macro VFMADDPD_I y0,y1,y2 .macro VFMADDPD_I y0,y1,y2
vfnmadd231pd \y0,\y1,\y2 vfnmadd231pd \y1,\y2,\y0
.endm .endm
#else #else
.macro VFMADDPD_R y0,y1,y2 .macro VFMADDPD_R y0,y1,y2
vfnmadd231pd \y0,\y1,\y2 vfnmadd231pd \y1,\y2,\y0
.endm .endm
.macro VFMADDPD_I y0,y1,y2 .macro VFMADDPD_I y0,y1,y2
vfnmadd231pd \y0,\y1,\y2 vfnmadd231pd \y1,\y2,\y0
.endm .endm
#endif #endif