changes for compatibility with Pathscale compiler
This commit is contained in:
parent
6216ab8a7e
commit
f1db386211
20
common_x86.h
20
common_x86.h
|
@ -171,11 +171,6 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
|||
#define MMXSTORE movd
|
||||
#endif
|
||||
|
||||
#if defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
//Enable some optimization for nehalem.
|
||||
#define NEHALEM_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
//Enable some optimization for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
|
@ -306,12 +301,25 @@ REALNAME:
|
|||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(C_PATHSCALE) || defined(OS_DARWIN)
|
||||
|
||||
#define EPILOGUE \
|
||||
.size REALNAME, .-REALNAME; \
|
||||
.size REALNAME, .-REALNAME; \
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
||||
#else
|
||||
|
||||
#define EPILOGUE \
|
||||
.size REALNAME, .-REALNAME; \
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef XDOUBLE
|
||||
#define FLD fldt
|
||||
#define FST fstpt
|
||||
|
|
|
@ -218,12 +218,6 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){
|
|||
|
||||
#ifdef ASSEMBLER
|
||||
|
||||
#if defined(SANDYBRIDGE) || defined(HASWELL)
|
||||
//Enable some optimization for nehalem.
|
||||
#define NEHALEM_OPTIMIZATION
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(PILEDRIVER) || defined(BULLDOZER)
|
||||
//Enable some optimization for barcelona.
|
||||
#define BARCELONA_OPTIMIZATION
|
||||
|
@ -378,10 +372,20 @@ REALNAME:
|
|||
#define PROFCODE
|
||||
#endif
|
||||
|
||||
#if defined(C_PATHSCALE) || defined(OS_DARWIN)
|
||||
|
||||
#define EPILOGUE \
|
||||
.size REALNAME, .-REALNAME; \
|
||||
.section .note.GNU-stack,"",@progbits
|
||||
|
||||
#else
|
||||
|
||||
#define EPILOGUE \
|
||||
.size REALNAME, .-REALNAME; \
|
||||
.section .note.GNU-stack,"",%progbits
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************
|
||||
* 2013/10/28 Saar
|
||||
* 2013/11/13 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
|
@ -138,43 +138,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
|
||||
|
||||
.macro VFMADDPS_R y0,y1,y2
|
||||
vfmaddps \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_R( y0,y1,y2 ) vfmaddps y0,y1,y2,y0
|
||||
|
||||
.macro VFMADDPS_I y0,y1,y2
|
||||
vfmaddps \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_I( y0,y1,y2 ) vfmaddps y0,y1,y2,y0
|
||||
|
||||
#elif defined(RN) || defined(RT) || defined(CN) || defined(CT)
|
||||
|
||||
.macro VFMADDPS_R y0,y1,y2
|
||||
vfnmaddps \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_R( y0,y1,y2 ) vfnmaddps y0,y1,y2,y0
|
||||
|
||||
.macro VFMADDPS_I y0,y1,y2
|
||||
vfmaddps \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_I( y0,y1,y2 ) vfmaddps y0,y1,y2,y0
|
||||
|
||||
#elif defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
||||
|
||||
.macro VFMADDPS_R y0,y1,y2
|
||||
vfmaddps \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_R( y0,y1,y2 ) vfmaddps y0,y1,y2,y0
|
||||
|
||||
.macro VFMADDPS_I y0,y1,y2
|
||||
vfnmaddps \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_I( y0,y1,y2 ) vfnmaddps y0,y1,y2,y0
|
||||
|
||||
#else
|
||||
|
||||
.macro VFMADDPS_R y0,y1,y2
|
||||
vfnmaddps \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_R( y0,y1,y2 ) vfnmaddps y0,y1,y2,y0
|
||||
|
||||
.macro VFMADDPS_I y0,y1,y2
|
||||
vfnmaddps \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_I( y0,y1,y2 ) vfnmaddps y0,y1,y2,y0
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -182,43 +166,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
|
||||
|
||||
.macro VFMADDPS_R y0,y1,y2
|
||||
vfmadd231ps \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_R( y0,y1,y2 ) vfmadd231ps y1,y2,y0
|
||||
|
||||
.macro VFMADDPS_I y0,y1,y2
|
||||
vfmadd231ps \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_I( y0,y1,y2 ) vfmadd231ps y1,y2,y0
|
||||
|
||||
#elif defined(RN) || defined(RT) || defined(CN) || defined(CT)
|
||||
|
||||
.macro VFMADDPS_R y0,y1,y2
|
||||
vfnmadd231ps \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_R( y0,y1,y2 ) vfnmadd231ps y1,y2,y0
|
||||
|
||||
.macro VFMADDPS_I y0,y1,y2
|
||||
vfmadd231ps \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_I( y0,y1,y2 ) vfmadd231ps y1,y2,y0
|
||||
|
||||
#elif defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
||||
|
||||
.macro VFMADDPS_R y0,y1,y2
|
||||
vfmadd231ps \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_R( y0,y1,y2 ) vfmadd231ps y1,y2,y0
|
||||
|
||||
.macro VFMADDPS_I y0,y1,y2
|
||||
vfnmadd231ps \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_I( y0,y1,y2 ) vfnmadd231ps y1,y2,y0
|
||||
|
||||
#else
|
||||
|
||||
.macro VFMADDPS_R y0,y1,y2
|
||||
vfnmadd231ps \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_R( y0,y1,y2 ) vfnmadd231ps y1,y2,y0
|
||||
|
||||
.macro VFMADDPS_I y0,y1,y2
|
||||
vfnmadd231ps \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPS_I( y0,y1,y2 ) vfnmadd231ps y1,y2,y0
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -234,18 +202,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
vmovups -16 * SIZE(AO, %rax, SIZE), %ymm0
|
||||
vbroadcastss -8 * SIZE(BO, BI, SIZE), %ymm4
|
||||
VFMADDPS_R %ymm8,%ymm4,%ymm0
|
||||
VFMADDPS_R( %ymm8,%ymm4,%ymm0 )
|
||||
vmovups -8 * SIZE(AO, %rax, SIZE), %ymm1
|
||||
VFMADDPS_R %ymm12,%ymm4,%ymm1
|
||||
VFMADDPS_R( %ymm12,%ymm4,%ymm1 )
|
||||
vbroadcastss -7 * SIZE(BO, BI, SIZE), %ymm5
|
||||
VFMADDPS_I %ymm9,%ymm5,%ymm0
|
||||
VFMADDPS_I %ymm13,%ymm5,%ymm1
|
||||
VFMADDPS_I( %ymm9,%ymm5,%ymm0 )
|
||||
VFMADDPS_I( %ymm13,%ymm5,%ymm1 )
|
||||
vbroadcastss -6 * SIZE(BO, BI, SIZE), %ymm6
|
||||
VFMADDPS_R %ymm10,%ymm6,%ymm0
|
||||
VFMADDPS_R %ymm14,%ymm6,%ymm1
|
||||
VFMADDPS_R( %ymm10,%ymm6,%ymm0 )
|
||||
VFMADDPS_R( %ymm14,%ymm6,%ymm1 )
|
||||
vbroadcastss -5 * SIZE(BO, BI, SIZE), %ymm7
|
||||
VFMADDPS_I %ymm11,%ymm7,%ymm0
|
||||
VFMADDPS_I %ymm15,%ymm7,%ymm1
|
||||
VFMADDPS_I( %ymm11,%ymm7,%ymm0 )
|
||||
VFMADDPS_I( %ymm15,%ymm7,%ymm1 )
|
||||
addq $4 , BI
|
||||
addq $16, %rax
|
||||
.endm
|
||||
|
@ -338,18 +306,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL4x2_SUB
|
||||
vmovups -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vbroadcastss -8 * SIZE(BO, BI, SIZE), %xmm4
|
||||
VFMADDPS_R %xmm8,%xmm4,%xmm0
|
||||
VFMADDPS_R( %xmm8,%xmm4,%xmm0 )
|
||||
vmovups -12 * SIZE(AO, %rax, SIZE), %xmm1
|
||||
VFMADDPS_R %xmm12,%xmm4,%xmm1
|
||||
VFMADDPS_R( %xmm12,%xmm4,%xmm1 )
|
||||
vbroadcastss -7 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPS_I %xmm9,%xmm5,%xmm0
|
||||
VFMADDPS_I %xmm13,%xmm5,%xmm1
|
||||
VFMADDPS_I( %xmm9,%xmm5,%xmm0 )
|
||||
VFMADDPS_I( %xmm13,%xmm5,%xmm1 )
|
||||
vbroadcastss -6 * SIZE(BO, BI, SIZE), %xmm6
|
||||
VFMADDPS_R %xmm10,%xmm6,%xmm0
|
||||
VFMADDPS_R %xmm14,%xmm6,%xmm1
|
||||
VFMADDPS_R( %xmm10,%xmm6,%xmm0 )
|
||||
VFMADDPS_R( %xmm14,%xmm6,%xmm1 )
|
||||
vbroadcastss -5 * SIZE(BO, BI, SIZE), %xmm7
|
||||
VFMADDPS_I %xmm11,%xmm7,%xmm0
|
||||
VFMADDPS_I %xmm15,%xmm7,%xmm1
|
||||
VFMADDPS_I( %xmm11,%xmm7,%xmm0 )
|
||||
VFMADDPS_I( %xmm15,%xmm7,%xmm1 )
|
||||
addq $4, BI
|
||||
addq $8, %rax
|
||||
.endm
|
||||
|
@ -437,13 +405,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL2x2_SUB
|
||||
vmovups -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vbroadcastss -8 * SIZE(BO, BI, SIZE), %xmm4
|
||||
VFMADDPS_R %xmm8,%xmm4,%xmm0
|
||||
VFMADDPS_R( %xmm8,%xmm4,%xmm0 )
|
||||
vbroadcastss -7 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPS_I %xmm9,%xmm5,%xmm0
|
||||
VFMADDPS_I( %xmm9,%xmm5,%xmm0 )
|
||||
vbroadcastss -6 * SIZE(BO, BI, SIZE), %xmm6
|
||||
VFMADDPS_R %xmm10,%xmm6,%xmm0
|
||||
VFMADDPS_R( %xmm10,%xmm6,%xmm0 )
|
||||
vbroadcastss -5 * SIZE(BO, BI, SIZE), %xmm7
|
||||
VFMADDPS_I %xmm11,%xmm7,%xmm0
|
||||
VFMADDPS_I( %xmm11,%xmm7,%xmm0 )
|
||||
addq $4, BI
|
||||
addq $4, %rax
|
||||
.endm
|
||||
|
@ -509,13 +477,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL1x2_SUB
|
||||
vmovsd -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vbroadcastss -8 * SIZE(BO, BI, SIZE), %xmm4
|
||||
VFMADDPS_R %xmm8,%xmm4,%xmm0
|
||||
VFMADDPS_R( %xmm8,%xmm4,%xmm0 )
|
||||
vbroadcastss -7 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPS_I %xmm9,%xmm5,%xmm0
|
||||
VFMADDPS_I( %xmm9,%xmm5,%xmm0 )
|
||||
vbroadcastss -6 * SIZE(BO, BI, SIZE), %xmm6
|
||||
VFMADDPS_R %xmm10,%xmm6,%xmm0
|
||||
VFMADDPS_R( %xmm10,%xmm6,%xmm0 )
|
||||
vbroadcastss -5 * SIZE(BO, BI, SIZE), %xmm7
|
||||
VFMADDPS_I %xmm11,%xmm7,%xmm0
|
||||
VFMADDPS_I( %xmm11,%xmm7,%xmm0 )
|
||||
addq $4, BI
|
||||
addq $2, %rax
|
||||
.endm
|
||||
|
@ -583,11 +551,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -16 * SIZE(AO, %rax, SIZE), %ymm0
|
||||
vmovups -8 * SIZE(AO, %rax, SIZE), %ymm1
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm4
|
||||
VFMADDPS_R %ymm8,%ymm4,%ymm0
|
||||
VFMADDPS_R %ymm12,%ymm4,%ymm1
|
||||
VFMADDPS_R( %ymm8,%ymm4,%ymm0 )
|
||||
VFMADDPS_R( %ymm12,%ymm4,%ymm1 )
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %ymm5
|
||||
VFMADDPS_I %ymm9,%ymm5,%ymm0
|
||||
VFMADDPS_I %ymm13,%ymm5,%ymm1
|
||||
VFMADDPS_I( %ymm9,%ymm5,%ymm0 )
|
||||
VFMADDPS_I( %ymm13,%ymm5,%ymm1 )
|
||||
addq $2 , BI
|
||||
addq $16, %rax
|
||||
.endm
|
||||
|
@ -654,12 +622,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL4x1_SUB
|
||||
vmovups -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %xmm4
|
||||
VFMADDPS_R %xmm8,%xmm4,%xmm0
|
||||
VFMADDPS_R( %xmm8,%xmm4,%xmm0 )
|
||||
vmovups -12 * SIZE(AO, %rax, SIZE), %xmm1
|
||||
VFMADDPS_R %xmm12,%xmm4,%xmm1
|
||||
VFMADDPS_R( %xmm12,%xmm4,%xmm1 )
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPS_I %xmm9,%xmm5,%xmm0
|
||||
VFMADDPS_I %xmm13,%xmm5,%xmm1
|
||||
VFMADDPS_I( %xmm9,%xmm5,%xmm0 )
|
||||
VFMADDPS_I( %xmm13,%xmm5,%xmm1 )
|
||||
addq $2, BI
|
||||
addq $8, %rax
|
||||
.endm
|
||||
|
@ -723,9 +691,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL2x1_SUB
|
||||
vmovups -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %xmm4
|
||||
VFMADDPS_R %xmm8,%xmm4,%xmm0
|
||||
VFMADDPS_R( %xmm8,%xmm4,%xmm0 )
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPS_I %xmm9,%xmm5,%xmm0
|
||||
VFMADDPS_I( %xmm9,%xmm5,%xmm0 )
|
||||
addq $2, BI
|
||||
addq $4, %rax
|
||||
.endm
|
||||
|
@ -778,9 +746,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL1x1_SUB
|
||||
vmovsd -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %xmm4
|
||||
VFMADDPS_R %xmm8,%xmm4,%xmm0
|
||||
VFMADDPS_R( %xmm8,%xmm4,%xmm0 )
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPS_I %xmm9,%xmm5,%xmm0
|
||||
VFMADDPS_I( %xmm9,%xmm5,%xmm0 )
|
||||
addq $2, BI
|
||||
addq $2, %rax
|
||||
.endm
|
||||
|
|
|
@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
**********************************************************************************/
|
||||
|
||||
/*********************************************************************
|
||||
* 2013/10/28 Saar
|
||||
* 2013/11/13 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
|
@ -131,23 +131,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(BULLDOZER)
|
||||
|
||||
.macro VFMADD231PS_ y0,y1,y2
|
||||
vfmaddps \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADD231PS_( y0,y1,y2 ) vfmaddps y0,y1,y2,y0
|
||||
|
||||
.macro VFMADD231SS_ x0,x1,x2
|
||||
vfmaddss \x0,\x1,\x2,\x0
|
||||
.endm
|
||||
#define VFMADD231SS_( x0,x1,x2 ) vfmaddss x0,x1,x2,x0
|
||||
|
||||
#else
|
||||
|
||||
.macro VFMADD231PS_ y0,y1,y2
|
||||
vfmadd231ps \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADD231PS_( y0,y1,y2 ) vfmadd231ps y1,y2,y0
|
||||
|
||||
.macro VFMADD231SS_ x0,x1,x2
|
||||
vfmadd231ss \x1,\x2,\x0
|
||||
.endm
|
||||
#define VFMADD231SS_( x0,x1,x2 ) vfmadd231ss x1,x2,x0
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -164,16 +156,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -8 * SIZE(AO, %rax, SIZE), %ymm1
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm2
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %ymm3
|
||||
VFMADD231PS_ %ymm4,%ymm2,%ymm0
|
||||
VFMADD231PS_ %ymm5,%ymm2,%ymm1
|
||||
VFMADD231PS_ %ymm6,%ymm3,%ymm0
|
||||
VFMADD231PS_ %ymm7,%ymm3,%ymm1
|
||||
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
|
||||
VFMADD231PS_( %ymm5,%ymm2,%ymm1 )
|
||||
VFMADD231PS_( %ymm6,%ymm3,%ymm0 )
|
||||
VFMADD231PS_( %ymm7,%ymm3,%ymm1 )
|
||||
vbroadcastss -2 * SIZE(BO, BI, SIZE), %ymm2
|
||||
vbroadcastss -1 * SIZE(BO, BI, SIZE), %ymm3
|
||||
VFMADD231PS_ %ymm8,%ymm2,%ymm0
|
||||
VFMADD231PS_ %ymm9,%ymm2,%ymm1
|
||||
VFMADD231PS_ %ymm10,%ymm3,%ymm0
|
||||
VFMADD231PS_ %ymm11,%ymm3,%ymm1
|
||||
VFMADD231PS_( %ymm8,%ymm2,%ymm0 )
|
||||
VFMADD231PS_( %ymm9,%ymm2,%ymm1 )
|
||||
VFMADD231PS_( %ymm10,%ymm3,%ymm0 )
|
||||
VFMADD231PS_( %ymm11,%ymm3,%ymm1 )
|
||||
addq $4 , BI
|
||||
addq $16, %rax
|
||||
.endm
|
||||
|
@ -235,12 +227,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -16 * SIZE(AO, %rax, SIZE), %ymm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm2
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %ymm3
|
||||
VFMADD231PS_ %ymm4,%ymm2,%ymm0
|
||||
VFMADD231PS_ %ymm6,%ymm3,%ymm0
|
||||
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
|
||||
VFMADD231PS_( %ymm6,%ymm3,%ymm0 )
|
||||
vbroadcastss -2 * SIZE(BO, BI, SIZE), %ymm2
|
||||
vbroadcastss -1 * SIZE(BO, BI, SIZE), %ymm3
|
||||
VFMADD231PS_ %ymm8,%ymm2,%ymm0
|
||||
VFMADD231PS_ %ymm10,%ymm3,%ymm0
|
||||
VFMADD231PS_( %ymm8,%ymm2,%ymm0 )
|
||||
VFMADD231PS_( %ymm10,%ymm3,%ymm0 )
|
||||
addq $4 , BI
|
||||
addq $8 , %rax
|
||||
.endm
|
||||
|
@ -279,12 +271,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231PS_ %xmm4,%xmm2,%xmm0
|
||||
VFMADD231PS_ %xmm6,%xmm3,%xmm0
|
||||
VFMADD231PS_( %xmm4,%xmm2,%xmm0 )
|
||||
VFMADD231PS_( %xmm6,%xmm3,%xmm0 )
|
||||
vbroadcastss -2 * SIZE(BO, BI, SIZE), %xmm2
|
||||
vbroadcastss -1 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231PS_ %xmm8,%xmm2,%xmm0
|
||||
VFMADD231PS_ %xmm10,%xmm3,%xmm0
|
||||
VFMADD231PS_( %xmm8,%xmm2,%xmm0 )
|
||||
VFMADD231PS_( %xmm10,%xmm3,%xmm0 )
|
||||
addq $4 , BI
|
||||
addq $4 , %rax
|
||||
.endm
|
||||
|
@ -323,16 +315,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovss -15 * SIZE(AO, %rax, SIZE), %xmm1
|
||||
vmovss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
vmovss -3 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231SS_ %xmm4,%xmm2,%xmm0
|
||||
VFMADD231SS_ %xmm5,%xmm2,%xmm1
|
||||
VFMADD231SS_ %xmm6,%xmm3,%xmm0
|
||||
VFMADD231SS_ %xmm7,%xmm3,%xmm1
|
||||
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
|
||||
VFMADD231SS_( %xmm5,%xmm2,%xmm1 )
|
||||
VFMADD231SS_( %xmm6,%xmm3,%xmm0 )
|
||||
VFMADD231SS_( %xmm7,%xmm3,%xmm1 )
|
||||
vmovss -2 * SIZE(BO, BI, SIZE), %xmm2
|
||||
vmovss -1 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231SS_ %xmm8,%xmm2,%xmm0
|
||||
VFMADD231SS_ %xmm9,%xmm2,%xmm1
|
||||
VFMADD231SS_ %xmm10,%xmm3,%xmm0
|
||||
VFMADD231SS_ %xmm11,%xmm3,%xmm1
|
||||
VFMADD231SS_( %xmm8,%xmm2,%xmm0 )
|
||||
VFMADD231SS_( %xmm9,%xmm2,%xmm1 )
|
||||
VFMADD231SS_( %xmm10,%xmm3,%xmm0 )
|
||||
VFMADD231SS_( %xmm11,%xmm3,%xmm1 )
|
||||
addq $4 , BI
|
||||
addq $2, %rax
|
||||
.endm
|
||||
|
@ -388,12 +380,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovss -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vmovss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
vmovss -3 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231SS_ %xmm4,%xmm2,%xmm0
|
||||
VFMADD231SS_ %xmm6,%xmm3,%xmm0
|
||||
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
|
||||
VFMADD231SS_( %xmm6,%xmm3,%xmm0 )
|
||||
vmovss -2 * SIZE(BO, BI, SIZE), %xmm2
|
||||
vmovss -1 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231SS_ %xmm8,%xmm2,%xmm0
|
||||
VFMADD231SS_ %xmm10,%xmm3,%xmm0
|
||||
VFMADD231SS_( %xmm8,%xmm2,%xmm0 )
|
||||
VFMADD231SS_( %xmm10,%xmm3,%xmm0 )
|
||||
addq $4 , BI
|
||||
addq $1, %rax
|
||||
.endm
|
||||
|
@ -436,10 +428,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -8 * SIZE(AO, %rax, SIZE), %ymm1
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm2
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %ymm3
|
||||
VFMADD231PS_ %ymm4,%ymm2,%ymm0
|
||||
VFMADD231PS_ %ymm5,%ymm2,%ymm1
|
||||
VFMADD231PS_ %ymm6,%ymm3,%ymm0
|
||||
VFMADD231PS_ %ymm7,%ymm3,%ymm1
|
||||
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
|
||||
VFMADD231PS_( %ymm5,%ymm2,%ymm1 )
|
||||
VFMADD231PS_( %ymm6,%ymm3,%ymm0 )
|
||||
VFMADD231PS_( %ymm7,%ymm3,%ymm1 )
|
||||
addq $2 , BI
|
||||
addq $16, %rax
|
||||
.endm
|
||||
|
@ -480,8 +472,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -16 * SIZE(AO, %rax, SIZE), %ymm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm2
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %ymm3
|
||||
VFMADD231PS_ %ymm4,%ymm2,%ymm0
|
||||
VFMADD231PS_ %ymm6,%ymm3,%ymm0
|
||||
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
|
||||
VFMADD231PS_( %ymm6,%ymm3,%ymm0 )
|
||||
addq $2 , BI
|
||||
addq $8 , %rax
|
||||
.endm
|
||||
|
@ -513,8 +505,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
vbroadcastss -3 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231PS_ %xmm4,%xmm2,%xmm0
|
||||
VFMADD231PS_ %xmm6,%xmm3,%xmm0
|
||||
VFMADD231PS_( %xmm4,%xmm2,%xmm0 )
|
||||
VFMADD231PS_( %xmm6,%xmm3,%xmm0 )
|
||||
addq $2 , BI
|
||||
addq $4 , %rax
|
||||
.endm
|
||||
|
@ -546,10 +538,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovss -15 * SIZE(AO, %rax, SIZE), %xmm1
|
||||
vmovss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
vmovss -3 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231SS_ %xmm4,%xmm2,%xmm0
|
||||
VFMADD231SS_ %xmm5,%xmm2,%xmm1
|
||||
VFMADD231SS_ %xmm6,%xmm3,%xmm0
|
||||
VFMADD231SS_ %xmm7,%xmm3,%xmm1
|
||||
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
|
||||
VFMADD231SS_( %xmm5,%xmm2,%xmm1 )
|
||||
VFMADD231SS_( %xmm6,%xmm3,%xmm0 )
|
||||
VFMADD231SS_( %xmm7,%xmm3,%xmm1 )
|
||||
addq $2 , BI
|
||||
addq $2, %rax
|
||||
.endm
|
||||
|
@ -589,8 +581,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovss -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vmovss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
vmovss -3 * SIZE(BO, BI, SIZE), %xmm3
|
||||
VFMADD231SS_ %xmm4,%xmm2,%xmm0
|
||||
VFMADD231SS_ %xmm6,%xmm3,%xmm0
|
||||
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
|
||||
VFMADD231SS_( %xmm6,%xmm3,%xmm0 )
|
||||
addq $2 , BI
|
||||
addq $1, %rax
|
||||
.endm
|
||||
|
@ -625,8 +617,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -16 * SIZE(AO, %rax, SIZE), %ymm0
|
||||
vmovups -8 * SIZE(AO, %rax, SIZE), %ymm1
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm2
|
||||
VFMADD231PS_ %ymm4,%ymm2,%ymm0
|
||||
VFMADD231PS_ %ymm5,%ymm2,%ymm1
|
||||
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
|
||||
VFMADD231PS_( %ymm5,%ymm2,%ymm1 )
|
||||
addq $1 , BI
|
||||
addq $16, %rax
|
||||
.endm
|
||||
|
@ -656,7 +648,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL8x1_SUB
|
||||
vmovups -16 * SIZE(AO, %rax, SIZE), %ymm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %ymm2
|
||||
VFMADD231PS_ %ymm4,%ymm2,%ymm0
|
||||
VFMADD231PS_( %ymm4,%ymm2,%ymm0 )
|
||||
addq $1 , BI
|
||||
addq $8 , %rax
|
||||
.endm
|
||||
|
@ -684,7 +676,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL4x1_SUB
|
||||
vmovups -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vbroadcastss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
VFMADD231PS_ %xmm4,%xmm2,%xmm0
|
||||
VFMADD231PS_( %xmm4,%xmm2,%xmm0 )
|
||||
addq $1 , BI
|
||||
addq $4 , %rax
|
||||
.endm
|
||||
|
@ -712,8 +704,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovss -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vmovss -15 * SIZE(AO, %rax, SIZE), %xmm1
|
||||
vmovss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
VFMADD231SS_ %xmm4,%xmm2,%xmm0
|
||||
VFMADD231SS_ %xmm5,%xmm2,%xmm1
|
||||
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
|
||||
VFMADD231SS_( %xmm5,%xmm2,%xmm1 )
|
||||
addq $1 , BI
|
||||
addq $2, %rax
|
||||
.endm
|
||||
|
@ -743,7 +735,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL1x1_SUB
|
||||
vmovss -16 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vmovss -4 * SIZE(BO, BI, SIZE), %xmm2
|
||||
VFMADD231SS_ %xmm4,%xmm2,%xmm0
|
||||
VFMADD231SS_( %xmm4,%xmm2,%xmm0 )
|
||||
addq $1 , BI
|
||||
addq $1, %rax
|
||||
.endm
|
||||
|
|
|
@ -26,7 +26,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
**********************************************************************************/
|
||||
|
||||
/********************************************************************************
|
||||
* 2013/10/28 Saar
|
||||
* 2013/11/13 Saar
|
||||
* BLASTEST : OK
|
||||
* CTEST : OK
|
||||
* TEST : OK
|
||||
|
@ -137,43 +137,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
|
||||
|
||||
.macro VFMADDPD_R y0,y1,y2
|
||||
vfmaddpd \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_R( y0,y1,y2 ) vfmaddpd y0,y1,y2,y0
|
||||
|
||||
.macro VFMADDPD_I y0,y1,y2
|
||||
vfmaddpd \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_I( y0,y1,y2 ) vfmaddpd y0,y1,y2,y0
|
||||
|
||||
#elif defined(RN) || defined(RT) || defined(CN) || defined(CT)
|
||||
|
||||
.macro VFMADDPD_R y0,y1,y2
|
||||
vfnmaddpd \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_R( y0,y1,y2 ) vfnmaddpd y0,y1,y2,y0
|
||||
|
||||
.macro VFMADDPD_I y0,y1,y2
|
||||
vfmaddpd \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_I( y0,y1,y2 ) vfmaddpd y0,y1,y2,y0
|
||||
|
||||
#elif defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
||||
|
||||
.macro VFMADDPD_R y0,y1,y2
|
||||
vfmaddpd \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_R( y0,y1,y2 ) vfmaddpd y0,y1,y2,y0
|
||||
|
||||
.macro VFMADDPD_I y0,y1,y2
|
||||
vfnmaddpd \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_I( y0,y1,y2 ) vfnmaddpd y0,y1,y2,y0
|
||||
|
||||
#else
|
||||
|
||||
.macro VFMADDPD_R y0,y1,y2
|
||||
vfnmaddpd \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_R( y0,y1,y2 ) vfnmaddpd y0,y1,y2,y0
|
||||
|
||||
.macro VFMADDPD_I y0,y1,y2
|
||||
vfnmaddpd \y0,\y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_I( y0,y1,y2 ) vfnmaddpd y0,y1,y2,y0
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -181,43 +165,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#if defined(NN) || defined(NT) || defined(TN) || defined(TT)
|
||||
|
||||
.macro VFMADDPD_R y0,y1,y2
|
||||
vfmadd231pd \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_R( y0,y1,y2 ) vfmadd231pd y1,y2,y0
|
||||
|
||||
.macro VFMADDPD_I y0,y1,y2
|
||||
vfmadd231pd \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_I( y0,y1,y2 ) vfmadd231pd y1,y2,y0
|
||||
|
||||
#elif defined(RN) || defined(RT) || defined(CN) || defined(CT)
|
||||
|
||||
.macro VFMADDPD_R y0,y1,y2
|
||||
vfnmadd231pd \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_R( y0,y1,y2 ) vfnmadd231pd y1,y2,y0
|
||||
|
||||
.macro VFMADDPD_I y0,y1,y2
|
||||
vfmadd231pd \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_I( y0,y1,y2 ) vfmadd231pd y1,y2,y0
|
||||
|
||||
#elif defined(NR) || defined(NC) || defined(TR) || defined(TC)
|
||||
|
||||
.macro VFMADDPD_R y0,y1,y2
|
||||
vfmadd231pd \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_R( y0,y1,y2 ) vfmadd231pd y1,y2,y0
|
||||
|
||||
.macro VFMADDPD_I y0,y1,y2
|
||||
vfnmadd231pd \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_I( y0,y1,y2 ) vfnmadd231pd y1,y2,y0
|
||||
|
||||
#else
|
||||
|
||||
.macro VFMADDPD_R y0,y1,y2
|
||||
vfnmadd231pd \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_R( y0,y1,y2 ) vfnmadd231pd y1,y2,y0
|
||||
|
||||
.macro VFMADDPD_I y0,y1,y2
|
||||
vfnmadd231pd \y1,\y2,\y0
|
||||
.endm
|
||||
#define VFMADDPD_I( y0,y1,y2 ) vfnmadd231pd y1,y2,y0
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -233,16 +201,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
vbroadcastsd -8 * SIZE(BO, BI, SIZE), %ymm4
|
||||
vbroadcastsd -7 * SIZE(BO, BI, SIZE), %ymm5
|
||||
VFMADDPD_R %ymm8 ,%ymm4,%ymm0
|
||||
VFMADDPD_R %ymm12,%ymm4,%ymm1
|
||||
VFMADDPD_R( %ymm8 ,%ymm4,%ymm0 )
|
||||
VFMADDPD_R( %ymm12,%ymm4,%ymm1 )
|
||||
vbroadcastsd -6 * SIZE(BO, BI, SIZE), %ymm6
|
||||
VFMADDPD_I %ymm9 ,%ymm5,%ymm0
|
||||
VFMADDPD_I %ymm13,%ymm5,%ymm1
|
||||
VFMADDPD_I( %ymm9 ,%ymm5,%ymm0 )
|
||||
VFMADDPD_I( %ymm13,%ymm5,%ymm1 )
|
||||
vbroadcastsd -5 * SIZE(BO, BI, SIZE), %ymm7
|
||||
VFMADDPD_R %ymm10,%ymm6,%ymm0
|
||||
VFMADDPD_R %ymm14,%ymm6,%ymm1
|
||||
VFMADDPD_I %ymm11,%ymm7,%ymm0
|
||||
VFMADDPD_I %ymm15,%ymm7,%ymm1
|
||||
VFMADDPD_R( %ymm10,%ymm6,%ymm0 )
|
||||
VFMADDPD_R( %ymm14,%ymm6,%ymm1 )
|
||||
VFMADDPD_I( %ymm11,%ymm7,%ymm0 )
|
||||
VFMADDPD_I( %ymm15,%ymm7,%ymm1 )
|
||||
|
||||
addq $4, BI
|
||||
addq $8, %rax
|
||||
|
@ -337,17 +305,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -8 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vmovddup -8 * SIZE(BO, BI, SIZE), %xmm4
|
||||
vmovups -6 * SIZE(AO, %rax, SIZE), %xmm1
|
||||
VFMADDPD_R %xmm8,%xmm4,%xmm0
|
||||
VFMADDPD_R %xmm12,%xmm4,%xmm1
|
||||
VFMADDPD_R( %xmm8,%xmm4,%xmm0 )
|
||||
VFMADDPD_R( %xmm12,%xmm4,%xmm1 )
|
||||
vmovddup -7 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPD_I %xmm9,%xmm5,%xmm0
|
||||
VFMADDPD_I %xmm13,%xmm5,%xmm1
|
||||
VFMADDPD_I( %xmm9,%xmm5,%xmm0 )
|
||||
VFMADDPD_I( %xmm13,%xmm5,%xmm1 )
|
||||
vmovddup -6 * SIZE(BO, BI, SIZE), %xmm6
|
||||
VFMADDPD_R %xmm10,%xmm6,%xmm0
|
||||
VFMADDPD_R %xmm14,%xmm6,%xmm1
|
||||
VFMADDPD_R( %xmm10,%xmm6,%xmm0 )
|
||||
VFMADDPD_R( %xmm14,%xmm6,%xmm1 )
|
||||
vmovddup -5 * SIZE(BO, BI, SIZE), %xmm7
|
||||
VFMADDPD_I %xmm11,%xmm7,%xmm0
|
||||
VFMADDPD_I %xmm15,%xmm7,%xmm1
|
||||
VFMADDPD_I( %xmm11,%xmm7,%xmm0 )
|
||||
VFMADDPD_I( %xmm15,%xmm7,%xmm1 )
|
||||
addq $4, BI
|
||||
addq $4, %rax
|
||||
.endm
|
||||
|
@ -441,12 +409,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -8 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vmovddup -8 * SIZE(BO, BI, SIZE), %xmm4
|
||||
vmovddup -7 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPD_R %xmm8,%xmm4,%xmm0
|
||||
VFMADDPD_I %xmm9,%xmm5,%xmm0
|
||||
VFMADDPD_R( %xmm8,%xmm4,%xmm0 )
|
||||
VFMADDPD_I( %xmm9,%xmm5,%xmm0 )
|
||||
vmovddup -6 * SIZE(BO, BI, SIZE), %xmm6
|
||||
vmovddup -5 * SIZE(BO, BI, SIZE), %xmm7
|
||||
VFMADDPD_R %xmm10,%xmm6,%xmm0
|
||||
VFMADDPD_I %xmm11,%xmm7,%xmm0
|
||||
VFMADDPD_R( %xmm10,%xmm6,%xmm0 )
|
||||
VFMADDPD_I( %xmm11,%xmm7,%xmm0 )
|
||||
addq $4, BI
|
||||
addq $2, %rax
|
||||
.endm
|
||||
|
@ -513,10 +481,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
vmovups -4 * SIZE(AO, %rax, SIZE), %ymm1
|
||||
vbroadcastsd -4 * SIZE(BO, BI, SIZE) , %ymm4
|
||||
vbroadcastsd -3 * SIZE(BO, BI, SIZE) , %ymm5
|
||||
VFMADDPD_R %ymm8 ,%ymm4,%ymm0
|
||||
VFMADDPD_R %ymm12,%ymm4,%ymm1
|
||||
VFMADDPD_I %ymm9 ,%ymm5,%ymm0
|
||||
VFMADDPD_I %ymm13,%ymm5,%ymm1
|
||||
VFMADDPD_R( %ymm8 ,%ymm4,%ymm0 )
|
||||
VFMADDPD_R( %ymm12,%ymm4,%ymm1 )
|
||||
VFMADDPD_I( %ymm9 ,%ymm5,%ymm0 )
|
||||
VFMADDPD_I( %ymm13,%ymm5,%ymm1 )
|
||||
|
||||
addq $2, BI
|
||||
addq $8, %rax
|
||||
|
@ -585,12 +553,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL2x1_SUB
|
||||
vmovups -8 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vmovddup -4 * SIZE(BO, BI, SIZE), %xmm4
|
||||
VFMADDPD_R %xmm8,%xmm4,%xmm0
|
||||
VFMADDPD_R( %xmm8,%xmm4,%xmm0 )
|
||||
vmovups -6 * SIZE(AO, %rax, SIZE), %xmm1
|
||||
VFMADDPD_R %xmm12,%xmm4,%xmm1
|
||||
VFMADDPD_R( %xmm12,%xmm4,%xmm1 )
|
||||
vmovddup -3 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPD_I %xmm9,%xmm5,%xmm0
|
||||
VFMADDPD_I %xmm13,%xmm5,%xmm1
|
||||
VFMADDPD_I( %xmm9,%xmm5,%xmm0 )
|
||||
VFMADDPD_I( %xmm13,%xmm5,%xmm1 )
|
||||
addq $2, BI
|
||||
addq $4, %rax
|
||||
.endm
|
||||
|
@ -655,9 +623,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL1x1_SUB
|
||||
vmovups -8 * SIZE(AO, %rax, SIZE), %xmm0
|
||||
vmovddup -4 * SIZE(BO, BI, SIZE), %xmm4
|
||||
VFMADDPD_R %xmm8,%xmm4,%xmm0
|
||||
VFMADDPD_R( %xmm8,%xmm4,%xmm0 )
|
||||
vmovddup -3 * SIZE(BO, BI, SIZE), %xmm5
|
||||
VFMADDPD_I %xmm9,%xmm5,%xmm0
|
||||
VFMADDPD_I( %xmm9,%xmm5,%xmm0 )
|
||||
addq $2, BI
|
||||
addq $2, %rax
|
||||
.endm
|
||||
|
|
Loading…
Reference in New Issue