Add forgotten conditional uses of PREFETCH
This fixes a (cross-)compilation/linker error for PRESCOTT on Yocto. Signed-off-by: Zoltán Böszörményi <zoltan.boszormenyi@xenial.com>
This commit is contained in:
parent
de465ffdd5
commit
ca64861ce8
|
@ -189,12 +189,16 @@
|
|||
movss %xmm6, 6 * SIZE(B)
|
||||
movss %xmm7, 7 * SIZE(B)
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO1)
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO2)
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO3)
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO4)
|
||||
#endif
|
||||
|
||||
#ifdef PREFETCHW
|
||||
PREFETCHW WPREFETCHSIZE * SIZE(B)
|
||||
#endif
|
||||
|
||||
movss %xmm8, 8 * SIZE(B)
|
||||
movss %xmm9, 9 * SIZE(B)
|
||||
|
@ -205,29 +209,39 @@
|
|||
movss %xmm14, 14 * SIZE(B)
|
||||
movss %xmm15, 15 * SIZE(B)
|
||||
#else
|
||||
#ifdef PREFETCH
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO1)
|
||||
#endif
|
||||
movsd 0 * SIZE(AO1), %xmm0
|
||||
movhpd 0 * SIZE(AO2), %xmm0
|
||||
movsd 1 * SIZE(AO1), %xmm2
|
||||
movhpd 1 * SIZE(AO2), %xmm2
|
||||
#ifdef PREFETCH
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO2)
|
||||
#endif
|
||||
movsd 2 * SIZE(AO1), %xmm4
|
||||
movhpd 2 * SIZE(AO2), %xmm4
|
||||
movsd 3 * SIZE(AO1), %xmm6
|
||||
movhpd 3 * SIZE(AO2), %xmm6
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO3)
|
||||
#endif
|
||||
movsd 0 * SIZE(AO3), %xmm1
|
||||
movhpd 0 * SIZE(AO4), %xmm1
|
||||
movsd 1 * SIZE(AO3), %xmm3
|
||||
movhpd 1 * SIZE(AO4), %xmm3
|
||||
#ifdef PREFETCH
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO4)
|
||||
#endif
|
||||
movsd 2 * SIZE(AO3), %xmm5
|
||||
movhpd 2 * SIZE(AO4), %xmm5
|
||||
movsd 3 * SIZE(AO3), %xmm7
|
||||
movhpd 3 * SIZE(AO4), %xmm7
|
||||
|
||||
#ifdef PREFETCHW
|
||||
PREFETCHW WPREFETCHSIZE * SIZE(B)
|
||||
#endif
|
||||
movapd %xmm0, 0 * SIZE(B)
|
||||
movapd %xmm1, 2 * SIZE(B)
|
||||
movapd %xmm2, 4 * SIZE(B)
|
||||
|
@ -342,10 +356,14 @@
|
|||
movapd %xmm3, 6 * SIZE(B)
|
||||
#endif
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO1)
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO2)
|
||||
#endif
|
||||
|
||||
#ifdef PREFETCHW
|
||||
PREFETCHW WPREFETCHSIZE * SIZE(B)
|
||||
#endif
|
||||
|
||||
addq $4 * SIZE, AO1
|
||||
addq $4 * SIZE, AO2
|
||||
|
|
|
@ -219,31 +219,41 @@
|
|||
movaps %xmm3, 12 * SIZE(BO)
|
||||
#else
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO1)
|
||||
#endif
|
||||
movsd 0 * SIZE(AO1), %xmm0
|
||||
movhpd 1 * SIZE(AO1), %xmm0
|
||||
movsd 2 * SIZE(AO1), %xmm1
|
||||
movhpd 3 * SIZE(AO1), %xmm1
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO2)
|
||||
#endif
|
||||
movsd 0 * SIZE(AO2), %xmm2
|
||||
movhpd 1 * SIZE(AO2), %xmm2
|
||||
movsd 2 * SIZE(AO2), %xmm3
|
||||
movhpd 3 * SIZE(AO2), %xmm3
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO3)
|
||||
#endif
|
||||
movsd 0 * SIZE(AO3), %xmm4
|
||||
movhpd 1 * SIZE(AO3), %xmm4
|
||||
movsd 2 * SIZE(AO3), %xmm5
|
||||
movhpd 3 * SIZE(AO3), %xmm5
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH RPREFETCHSIZE * SIZE(AO4)
|
||||
#endif
|
||||
movsd 0 * SIZE(AO4), %xmm6
|
||||
movhpd 1 * SIZE(AO4), %xmm6
|
||||
movsd 2 * SIZE(AO4), %xmm7
|
||||
movhpd 3 * SIZE(AO4), %xmm7
|
||||
|
||||
#ifdef PREFETCHW
|
||||
PREFETCHW WPREFETCHSIZE * SIZE(BO)
|
||||
#endif
|
||||
movapd %xmm0, 0 * SIZE(BO)
|
||||
movapd %xmm1, 2 * SIZE(BO)
|
||||
movapd %xmm2, 4 * SIZE(BO)
|
||||
|
|
|
@ -102,6 +102,14 @@
|
|||
#define RPREFETCHSIZE (8 * 7 + 4)
|
||||
#define WPREFETCHSIZE (8 * 8 + 4)
|
||||
|
||||
#ifdef PREFETCH
|
||||
#define PREFETCH_KERNEL1(xx) PREFETCH (PREFETCHSIZE + 0) * SIZE + 1 * (xx) * SIZE(AO, %rax, 4) ;
|
||||
#define PREFETCH_KERNEL4(xx) PREFETCH (PREFETCHSIZE + 16) * SIZE + 1 * (xx) * SIZE(AO, %rax, 4) ;
|
||||
#else
|
||||
#define PREFETCH_KERNEL1(xx)
|
||||
#define PREFETCH_KERNEL4(xx)
|
||||
#endif
|
||||
|
||||
#ifndef GENERIC
|
||||
#define KERNEL1(xx) \
|
||||
mulps %xmm0, %xmm1 ;\
|
||||
|
@ -111,7 +119,7 @@
|
|||
addps %xmm3, %xmm9 ;\
|
||||
movaps -28 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm3 ;\
|
||||
mulps %xmm0, %xmm5 ;\
|
||||
PREFETCH (PREFETCHSIZE + 0) * SIZE + 1 * (xx) * SIZE(AO, %rax, 4) ;\
|
||||
PREFETCH_KERNEL1(xx) \
|
||||
mulps -20 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm0 ;\
|
||||
addps %xmm5, %xmm10 ;\
|
||||
movaps -24 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm5 ;\
|
||||
|
@ -157,7 +165,7 @@
|
|||
mulps -4 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm6 ;\
|
||||
addps %xmm5, %xmm14 ;\
|
||||
movaps 8 * SIZE + 2 * (xx) * SIZE(BO, %rax, 8), %xmm5 ;\
|
||||
PREFETCH (PREFETCHSIZE + 16) * SIZE + 1 * (xx) * SIZE(AO, %rax, 4) ;\
|
||||
PREFETCH_KERNEL4(xx) \
|
||||
addps %xmm6, %xmm15 ;\
|
||||
movaps -4 * SIZE + 1 * (xx) * SIZE(AO, %rax, 4), %xmm6
|
||||
|
||||
|
@ -1026,7 +1034,9 @@
|
|||
|
||||
.L22:
|
||||
mulps %xmm0, %xmm1
|
||||
#ifdef PREFETCH
|
||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
|
||||
#endif
|
||||
addps %xmm1, %xmm8
|
||||
movaps -28 * SIZE(BO), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
|
@ -1079,7 +1089,9 @@
|
|||
movaps 0 * SIZE(AO), %xmm0
|
||||
|
||||
mulps %xmm2, %xmm1
|
||||
#ifdef PREFETCH
|
||||
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO)
|
||||
#endif
|
||||
addps %xmm1, %xmm8
|
||||
movaps 36 * SIZE(BO), %xmm1
|
||||
mulps %xmm2, %xmm1
|
||||
|
@ -1285,7 +1297,9 @@
|
|||
|
||||
.L32:
|
||||
mulps %xmm0, %xmm1
|
||||
#ifdef PREFETCH
|
||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
|
||||
#endif
|
||||
addps %xmm1, %xmm8
|
||||
movaps -28 * SIZE(BO), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
|
@ -1679,7 +1693,9 @@
|
|||
|
||||
.L52:
|
||||
mulps %xmm0, %xmm1
|
||||
#ifdef PREFETCH
|
||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
|
||||
#endif
|
||||
mulps -28 * SIZE(BO), %xmm0
|
||||
addps %xmm1, %xmm8
|
||||
movaps -32 * SIZE(BO), %xmm1
|
||||
|
@ -1705,7 +1721,9 @@
|
|||
addps %xmm0, %xmm13
|
||||
movaps 32 * SIZE(AO), %xmm0
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO)
|
||||
#endif
|
||||
|
||||
mulps %xmm2, %xmm3
|
||||
mulps -12 * SIZE(BO), %xmm2
|
||||
|
@ -1733,7 +1751,9 @@
|
|||
addps %xmm2, %xmm13
|
||||
movaps 48 * SIZE(AO), %xmm2
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH (PREFETCHSIZE + 32) * SIZE(AO)
|
||||
#endif
|
||||
|
||||
mulps %xmm4, %xmm5
|
||||
mulps 4 * SIZE(BO), %xmm4
|
||||
|
@ -1761,7 +1781,9 @@
|
|||
addps %xmm4, %xmm13
|
||||
movaps 64 * SIZE(AO), %xmm4
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH (PREFETCHSIZE + 48) * SIZE(AO)
|
||||
#endif
|
||||
|
||||
mulps %xmm6, %xmm7
|
||||
mulps 20 * SIZE(BO), %xmm6
|
||||
|
@ -1942,7 +1964,9 @@
|
|||
|
||||
.L62:
|
||||
mulps %xmm0, %xmm1
|
||||
#ifdef PREFETCH
|
||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
|
||||
#endif
|
||||
mulps -28 * SIZE(BO), %xmm0
|
||||
addps %xmm1, %xmm8
|
||||
movaps -24 * SIZE(BO), %xmm1
|
||||
|
@ -1968,7 +1992,9 @@
|
|||
addps %xmm0, %xmm11
|
||||
movaps 0 * SIZE(AO), %xmm0
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH (PREFETCHSIZE + 16) * SIZE(AO)
|
||||
#endif
|
||||
|
||||
mulps %xmm2, %xmm5
|
||||
mulps 4 * SIZE(BO), %xmm2
|
||||
|
@ -2130,7 +2156,9 @@
|
|||
|
||||
.L72:
|
||||
mulps %xmm0, %xmm1
|
||||
#ifdef PREFETCH
|
||||
PREFETCH (PREFETCHSIZE + 0) * SIZE(AO)
|
||||
#endif
|
||||
addps %xmm1, %xmm8
|
||||
movaps -28 * SIZE(BO), %xmm1
|
||||
mulps %xmm0, %xmm1
|
||||
|
|
|
@ -484,7 +484,9 @@
|
|||
addpd a1, yy1
|
||||
MOVDDUP(1 * SIZE, A1, a1)
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH PREFETCHSIZE(A1)
|
||||
#endif
|
||||
|
||||
movapd xtemp3, xt1
|
||||
mulpd a2, xt1
|
||||
|
@ -507,7 +509,9 @@
|
|||
addpd a2, yy2
|
||||
MOVDDUP(0 * SIZE, A2, a2)
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH PREFETCHSIZE(XX)
|
||||
#endif
|
||||
|
||||
movapd xtemp3, xt1
|
||||
movapd 12 * SIZE(XX), xtemp3
|
||||
|
@ -546,7 +550,9 @@
|
|||
addpd a2, yy1
|
||||
MOVDDUP(6 * SIZE, A2, a2)
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH PREFETCHSIZE(A2)
|
||||
#endif
|
||||
|
||||
movlpd yy1, 0 * SIZE(YY)
|
||||
movhpd yy1, 1 * SIZE(YY)
|
||||
|
@ -574,7 +580,9 @@
|
|||
addpd a1, yy1
|
||||
MOVDDUP(6 * SIZE, A1, a1)
|
||||
|
||||
#ifdef PREFETCHW
|
||||
PREFETCHW PREFETCHSIZE(YY)
|
||||
#endif
|
||||
|
||||
movapd xtemp4, xt1
|
||||
mulpd a2, xt1
|
||||
|
|
|
@ -442,7 +442,9 @@
|
|||
addpd a1, yy1
|
||||
MOVDDUP(3 * SIZE, A2, a1)
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH PREFETCHSIZE(A1)
|
||||
#endif
|
||||
|
||||
movapd xtemp3, xt1
|
||||
mulpd a2, xt1
|
||||
|
@ -465,7 +467,9 @@
|
|||
addpd a1, yy2
|
||||
MOVDDUP(3 * SIZE, A1, a1)
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH PREFETCHSIZE(XX)
|
||||
#endif
|
||||
|
||||
movapd xtemp3, xt1
|
||||
movapd 12 * SIZE(XX), xtemp3
|
||||
|
@ -504,7 +508,9 @@
|
|||
addpd a2, yy1
|
||||
MOVDDUP(5 * SIZE, A1, a2)
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCH PREFETCHSIZE(A2)
|
||||
#endif
|
||||
|
||||
movlpd yy1, 0 * SIZE(YY)
|
||||
movhpd yy1, 1 * SIZE(YY)
|
||||
|
@ -532,7 +538,9 @@
|
|||
addpd a2, yy1
|
||||
MOVDDUP(4 * SIZE, A2, a2)
|
||||
|
||||
#ifdef PREFETCH
|
||||
PREFETCHW PREFETCHSIZE(YY)
|
||||
#endif
|
||||
|
||||
movapd xtemp4, xt1
|
||||
mulpd a3, xt1
|
||||
|
|
|
@ -109,12 +109,20 @@
|
|||
#define PREFETCHSIZE (8 * 6 + 4)
|
||||
#endif
|
||||
|
||||
#ifdef PREFETCH
|
||||
#define PREFETCH_KERNEL1(xx) PREFETCH (PREFETCHSIZE + 0) * SIZE + 1 * (xx) * SIZE(AO) ;
|
||||
#define PREFETCH_KERNEL5(xx) PREFETCH (PREFETCHSIZE + 8) * SIZE + 1 * (xx) * SIZE(AO) ;
|
||||
#else
|
||||
#define PREFETCH_KERNEL1(xx)
|
||||
#define PREFETCH_KERNEL5(xx)
|
||||
#endif
|
||||
|
||||
#define KERNEL1(xx) \
|
||||
mulps %xmm8, %xmm9 ;\
|
||||
addps %xmm9, %xmm0 ;\
|
||||
movaps 0 * SIZE + 2 * (xx) * SIZE(BO), %xmm9 ;\
|
||||
mulps %xmm8, %xmm11 ;\
|
||||
PREFETCH (PREFETCHSIZE + 0) * SIZE + 1 * (xx) * SIZE(AO) ;\
|
||||
PREFETCH_KERNEL1(xx) \
|
||||
addps %xmm11, %xmm1 ;\
|
||||
movaps 4 * SIZE + 2 * (xx) * SIZE(BO), %xmm11 ;\
|
||||
mulps %xmm8, %xmm13 ;\
|
||||
|
@ -171,7 +179,7 @@
|
|||
addps %xmm9, %xmm0 ;\
|
||||
movaps 32 * SIZE + 2 * (xx) * SIZE(BO), %xmm9 ;\
|
||||
mulps %xmm8, %xmm11 ;\
|
||||
PREFETCH (PREFETCHSIZE + 8) * SIZE + 1 * (xx) * SIZE(AO) ;\
|
||||
PREFETCH_KERNEL5(xx) \
|
||||
addps %xmm11, %xmm1 ;\
|
||||
movaps 36 * SIZE + 2 * (xx) * SIZE(BO), %xmm11 ;\
|
||||
mulps %xmm8, %xmm13 ;\
|
||||
|
|
|
@ -109,12 +109,20 @@
|
|||
#define PREFETCHSIZE (8 * 6 + 4)
|
||||
#endif
|
||||
|
||||
#ifdef PREFETCH
|
||||
#define PREFETCH_KERNEL1(xx) PREFETCH (PREFETCHSIZE + 0) * SIZE + 1 * (xx) * SIZE(AO) ;
|
||||
#define PREFETCH_KERNEL5(xx) PREFETCH (PREFETCHSIZE + 8) * SIZE + 1 * (xx) * SIZE(AO) ;
|
||||
#else
|
||||
#define PREFETCH_KERNEL1(xx)
|
||||
#define PREFETCH_KERNEL5(xx)
|
||||
#endif
|
||||
|
||||
#define KERNEL1(xx) \
|
||||
mulps %xmm8, %xmm9 ;\
|
||||
addps %xmm9, %xmm0 ;\
|
||||
movaps 0 * SIZE + 2 * (xx) * SIZE(BO), %xmm9 ;\
|
||||
mulps %xmm8, %xmm11 ;\
|
||||
PREFETCH (PREFETCHSIZE + 0) * SIZE + 1 * (xx) * SIZE(AO) ;\
|
||||
PREFETCH_KERNEL1(xx) \
|
||||
addps %xmm11, %xmm1 ;\
|
||||
movaps 4 * SIZE + 2 * (xx) * SIZE(BO), %xmm11 ;\
|
||||
mulps %xmm8, %xmm13 ;\
|
||||
|
@ -171,7 +179,7 @@
|
|||
addps %xmm9, %xmm0 ;\
|
||||
movaps 32 * SIZE + 2 * (xx) * SIZE(BO), %xmm9 ;\
|
||||
mulps %xmm8, %xmm11 ;\
|
||||
PREFETCH (PREFETCHSIZE + 8) * SIZE + 1 * (xx) * SIZE(AO) ;\
|
||||
PREFETCH_KERNEL5(xx) \
|
||||
addps %xmm11, %xmm1 ;\
|
||||
movaps 36 * SIZE + 2 * (xx) * SIZE(BO), %xmm11 ;\
|
||||
mulps %xmm8, %xmm13 ;\
|
||||
|
|
|
@ -109,12 +109,20 @@
|
|||
#define PREFETCHSIZE (8 * 6 + 4)
|
||||
#endif
|
||||
|
||||
#ifdef PREFETCH
|
||||
#define PREFETCH_KERNEL1(xx) PREFETCH (PREFETCHSIZE + 0) * SIZE + 1 * (xx) * SIZE(AO) ;
|
||||
#define PREFETCH_KERNEL5(xx) PREFETCH (PREFETCHSIZE + 8) * SIZE + 1 * (xx) * SIZE(AO) ;
|
||||
#else
|
||||
#define PREFETCH_KERNEL1(xx)
|
||||
#define PREFETCH_KERNEL5(xx)
|
||||
#endif
|
||||
|
||||
#define KERNEL1(xx) \
|
||||
mulps %xmm8, %xmm9 ;\
|
||||
addps %xmm9, %xmm0 ;\
|
||||
movaps 0 * SIZE + 2 * (xx) * SIZE(BO), %xmm9 ;\
|
||||
mulps %xmm8, %xmm11 ;\
|
||||
PREFETCH (PREFETCHSIZE + 0) * SIZE + 1 * (xx) * SIZE(AO) ;\
|
||||
PREFETCH_KERNEL1(xx) \
|
||||
addps %xmm11, %xmm1 ;\
|
||||
movaps 4 * SIZE + 2 * (xx) * SIZE(BO), %xmm11 ;\
|
||||
mulps %xmm8, %xmm13 ;\
|
||||
|
@ -171,7 +179,7 @@
|
|||
addps %xmm9, %xmm0 ;\
|
||||
movaps 32 * SIZE + 2 * (xx) * SIZE(BO), %xmm9 ;\
|
||||
mulps %xmm8, %xmm11 ;\
|
||||
PREFETCH (PREFETCHSIZE + 8) * SIZE + 1 * (xx) * SIZE(AO) ;\
|
||||
PREFETCH_KERNEL5(xx) \
|
||||
addps %xmm11, %xmm1 ;\
|
||||
movaps 36 * SIZE + 2 * (xx) * SIZE(BO), %xmm11 ;\
|
||||
mulps %xmm8, %xmm13 ;\
|
||||
|
|
Loading…
Reference in New Issue