Make local labels in macro compatible with the Xcode assembler

Martin Kroeker 2020-04-02 00:44:28 +02:00 committed by GitHub
parent 62cf7a82f1
commit 07cb1097ff
1 changed file with 30 additions and 30 deletions


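Background for the change: in GNU as, the sequence \@ inside a .macro body expands to a counter that is unique per macro invocation, so labels such as KERNEL_F1_NEXT_\@ never collide between expansions. The integrated clang assembler that Xcode uses does not expand \@, which is what breaks the build on Apple toolchains and prompted this commit. Numeric local labels sidestep the problem: a label such as 1: may be defined any number of times, a branch to 1f resolves to the nearest following 1:, and 1b would resolve to the nearest preceding one; both assemblers accept this. The old label names are kept as comments purely for readability. Below is a minimal sketch of the two styles using a made-up macro, not code taken from this file:

    .macro CLAMP_AT_ZERO reg
    cmp   \reg, #0
    bge   DONE_\@              // unique label per expansion; GNU as only
    mov   \reg, #0
DONE_\@:
    .endm

    .macro CLAMP_AT_ZERO_PORTABLE reg
    cmp   \reg, #0
    bge   1f                   // forward reference to the next "1:"; clang accepts this too
    mov   \reg, #0
1:                             // numeric local label, may be redefined elsewhere
    .endm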
@@ -61,20 +61,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 vldmia.f64 X!, { d4 }
 vcmpe.f64 d4, d6 // compare with 0.0
 vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_NEXT_\@
+beq 1f /* KERNEL_F1_NEXT_\@ */
 vabs.f64 d4, d4
 vcmpe.f64 d0, d4 // compare with scale
 vmrs APSR_nzcv, fpscr
 vdivge.f64 d2 , d4, d0 // scale >= x ? x / scale
 vmlage.f64 d1 , d2 , d2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_NEXT_\@
+bge 1f /* KERNEL_F1_NEXT_\@ */
 vdiv.f64 d2 , d0, d4 // scale / x
 vmul.f64 d2 , d2, d2 // ( scale / x ) * ( scale / x )
 vmul.f64 d3 , d1, d2 // ssq * ( scale / x ) * ( scale / x )
 vadd.f64 d1 , d3, d7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 vmov.f64 d0 , d4 // scale = x
-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */
 .endm
@@ -124,20 +124,20 @@ KERNEL_S1_NEXT:
 vldmia.f32 X!, { s4 }
 vcmpe.f32 s4, s6 // compare with 0.0
 vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_NEXT_\@
+beq 1f /* KERNEL_F1_NEXT_\@ */
 vabs.f32 s4, s4
 vcmpe.f32 s0, s4 // compare with scale
 vmrs APSR_nzcv, fpscr
 vdivge.f32 s2 , s4, s0 // scale >= x ? x / scale
 vmlage.f32 s1 , s2 , s2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_NEXT_\@
+bge 1f /* KERNEL_F1_NEXT_\@ */
 vdiv.f32 s2 , s0, s4 // scale / x
 vmul.f32 s2 , s2, s2 // ( scale / x ) * ( scale / x )
 vmul.f32 s3 , s1, s2 // ssq * ( scale / x ) * ( scale / x )
 vadd.f32 s1 , s3, s7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 vmov.f32 s0 , s4 // scale = x
-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */
 .endm
@@ -195,37 +195,37 @@ KERNEL_S1_NEXT:
 vcmpe.f64 d4, d6 // compare with 0.0
 vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_NEXT_\@
+beq 1f /* KERNEL_F1_NEXT_\@ */
 vabs.f64 d4, d4
 vcmpe.f64 d0, d4 // compare with scale
 vmrs APSR_nzcv, fpscr
 vdivge.f64 d2 , d4, d0 // scale >= x ? x / scale
 vmlage.f64 d1 , d2 , d2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_NEXT_\@
+bge 1f /* KERNEL_F1_NEXT_\@ */
 vdiv.f64 d2 , d0, d4 // scale / x
 vmul.f64 d2 , d2, d2 // ( scale / x ) * ( scale / x )
 vmul.f64 d3 , d1, d2 // ssq * ( scale / x ) * ( scale / x )
 vadd.f64 d1 , d3, d7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 vmov.f64 d0 , d4 // scale = x
-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */
 vcmpe.f64 d5, d6 // compare with 0.0
 vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_END_\@
+beq 2f /* KERNEL_F1_END_\@ */
 vabs.f64 d5, d5
 vcmpe.f64 d0, d5 // compare with scale
 vmrs APSR_nzcv, fpscr
 vdivge.f64 d2 , d5, d0 // scale >= x ? x / scale
 vmlage.f64 d1 , d2 , d2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_END_\@
+bge 2f /* KERNEL_F1_END_\@ */
 vdiv.f64 d2 , d0, d5 // scale / x
 vmul.f64 d2 , d2, d2 // ( scale / x ) * ( scale / x )
 vmul.f64 d3 , d1, d2 // ssq * ( scale / x ) * ( scale / x )
 vadd.f64 d1 , d3, d7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 vmov.f64 d0 , d5 // scale = x
-KERNEL_F1_END_\@:
+2: /* KERNEL_F1_END_\@: */
 .endm
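The macro in the hunk above needs two distinct jump targets per expansion (formerly KERNEL_F1_NEXT_\@ and KERNEL_F1_END_\@) because each iteration processes two values, d4 and d5, presumably the real and imaginary parts of a complex element. The patch therefore uses the two numerals 1 and 2; reusing the same numerals in the other macros is harmless, since every 1f or 2f binds to the nearest following 1: or 2: in the assembled output. A small sketch of the same two-target pattern, again with a hypothetical macro rather than code from this file:

    .macro ABS_THEN_CLAMP reg, limit
    cmp   \reg, #0
    bge   1f                   // skip the negation if the value is already non-negative
    rsb   \reg, \reg, #0       // reg = -reg
1:
    cmp   \reg, \limit
    ble   2f                   // skip the clamp if the value is within the limit
    mov   \reg, \limit
2:
    .endm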
@@ -253,37 +253,37 @@ KERNEL_F1_END_\@:
 vcmpe.f64 d4, d6 // compare with 0.0
 vmrs APSR_nzcv, fpscr
-beq KERNEL_S1_NEXT_\@
+beq 1f /* KERNEL_S1_NEXT_\@ */
 vabs.f64 d4, d4
 vcmpe.f64 d0, d4 // compare with scale
 vmrs APSR_nzcv, fpscr
 vdivge.f64 d2 , d4, d0 // scale >= x ? x / scale
 vmlage.f64 d1 , d2 , d2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_S1_NEXT_\@
+bge 1f /* KERNEL_S1_NEXT_\@ */
 vdiv.f64 d2 , d0, d4 // scale / x
 vmul.f64 d2 , d2, d2 // ( scale / x ) * ( scale / x )
 vmul.f64 d3 , d1, d2 // ssq * ( scale / x ) * ( scale / x )
 vadd.f64 d1 , d3, d7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 vmov.f64 d0 , d4 // scale = x
-KERNEL_S1_NEXT_\@:
+1: /* KERNEL_S1_NEXT_\@: */
 vcmpe.f64 d5, d6 // compare with 0.0
 vmrs APSR_nzcv, fpscr
-beq KERNEL_S1_END_\@
+beq 2f /* KERNEL_S1_END_\@ */
 vabs.f64 d5, d5
 vcmpe.f64 d0, d5 // compare with scale
 vmrs APSR_nzcv, fpscr
 vdivge.f64 d2 , d5, d0 // scale >= x ? x / scale
 vmlage.f64 d1 , d2 , d2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_S1_END_\@
+bge 2f /* KERNEL_S1_END_\@ */
 vdiv.f64 d2 , d0, d5 // scale / x
 vmul.f64 d2 , d2, d2 // ( scale / x ) * ( scale / x )
 vmul.f64 d3 , d1, d2 // ssq * ( scale / x ) * ( scale / x )
 vadd.f64 d1 , d3, d7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 vmov.f64 d0 , d5 // scale = x
-KERNEL_S1_END_\@:
+2: /* KERNEL_S1_END_\@: */
 add X, X, INC_X
@@ -298,37 +298,37 @@ KERNEL_S1_END_\@:
 vcmpe.f32 s4, s6 // compare with 0.0
 vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_NEXT_\@
+beq 1f /* KERNEL_F1_NEXT_\@ */
 vabs.f32 s4, s4
 vcmpe.f32 s0, s4 // compare with scale
 vmrs APSR_nzcv, fpscr
 vdivge.f32 s2 , s4, s0 // scale >= x ? x / scale
 vmlage.f32 s1 , s2 , s2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_NEXT_\@
+bge 1f /* KERNEL_F1_NEXT_\@ */
 vdiv.f32 s2 , s0, s4 // scale / x
 vmul.f32 s2 , s2, s2 // ( scale / x ) * ( scale / x )
 vmul.f32 s3 , s1, s2 // ssq * ( scale / x ) * ( scale / x )
 vadd.f32 s1 , s3, s7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 vmov.f32 s0 , s4 // scale = x
-KERNEL_F1_NEXT_\@:
+1: /* KERNEL_F1_NEXT_\@: */
 vcmpe.f32 s5, s6 // compare with 0.0
 vmrs APSR_nzcv, fpscr
-beq KERNEL_F1_END_\@
+beq 2f /* KERNEL_F1_END_\@ */
 vabs.f32 s5, s5
 vcmpe.f32 s0, s5 // compare with scale
 vmrs APSR_nzcv, fpscr
 vdivge.f32 s2 , s5, s0 // scale >= x ? x / scale
 vmlage.f32 s1 , s2 , s2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_F1_END_\@
+bge 2f /* KERNEL_F1_END_\@ */
 vdiv.f32 s2 , s0, s5 // scale / x
 vmul.f32 s2 , s2, s2 // ( scale / x ) * ( scale / x )
 vmul.f32 s3 , s1, s2 // ssq * ( scale / x ) * ( scale / x )
 vadd.f32 s1 , s3, s7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 vmov.f32 s0 , s5 // scale = x
-KERNEL_F1_END_\@:
+2: /* KERNEL_F1_END_\@: */
 .endm
@@ -354,37 +354,37 @@ KERNEL_F1_END_\@:
 vcmpe.f32 s4, s6 // compare with 0.0
 vmrs APSR_nzcv, fpscr
-beq KERNEL_S1_NEXT_\@
+beq 1f /* KERNEL_S1_NEXT_\@ */
 vabs.f32 s4, s4
 vcmpe.f32 s0, s4 // compare with scale
 vmrs APSR_nzcv, fpscr
 vdivge.f32 s2 , s4, s0 // scale >= x ? x / scale
 vmlage.f32 s1 , s2 , s2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_S1_NEXT_\@
+bge 1f /* KERNEL_S1_NEXT_\@ */
 vdiv.f32 s2 , s0, s4 // scale / x
 vmul.f32 s2 , s2, s2 // ( scale / x ) * ( scale / x )
 vmul.f32 s3 , s1, s2 // ssq * ( scale / x ) * ( scale / x )
 vadd.f32 s1 , s3, s7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 vmov.f32 s0 , s4 // scale = x
-KERNEL_S1_NEXT_\@:
+1: /* KERNEL_S1_NEXT_\@: */
 vcmpe.f32 s5, s6 // compare with 0.0
 vmrs APSR_nzcv, fpscr
-beq KERNEL_S1_END_\@
+beq 2f /* KERNEL_S1_END_\@ */
 vabs.f32 s5, s5
 vcmpe.f32 s0, s5 // compare with scale
 vmrs APSR_nzcv, fpscr
 vdivge.f32 s2 , s5, s0 // scale >= x ? x / scale
 vmlage.f32 s1 , s2 , s2 // ssq += ( x/scale ) * ( x/scale )
-bge KERNEL_S1_END_\@
+bge 2f /* KERNEL_S1_END_\@ */
 vdiv.f32 s2 , s0, s5 // scale / x
 vmul.f32 s2 , s2, s2 // ( scale / x ) * ( scale / x )
 vmul.f32 s3 , s1, s2 // ssq * ( scale / x ) * ( scale / x )
 vadd.f32 s1 , s3, s7 // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
 vmov.f32 s0 , s5 // scale = x
-KERNEL_S1_END_\@:
+2: /* KERNEL_S1_END_\@: */
 add X, X, INC_X