Make ARMV7 compile with xcode and add a CI job for it (#2537)
* Add an ARMV7 iOS build on Travis
* thread_local appears to be unavailable on ARMV7 iOS
* Add the -mno-thumb option to the ARMV7 iOS build so the assembler accepts DMB ISH
* Make the local labels in the macros of nrm2_vfpv3.S compatible with the Xcode assembler
parent f059e614eb
commit 806f89166e
@@ -180,6 +180,12 @@ matrix:
         - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
         - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
 
+    - <<: *test-macos
+      osx_image: xcode10.1
+      env:
+        - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
+        - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
+        - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
 
 # whitelist
 branches:
   only:
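On the -mno-thumb flag added above: for -arch armv7, clang otherwise assembles Thumb-2 code, and in that mode the Xcode assembler rejected the DMB ISH barrier used by OpenBLAS's ARMv7 sources; forcing ARM mode lets it through. A minimal sketch of the affected construct (hypothetical standalone snippet, not taken from this patch), built with the CI flags above:

    // barrier.S -- hypothetical repro; assembles in ARM mode (-mno-thumb)
            .text
            .align  2
            dmb     ish             // data memory barrier, inner-shareable domain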
@@ -72,9 +72,9 @@
        defined __BORLANDC__ )
 #  define thread_local __declspec(thread)
 /* note that ICC (linux) and Clang are covered by __GNUC__ */
-# elif defined __GNUC__ || \
+# elif (defined __GNUC__ || \
        defined __SUNPRO_C || \
-       defined __xlC__
+       defined __xlC__) && !defined(__APPLE__)
 #  define thread_local __thread
 # else
 #  define UNSAFE
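The !defined(__APPLE__) guard above forces the fallback branch on Apple targets, since __thread is not usable at the iOS 5.1 deployment target of the new CI job. A condensed sketch of the resulting selection (not the verbatim source; the real block covers more compilers):

    /* Condensed sketch, assuming the macro names used in the hunk above. */
    #if defined(_MSC_VER) || defined(__BORLANDC__)
    #  define thread_local __declspec(thread)   /* MSVC-style TLS */
    #elif (defined(__GNUC__) || defined(__SUNPRO_C) || defined(__xlC__)) \
          && !defined(__APPLE__)
    #  define thread_local __thread             /* GCC-style TLS */
    #else
    #  define UNSAFE   /* no TLS (e.g. ARMv7 iOS): callers must serialize */
    #endif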
@@ -61,20 +61,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     vldmia.f64  X!, { d4 }
     vcmpe.f64   d4, d6          // compare with 0.0
     vmrs        APSR_nzcv, fpscr
-    beq         KERNEL_F1_NEXT_\@
+    beq         1f  /* KERNEL_F1_NEXT_\@ */
     vabs.f64    d4, d4
     vcmpe.f64   d0, d4          // compare with scale
     vmrs        APSR_nzcv, fpscr
     vdivge.f64  d2 , d4, d0     // scale >= x ? x / scale
     vmlage.f64  d1 , d2 , d2    // ssq += ( x/scale ) * ( x/scale )
-    bge         KERNEL_F1_NEXT_\@
+    bge         1f  /* KERNEL_F1_NEXT_\@ */
     vdiv.f64    d2 , d0, d4     // scale / x
     vmul.f64    d2 , d2, d2     // ( scale / x ) * ( scale / x )
     vmul.f64    d3 , d1, d2     // ssq * ( scale / x ) * ( scale / x )
     vadd.f64    d1 , d3, d7     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
     vmov.f64    d0 , d4         // scale = x
 
-KERNEL_F1_NEXT_\@:
+1:  /* KERNEL_F1_NEXT_\@: */
 
 .endm
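Background on the label rewrite in this file: GNU as expands \@ inside a macro body to a counter that is unique per expansion, so KERNEL_F1_NEXT_\@ yields a fresh label each time the macro is used. The Xcode (clang integrated) assembler does not implement \@, so a second expansion would redefine the same label. Numeric local labels may be redefined freely, and 1f branches forward to the nearest following 1:. A minimal sketch of the pattern (hypothetical macro, not from the patch):

    .macro  clamp_min reg           // clamp \reg to a minimum of 0
            cmp     \reg, #0
            bge     1f              // forward branch to the nearest 1:
            mov     \reg, #0
    1:                              // numeric local label; redefinable
    .endm

            clamp_min r0            // each expansion gets its own 1:
            clamp_min r1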
@@ -124,20 +124,20 @@ KERNEL_S1_NEXT:
     vldmia.f32  X!, { s4 }
     vcmpe.f32   s4, s6          // compare with 0.0
     vmrs        APSR_nzcv, fpscr
-    beq         KERNEL_F1_NEXT_\@
+    beq         1f  /* KERNEL_F1_NEXT_\@ */
     vabs.f32    s4, s4
     vcmpe.f32   s0, s4          // compare with scale
     vmrs        APSR_nzcv, fpscr
     vdivge.f32  s2 , s4, s0     // scale >= x ? x / scale
     vmlage.f32  s1 , s2 , s2    // ssq += ( x/scale ) * ( x/scale )
-    bge         KERNEL_F1_NEXT_\@
+    bge         1f  /* KERNEL_F1_NEXT_\@ */
     vdiv.f32    s2 , s0, s4     // scale / x
     vmul.f32    s2 , s2, s2     // ( scale / x ) * ( scale / x )
     vmul.f32    s3 , s1, s2     // ssq * ( scale / x ) * ( scale / x )
     vadd.f32    s1 , s3, s7     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
     vmov.f32    s0 , s4         // scale = x
 
-KERNEL_F1_NEXT_\@:
+1:  /* KERNEL_F1_NEXT_\@: */
 
 .endm
@@ -195,37 +195,37 @@ KERNEL_S1_NEXT:
 
     vcmpe.f64   d4, d6          // compare with 0.0
     vmrs        APSR_nzcv, fpscr
-    beq         KERNEL_F1_NEXT_\@
+    beq         1f  /* KERNEL_F1_NEXT_\@ */
     vabs.f64    d4, d4
     vcmpe.f64   d0, d4          // compare with scale
     vmrs        APSR_nzcv, fpscr
     vdivge.f64  d2 , d4, d0     // scale >= x ? x / scale
     vmlage.f64  d1 , d2 , d2    // ssq += ( x/scale ) * ( x/scale )
-    bge         KERNEL_F1_NEXT_\@
+    bge         1f  /* KERNEL_F1_NEXT_\@ */
     vdiv.f64    d2 , d0, d4     // scale / x
     vmul.f64    d2 , d2, d2     // ( scale / x ) * ( scale / x )
     vmul.f64    d3 , d1, d2     // ssq * ( scale / x ) * ( scale / x )
     vadd.f64    d1 , d3, d7     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
     vmov.f64    d0 , d4         // scale = x
 
-KERNEL_F1_NEXT_\@:
+1:  /* KERNEL_F1_NEXT_\@: */
 
     vcmpe.f64   d5, d6          // compare with 0.0
     vmrs        APSR_nzcv, fpscr
-    beq         KERNEL_F1_END_\@
+    beq         2f  /* KERNEL_F1_END_\@ */
     vabs.f64    d5, d5
     vcmpe.f64   d0, d5          // compare with scale
     vmrs        APSR_nzcv, fpscr
     vdivge.f64  d2 , d5, d0     // scale >= x ? x / scale
     vmlage.f64  d1 , d2 , d2    // ssq += ( x/scale ) * ( x/scale )
-    bge         KERNEL_F1_END_\@
+    bge         2f  /* KERNEL_F1_END_\@ */
     vdiv.f64    d2 , d0, d5     // scale / x
     vmul.f64    d2 , d2, d2     // ( scale / x ) * ( scale / x )
     vmul.f64    d3 , d1, d2     // ssq * ( scale / x ) * ( scale / x )
     vadd.f64    d1 , d3, d7     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
     vmov.f64    d0 , d5         // scale = x
 
-KERNEL_F1_END_\@:
+2:  /* KERNEL_F1_END_\@: */
 
 
 .endm
@@ -253,37 +253,37 @@ KERNEL_F1_END_\@:
 
     vcmpe.f64   d4, d6          // compare with 0.0
     vmrs        APSR_nzcv, fpscr
-    beq         KERNEL_S1_NEXT_\@
+    beq         1f  /* KERNEL_S1_NEXT_\@ */
    vabs.f64     d4, d4
     vcmpe.f64   d0, d4          // compare with scale
     vmrs        APSR_nzcv, fpscr
     vdivge.f64  d2 , d4, d0     // scale >= x ? x / scale
     vmlage.f64  d1 , d2 , d2    // ssq += ( x/scale ) * ( x/scale )
-    bge         KERNEL_S1_NEXT_\@
+    bge         1f  /* KERNEL_S1_NEXT_\@ */
     vdiv.f64    d2 , d0, d4     // scale / x
     vmul.f64    d2 , d2, d2     // ( scale / x ) * ( scale / x )
     vmul.f64    d3 , d1, d2     // ssq * ( scale / x ) * ( scale / x )
     vadd.f64    d1 , d3, d7     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
     vmov.f64    d0 , d4         // scale = x
 
-KERNEL_S1_NEXT_\@:
+1:  /* KERNEL_S1_NEXT_\@: */
 
     vcmpe.f64   d5, d6          // compare with 0.0
     vmrs        APSR_nzcv, fpscr
-    beq         KERNEL_S1_END_\@
+    beq         2f  /* KERNEL_S1_END_\@ */
     vabs.f64    d5, d5
     vcmpe.f64   d0, d5          // compare with scale
     vmrs        APSR_nzcv, fpscr
     vdivge.f64  d2 , d5, d0     // scale >= x ? x / scale
     vmlage.f64  d1 , d2 , d2    // ssq += ( x/scale ) * ( x/scale )
-    bge         KERNEL_S1_END_\@
+    bge         2f  /* KERNEL_S1_END_\@ */
     vdiv.f64    d2 , d0, d5     // scale / x
     vmul.f64    d2 , d2, d2     // ( scale / x ) * ( scale / x )
     vmul.f64    d3 , d1, d2     // ssq * ( scale / x ) * ( scale / x )
     vadd.f64    d1 , d3, d7     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
     vmov.f64    d0 , d5         // scale = x
 
-KERNEL_S1_END_\@:
+2:  /* KERNEL_S1_END_\@: */
 
     add     X, X, INC_X
@@ -298,37 +298,37 @@ KERNEL_S1_END_\@:
 
     vcmpe.f32   s4, s6          // compare with 0.0
     vmrs        APSR_nzcv, fpscr
-    beq         KERNEL_F1_NEXT_\@
+    beq         1f  /* KERNEL_F1_NEXT_\@ */
     vabs.f32    s4, s4
     vcmpe.f32   s0, s4          // compare with scale
     vmrs        APSR_nzcv, fpscr
     vdivge.f32  s2 , s4, s0     // scale >= x ? x / scale
     vmlage.f32  s1 , s2 , s2    // ssq += ( x/scale ) * ( x/scale )
-    bge         KERNEL_F1_NEXT_\@
+    bge         1f  /* KERNEL_F1_NEXT_\@ */
     vdiv.f32    s2 , s0, s4     // scale / x
     vmul.f32    s2 , s2, s2     // ( scale / x ) * ( scale / x )
     vmul.f32    s3 , s1, s2     // ssq * ( scale / x ) * ( scale / x )
     vadd.f32    s1 , s3, s7     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
     vmov.f32    s0 , s4         // scale = x
 
-KERNEL_F1_NEXT_\@:
+1:  /* KERNEL_F1_NEXT_\@: */
 
     vcmpe.f32   s5, s6          // compare with 0.0
     vmrs        APSR_nzcv, fpscr
-    beq         KERNEL_F1_END_\@
+    beq         2f  /* KERNEL_F1_END_\@ */
     vabs.f32    s5, s5
     vcmpe.f32   s0, s5          // compare with scale
     vmrs        APSR_nzcv, fpscr
     vdivge.f32  s2 , s5, s0     // scale >= x ? x / scale
     vmlage.f32  s1 , s2 , s2    // ssq += ( x/scale ) * ( x/scale )
-    bge         KERNEL_F1_END_\@
+    bge         2f  /* KERNEL_F1_END_\@ */
     vdiv.f32    s2 , s0, s5     // scale / x
     vmul.f32    s2 , s2, s2     // ( scale / x ) * ( scale / x )
     vmul.f32    s3 , s1, s2     // ssq * ( scale / x ) * ( scale / x )
     vadd.f32    s1 , s3, s7     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
     vmov.f32    s0 , s5         // scale = x
 
-KERNEL_F1_END_\@:
+2:  /* KERNEL_F1_END_\@: */
 
 
 .endm
@@ -354,37 +354,37 @@ KERNEL_F1_END_\@:
 
     vcmpe.f32   s4, s6          // compare with 0.0
     vmrs        APSR_nzcv, fpscr
-    beq         KERNEL_S1_NEXT_\@
+    beq         1f  /* KERNEL_S1_NEXT_\@ */
     vabs.f32    s4, s4
     vcmpe.f32   s0, s4          // compare with scale
     vmrs        APSR_nzcv, fpscr
     vdivge.f32  s2 , s4, s0     // scale >= x ? x / scale
     vmlage.f32  s1 , s2 , s2    // ssq += ( x/scale ) * ( x/scale )
-    bge         KERNEL_S1_NEXT_\@
+    bge         1f  /* KERNEL_S1_NEXT_\@ */
     vdiv.f32    s2 , s0, s4     // scale / x
     vmul.f32    s2 , s2, s2     // ( scale / x ) * ( scale / x )
     vmul.f32    s3 , s1, s2     // ssq * ( scale / x ) * ( scale / x )
     vadd.f32    s1 , s3, s7     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
     vmov.f32    s0 , s4         // scale = x
 
-KERNEL_S1_NEXT_\@:
+1:  /* KERNEL_S1_NEXT_\@: */
 
     vcmpe.f32   s5, s6          // compare with 0.0
     vmrs        APSR_nzcv, fpscr
-    beq         KERNEL_S1_END_\@
+    beq         2f  /* KERNEL_S1_END_\@ */
     vabs.f32    s5, s5
     vcmpe.f32   s0, s5          // compare with scale
     vmrs        APSR_nzcv, fpscr
     vdivge.f32  s2 , s5, s0     // scale >= x ? x / scale
     vmlage.f32  s1 , s2 , s2    // ssq += ( x/scale ) * ( x/scale )
-    bge         KERNEL_S1_END_\@
+    bge         2f  /* KERNEL_S1_END_\@ */
     vdiv.f32    s2 , s0, s5     // scale / x
     vmul.f32    s2 , s2, s2     // ( scale / x ) * ( scale / x )
     vmul.f32    s3 , s1, s2     // ssq * ( scale / x ) * ( scale / x )
     vadd.f32    s1 , s3, s7     // ssq = 1 + ssq * ( scale / x ) * ( scale / x )
     vmov.f32    s0 , s5         // scale = x
 
-KERNEL_S1_END_\@:
+2:  /* KERNEL_S1_END_\@: */
 
     add     X, X, INC_X