Merge pull request #2277 from martin-frbg/issue2275

Rewrite ARMV8 code to allow cross-compilation for IOS
This commit is contained in:
Martin Kroeker 2019-10-06 23:01:54 +02:00 committed by GitHub
commit d2093a40d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 36 additions and 33 deletions

View File

@@ -103,12 +103,14 @@ static inline int blas_quickdivide(blasint x, blasint y){
#if defined(ASSEMBLER) && !defined(NEEDPARAM) #if defined(ASSEMBLER) && !defined(NEEDPARAM)
#define PROLOGUE \ .macro PROLOGUE
.text ;\ .text ;
.align 4 ;\ .p2align 2 ;
.global REALNAME ;\ .global REALNAME ;
.type REALNAME, %function ;\ .type REALNAME, %function ;
REALNAME: REALNAME:
.endm
#define EPILOGUE #define EPILOGUE

View File

@@ -54,37 +54,38 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(DOUBLE) #if !defined(DOUBLE)
ldr s4, [X], #4 ldr s4, [X], #4
fcmp s4, REGZERO fcmp s4, REGZERO
beq KERNEL_F1_NEXT_\@ beq 2f /* KERNEL_F1_NEXT_\@ */
beq 2f
fabs s4, s4 fabs s4, s4
fcmp SCALE, s4 fcmp SCALE, s4
bge KERNEL_F1_SCALE_GE_X_\@ bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
fdiv s2, SCALE, s4 fdiv s2, SCALE, s4
fmul s2, s2, s2 fmul s2, s2, s2
fmul s3, SSQ, s2 fmul s3, SSQ, s2
fadd SSQ, REGONE, s3 fadd SSQ, REGONE, s3
fmov SCALE, s4 fmov SCALE, s4
b KERNEL_F1_NEXT_\@ b 2f /* KERNEL_F1_NEXT_\@ */
KERNEL_F1_SCALE_GE_X_\@: 1: /* KERNEL_F1_SCALE_GE_X_\@: */
fdiv s2, s4, SCALE fdiv s2, s4, SCALE
fmla SSQ, s2, v2.s[0] fmla SSQ, s2, v2.s[0]
#else #else
ldr d4, [X], #8 ldr d4, [X], #8
fcmp d4, REGZERO fcmp d4, REGZERO
beq KERNEL_F1_NEXT_\@ beq 2f /* KERNEL_F1_NEXT_\@ */
fabs d4, d4 fabs d4, d4
fcmp SCALE, d4 fcmp SCALE, d4
bge KERNEL_F1_SCALE_GE_X_\@ bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
fdiv d2, SCALE, d4 fdiv d2, SCALE, d4
fmul d2, d2, d2 fmul d2, d2, d2
fmul d3, SSQ, d2 fmul d3, SSQ, d2
fadd SSQ, REGONE, d3 fadd SSQ, REGONE, d3
fmov SCALE, d4 fmov SCALE, d4
b KERNEL_F1_NEXT_\@ b 2f /* KERNEL_F1_NEXT_\@ */
KERNEL_F1_SCALE_GE_X_\@: 1: /* KERNEL_F1_SCALE_GE_X_\@: */
fdiv d2, d4, SCALE fdiv d2, d4, SCALE
fmla SSQ, d2, v2.d[0] fmla SSQ, d2, v2.d[0]
#endif #endif
KERNEL_F1_NEXT_\@: 2: /* KERNEL_F1_NEXT_\@: */
.endm .endm
.macro KERNEL_S1 .macro KERNEL_S1

View File

@@ -54,69 +54,69 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(DOUBLE) #if !defined(DOUBLE)
ldr s4, [X], #4 ldr s4, [X], #4
fcmp s4, REGZERO fcmp s4, REGZERO
beq KERNEL_F1_NEXT_\@ beq 2f /* KERNEL_F1_NEXT_\@ */
fabs s4, s4 fabs s4, s4
fcmp SCALE, s4 fcmp SCALE, s4
bge KERNEL_F1_SCALE_GE_XR_\@ bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */
fdiv s2, SCALE, s4 fdiv s2, SCALE, s4
fmul s2, s2, s2 fmul s2, s2, s2
fmul s3, SSQ, s2 fmul s3, SSQ, s2
fadd SSQ, REGONE, s3 fadd SSQ, REGONE, s3
fmov SCALE, s4 fmov SCALE, s4
b KERNEL_F1_NEXT_\@ b 2f /* KERNEL_F1_NEXT_\@ */
KERNEL_F1_SCALE_GE_XR_\@: 1: /* KERNEL_F1_SCALE_GE_XR_\@: */
fdiv s2, s4, SCALE fdiv s2, s4, SCALE
fmla SSQ, s2, v2.s[0] fmla SSQ, s2, v2.s[0]
KERNEL_F1_NEXT_\@: 2: /* KERNEL_F1_NEXT_\@: */
ldr s5, [X], #4 ldr s5, [X], #4
fcmp s5, REGZERO fcmp s5, REGZERO
beq KERNEL_F1_END_\@ beq 4f /* KERNEL_F1_END_\@ */
fabs s5, s5 fabs s5, s5
fcmp SCALE, s5 fcmp SCALE, s5
bge KERNEL_F1_SCALE_GE_XI_\@ bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */
fdiv s2, SCALE, s5 fdiv s2, SCALE, s5
fmul s2, s2, s2 fmul s2, s2, s2
fmul s3, SSQ, s2 fmul s3, SSQ, s2
fadd SSQ, REGONE, s3 fadd SSQ, REGONE, s3
fmov SCALE, s5 fmov SCALE, s5
b KERNEL_F1_END_\@ b 4f /* KERNEL_F1_END_\@ */
KERNEL_F1_SCALE_GE_XI_\@: 3: /* KERNEL_F1_SCALE_GE_XI_\@: */
fdiv s2, s5, SCALE fdiv s2, s5, SCALE
fmla SSQ, s2, v2.s[0] fmla SSQ, s2, v2.s[0]
#else #else
ldr d4, [X], #8 ldr d4, [X], #8
fcmp d4, REGZERO fcmp d4, REGZERO
beq KERNEL_F1_NEXT_\@ beq 2f /* KERNEL_F1_NEXT_\@ */
fabs d4, d4 fabs d4, d4
fcmp SCALE, d4 fcmp SCALE, d4
bge KERNEL_F1_SCALE_GE_XR_\@ bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */
fdiv d2, SCALE, d4 fdiv d2, SCALE, d4
fmul d2, d2, d2 fmul d2, d2, d2
fmul d3, SSQ, d2 fmul d3, SSQ, d2
fadd SSQ, REGONE, d3 fadd SSQ, REGONE, d3
fmov SCALE, d4 fmov SCALE, d4
b KERNEL_F1_NEXT_\@ b 2f /* KERNEL_F1_NEXT_\@ */
KERNEL_F1_SCALE_GE_XR_\@: 1: /* KERNEL_F1_SCALE_GE_XR_\@: */
fdiv d2, d4, SCALE fdiv d2, d4, SCALE
fmla SSQ, d2, v2.d[0] fmla SSQ, d2, v2.d[0]
KERNEL_F1_NEXT_\@: 2: /* KERNEL_F1_NEXT_\@: */
ldr d5, [X], #8 ldr d5, [X], #8
fcmp d5, REGZERO fcmp d5, REGZERO
beq KERNEL_F1_END_\@ beq 4f /* KERNEL_F1_END_\@ */
fabs d5, d5 fabs d5, d5
fcmp SCALE, d5 fcmp SCALE, d5
bge KERNEL_F1_SCALE_GE_XI_\@ bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */
fdiv d2, SCALE, d5 fdiv d2, SCALE, d5
fmul d2, d2, d2 fmul d2, d2, d2
fmul d3, SSQ, d2 fmul d3, SSQ, d2
fadd SSQ, REGONE, d3 fadd SSQ, REGONE, d3
fmov SCALE, d5 fmov SCALE, d5
b KERNEL_F1_END_\@ b 4f /* KERNEL_F1_END_\@ */
KERNEL_F1_SCALE_GE_XI_\@: 3: /* KERNEL_F1_SCALE_GE_XI_\@: */
fdiv d2, d5, SCALE fdiv d2, d5, SCALE
fmla SSQ, d2, v2.d[0] fmla SSQ, d2, v2.d[0]
#endif #endif
KERNEL_F1_END_\@: 4: /* KERNEL_F1_END_\@: */
.endm .endm
.macro KERNEL_S1 .macro KERNEL_S1