From bb5413863fbf52dc5b8f2fd1b814b80c938d8c39 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Fri, 4 Oct 2019 14:50:03 +0200
Subject: [PATCH 1/2] Rewrite ARM64 PROLOGUE to make it compatible with xcode/ios

---
Review notes:
 * `.type` is now wrapped in `#if !defined(__APPLE__)`. It is an ELF
   directive that the Mach-O (Xcode) assembler does not accept, so leaving
   it unguarded defeats the purpose of this patch. ELF builds are unchanged.
 * NOTE(review): `.align 4` became `.p2align 2`. If `.align` takes a
   power-of-two exponent on this target, function alignment drops from
   16 to 4 bytes - confirm this is intended.
 * The post-image blob id on the `index` line below was not recomputed.

 common_arm64.h | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/common_arm64.h b/common_arm64.h
index c6ef2fb5d..c5e6948dc 100644
--- a/common_arm64.h
+++ b/common_arm64.h
@@ -103,12 +103,16 @@ static inline int blas_quickdivide(blasint x, blasint y){
 
 #if defined(ASSEMBLER) && !defined(NEEDPARAM)
 
-#define PROLOGUE \
- .text ;\
- .align 4 ;\
- .global REALNAME ;\
- .type REALNAME, %function ;\
+.macro PROLOGUE
+ .text ;
+ .p2align 2 ;
+ .global REALNAME ;
+#if !defined(__APPLE__) /* .type is ELF-only; the Mach-O assembler rejects it */
+ .type REALNAME, %function ;
+#endif
 REALNAME:
+.endm
+
 
 #define EPILOGUE
 

From 56837e9d92c41290b07bc924915c633e39401abb Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Fri, 4 Oct 2019 14:53:23 +0200
Subject: [PATCH 2/2] Make local labels in macro compatible with the xcode assembler

... which does not perform the automatic numbering on instantiation
that the _\@ suffix signifies
---
Review notes:
 * Dropped a duplicated `beq 2f` from the single-precision path of
   KERNEL_F1 in nrm2.S. The second branch tested the same flags right
   after the first and could never be taken; the double-precision path
   already had only one.
 * The post-image blob id for nrm2.S on the `index` line was not recomputed.

 kernel/arm64/nrm2.S  | 18 +++++++++---------
 kernel/arm64/znrm2.S | 38 +++++++++++++++++++-------------------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/kernel/arm64/nrm2.S b/kernel/arm64/nrm2.S
index e2cbd4def..d4f0374cb 100644
--- a/kernel/arm64/nrm2.S
+++ b/kernel/arm64/nrm2.S
@@ -54,37 +54,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if !defined(DOUBLE)
  ldr s4, [X], #4
  fcmp s4, REGZERO
- beq KERNEL_F1_NEXT_\@
+ beq 2f /* KERNEL_F1_NEXT_\@ */
  fabs s4, s4
  fcmp SCALE, s4
- bge KERNEL_F1_SCALE_GE_X_\@
+ bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
  fdiv s2, SCALE, s4
  fmul s2, s2, s2
  fmul s3, SSQ, s2
  fadd SSQ, REGONE, s3
  fmov SCALE, s4
- b KERNEL_F1_NEXT_\@
-KERNEL_F1_SCALE_GE_X_\@:
+ b 2f /* KERNEL_F1_NEXT_\@ */
+1: /* KERNEL_F1_SCALE_GE_X_\@: */
  fdiv s2, s4, SCALE
  fmla SSQ, s2, v2.s[0]
 #else
  ldr d4, [X], #8
  fcmp d4, REGZERO
- beq KERNEL_F1_NEXT_\@
+ beq 2f /* KERNEL_F1_NEXT_\@ */
  fabs d4, d4
  fcmp SCALE, d4
- bge KERNEL_F1_SCALE_GE_X_\@
+ bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
  fdiv d2, SCALE, d4
  fmul d2, d2, d2
  fmul d3, SSQ, d2
  fadd SSQ, REGONE, d3
  fmov SCALE, d4
- b KERNEL_F1_NEXT_\@
-KERNEL_F1_SCALE_GE_X_\@:
+ b 2f /* KERNEL_F1_NEXT_\@ */
+1: /* KERNEL_F1_SCALE_GE_X_\@: */
  fdiv d2, d4, SCALE
  fmla SSQ, d2, v2.d[0]
 #endif
-KERNEL_F1_NEXT_\@:
+2: /* KERNEL_F1_NEXT_\@: */
 .endm
 
 .macro KERNEL_S1
diff --git a/kernel/arm64/znrm2.S b/kernel/arm64/znrm2.S
index 1c89685ea..ce3f7d4ed 100644
--- a/kernel/arm64/znrm2.S
+++ b/kernel/arm64/znrm2.S
@@ -54,69 +54,69 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #if !defined(DOUBLE)
  ldr s4, [X], #4
  fcmp s4, REGZERO
- beq KERNEL_F1_NEXT_\@
+ beq 2f /* KERNEL_F1_NEXT_\@ */
  fabs s4, s4
  fcmp SCALE, s4
- bge KERNEL_F1_SCALE_GE_XR_\@
+ bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */
  fdiv s2, SCALE, s4
  fmul s2, s2, s2
  fmul s3, SSQ, s2
  fadd SSQ, REGONE, s3
  fmov SCALE, s4
- b KERNEL_F1_NEXT_\@
-KERNEL_F1_SCALE_GE_XR_\@:
+ b 2f /* KERNEL_F1_NEXT_\@ */
+1: /* KERNEL_F1_SCALE_GE_XR_\@: */
  fdiv s2, s4, SCALE
  fmla SSQ, s2, v2.s[0]
-KERNEL_F1_NEXT_\@:
+2: /* KERNEL_F1_NEXT_\@: */
  ldr s5, [X], #4
  fcmp s5, REGZERO
- beq KERNEL_F1_END_\@
+ beq 4f /* KERNEL_F1_END_\@ */
  fabs s5, s5
  fcmp SCALE, s5
- bge KERNEL_F1_SCALE_GE_XI_\@
+ bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */
  fdiv s2, SCALE, s5
  fmul s2, s2, s2
  fmul s3, SSQ, s2
  fadd SSQ, REGONE, s3
  fmov SCALE, s5
- b KERNEL_F1_END_\@
-KERNEL_F1_SCALE_GE_XI_\@:
+ b 4f /* KERNEL_F1_END_\@ */
+3: /* KERNEL_F1_SCALE_GE_XI_\@: */
  fdiv s2, s5, SCALE
  fmla SSQ, s2, v2.s[0]
 #else
  ldr d4, [X], #8
  fcmp d4, REGZERO
- beq KERNEL_F1_NEXT_\@
+ beq 2f /* KERNEL_F1_NEXT_\@ */
  fabs d4, d4
  fcmp SCALE, d4
- bge KERNEL_F1_SCALE_GE_XR_\@
+ bge 1f /* KERNEL_F1_SCALE_GE_XR_\@ */
  fdiv d2, SCALE, d4
  fmul d2, d2, d2
  fmul d3, SSQ, d2
  fadd SSQ, REGONE, d3
  fmov SCALE, d4
- b KERNEL_F1_NEXT_\@
-KERNEL_F1_SCALE_GE_XR_\@:
+ b 2f /* KERNEL_F1_NEXT_\@ */
+1: /* KERNEL_F1_SCALE_GE_XR_\@: */
  fdiv d2, d4, SCALE
  fmla SSQ, d2, v2.d[0]
-KERNEL_F1_NEXT_\@:
+2: /* KERNEL_F1_NEXT_\@: */
  ldr d5, [X], #8
  fcmp d5, REGZERO
- beq KERNEL_F1_END_\@
+ beq 4f /* KERNEL_F1_END_\@ */
  fabs d5, d5
  fcmp SCALE, d5
- bge KERNEL_F1_SCALE_GE_XI_\@
+ bge 3f /* KERNEL_F1_SCALE_GE_XI_\@ */
  fdiv d2, SCALE, d5
  fmul d2, d2, d2
  fmul d3, SSQ, d2
  fadd SSQ, REGONE, d3
  fmov SCALE, d5
- b KERNEL_F1_END_\@
-KERNEL_F1_SCALE_GE_XI_\@:
+ b 4f /* KERNEL_F1_END_\@ */
+3: /* KERNEL_F1_SCALE_GE_XI_\@: */
  fdiv d2, d5, SCALE
  fmla SSQ, d2, v2.d[0]
 #endif
-KERNEL_F1_END_\@:
+4: /* KERNEL_F1_END_\@: */
 .endm
 
 .macro KERNEL_S1