From 9605ba5b5ac169feb613fe88193fe65ad7eef869 Mon Sep 17 00:00:00 2001
From: Martin Kroeker
Date: Thu, 18 Nov 2021 10:43:02 +0100
Subject: [PATCH] Update dgemm_kernel_4x4_cortexA53.c

---
 kernel/arm64/dgemm_kernel_4x4_cortexA53.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/arm64/dgemm_kernel_4x4_cortexA53.c b/kernel/arm64/dgemm_kernel_4x4_cortexA53.c
index 4d2a58d32..31a898b43 100644
--- a/kernel/arm64/dgemm_kernel_4x4_cortexA53.c
+++ b/kernel/arm64/dgemm_kernel_4x4_cortexA53.c
@@ -81,7 +81,8 @@ static inline void dgemm_kernel_arm_cortex_a53_4x4_m4n12(
  /** register v0-v3 for loading A, v4-v7 for loading B, x0 for transporting data */
  "ldp q0,q1,[%[sa]]; ldp q4,q5,[%[b1_]]\n\t"
  "ldr d6,[%[b2_]]; ldr x0,[%[b2_],#8]\n\t"
- "blt 3f; beq 2f; 1:\n\t"
+ "blt 3f; beq 2f;\n\t"
+ "1:\n\t"
  /** main loop with unroll_k = 2, specially designed for cortex-A53 NEON pipeline */
  "ldr d7,[%[b2_],#16]; fmov v6.d[1],x0\n\t"
  "fmla v8.2d,v0.2d,v4.d[0]; ldr x0,[%[b2_],#24]\n\t"
@@ -147,7 +148,8 @@ static inline void dgemm_kernel_arm_cortex_a53_4x4_m4n12(
  "fmla v29.2d,v3.2d,v7.d[0]; ldr x0,[%[b2_],#8]\n\t"
  "fmla v30.2d,v2.2d,v7.d[1]; cmp %[K],#2\n\t"
  "fmla v31.2d,v3.2d,v7.d[1]\n\t"
- "bgt 1b; blt 3f; 2:\n\t"
+ "bgt 1b; blt 3f;\n"
+ "2:\n\t"
  /** tail part with k = 2 */
  "ldr d7,[%[b2_],#16]; fmov v6.d[1],x0\n\t"
  "fmla v8.2d,v0.2d,v4.d[0]; ldr x0,[%[b2_],#24]\n\t"