From 52de4cc8fdf976e0e09f81904e4f427b4dc64015 Mon Sep 17 00:00:00 2001 From: chenxuqiang Date: Wed, 1 Jan 2020 21:50:45 -0500 Subject: [PATCH] kernel/arm64/dgemm_beta.S: add beta == zero branch added beta == zero branch, and no need to load C matrix. Signed by: Xuqiang Chen --- kernel/arm64/dgemm_beta.S | 69 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) diff --git a/kernel/arm64/dgemm_beta.S b/kernel/arm64/dgemm_beta.S index 1ce452212..20011c343 100644 --- a/kernel/arm64/dgemm_beta.S +++ b/kernel/arm64/dgemm_beta.S @@ -80,6 +80,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. add sp, sp, #(11*16) .endm +.macro INIT_ZERO + fmul v0.2d, v0.2d, betaV0 + fmul v1.2d, v1.2d, betaV0 + fmul v2.2d, v2.2d, betaV0 + fmul v3.2d, v3.2d, betaV0 + fmul v4.2d, v4.2d, betaV0 + fmul v5.2d, v5.2d, betaV0 + fmul v6.2d, v6.2d, betaV0 + fmul v7.2d, v7.2d, betaV0 +.endm + /************************************************************************************** * End of macro definitions **************************************************************************************/ @@ -97,6 +108,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. cmp N, #0 ble .Lgemm_beta_L999 + fcmp BETA, #0.0 + beq .Lgemm_beta_zero_01 + .Lgemm_beta_01: lsl LDC, LDC, #3 @@ -180,4 +194,59 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. RESTORE_REGS ret +.Lgemm_beta_zero_01: + INIT_ZERO + lsl LDC, LDC, #3 + + .align 5 +.Lgemm_beta_zero_02: + mov A01, C00 + add C00, C00, LDC + + asr I, M, #4 + cmp I, #0 + ble .Lgemm_beta_zero_04 + + add A02, A01, #64 + + .align 5 +.Lgemm_beta_zero_03: + + st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [A01] + add A01, A01, calc_size + st1 {v4.2d, v5.2d, v6.2d, v7.2d}, [A02] + add A02, A02, calc_size + + subs I, I, #1 + bne .Lgemm_beta_zero_03 + + .align 5 +.Lgemm_beta_zero_04: + + and I, M, #15 + cmp I, #0 + ble .Lgemm_beta_zero_06 + + .align 5 +.Lgemm_beta_zero_05: + + str beta0, [A01] + add A01, A01, #8 + + subs I, I, #1 + bne .Lgemm_beta_zero_05 + + .align 5 +.Lgemm_beta_zero_06: + + subs N, N, #1 + bne .Lgemm_beta_zero_02 + + .align 5 +.Lgemm_beta_zero_L999: + + mov x0, #0 + RESTORE_REGS + ret + EPILOGUE