Merge pull request #2357 from chenxuqiang/dgemm_beta_zero

kernel/arm64/dgemm_beta.S: add beta == zero branch
This commit is contained in:
Martin Kroeker 2020-01-02 22:28:36 +01:00 committed by GitHub
commit 456ee2e1f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 69 additions and 0 deletions

View File

@ -80,6 +80,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add sp, sp, #(11*16)
.endm
/*
 * INIT_ZERO - load +0.0 into both double-precision lanes of v0..v7.
 *
 * The beta == 0 path stores these registers directly into C, so they
 * must contain exact zeros.  Producing the zeros by multiplying the
 * registers' previous contents by beta is not reliable: this macro is
 * the only place that sets v0..v7 before they are stored, and under
 * IEEE 754 0 * NaN and 0 * Inf both yield NaN (and 0 * negative yields
 * -0.0), so stale register contents could end up in C.  MOVI writes the
 * zeros unconditionally and does not depend on the old register value.
 *
 * Arguments: none.
 * Clobbers:  v0-v7 (all 128 bits of each).
 */
.macro INIT_ZERO
	movi	v0.2d, #0
	movi	v1.2d, #0
	movi	v2.2d, #0
	movi	v3.2d, #0
	movi	v4.2d, #0
	movi	v5.2d, #0
	movi	v6.2d, #0
	movi	v7.2d, #0
.endm
/**************************************************************************************
* End of macro definitions
**************************************************************************************/
@ -97,6 +108,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* Signed test: when N <= 0 branch to .Lgemm_beta_L999 (defined outside
 * this view; presumably the common exit - confirm). */
cmp N, #0
ble .Lgemm_beta_L999
/* Dispatch on beta: when BETA compares equal to 0.0 take the store-only
 * path at .Lgemm_beta_zero_01.  FCMP reports "equal" for both +0.0 and
 * -0.0; a NaN beta compares unordered, so the branch is not taken and
 * execution falls through to .Lgemm_beta_01. */
fcmp BETA, #0.0
beq .Lgemm_beta_zero_01
.Lgemm_beta_01:
/* LDC <<= 3, i.e. LDC *= 8.  Presumably converts a stride in elements
 * to a stride in bytes for 8-byte doubles - TODO confirm against the
 * caller's definition of LDC. */
lsl LDC, LDC, #3
@ -180,4 +194,59 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
RESTORE_REGS
ret
/*
 * beta == 0 path: fill C with zeros using stores only (C is never read).
 *
 * Outer loop: runs N times; each pass starts at C00 and then advances
 *             C00 by LDC (scaled by 8 below).
 * Block loop: M >> 4 passes, each issuing two 64-byte ST1 stores of
 *             v0..v7 at A01 and A01 + 64, then stepping both pointers by
 *             calc_size (defined outside this view; the stores are
 *             contiguous only if it is 128 bytes - confirm).
 * Tail loop:  M & 15 scalar stores of beta0, 8 bytes apart.
 *
 * Returns 0 in x0.
 */
.Lgemm_beta_zero_01:
	lsl	LDC, LDC, #3			/* LDC *= 8 */
	INIT_ZERO				/* prepare v0..v7 as the store source */

	.align 5
.Lgemm_beta_zero_column:
	mov	A01, C00			/* A01 = start of the current pass */
	add	C00, C00, LDC			/* C00 = start of the next pass */

	asr	I, M, #4			/* I = number of 16-element blocks */
	cmp	I, #0
	b.le	.Lgemm_beta_zero_tail		/* signed: also skips when I < 0 */

	add	A02, A01, #64			/* second half of each block */

	.align 5
.Lgemm_beta_zero_block:
	st1	{v0.2d, v1.2d, v2.2d, v3.2d}, [A01]
	st1	{v4.2d, v5.2d, v6.2d, v7.2d}, [A02]
	add	A01, A01, calc_size
	add	A02, A02, calc_size
	subs	I, I, #1
	b.ne	.Lgemm_beta_zero_block

	.align 5
.Lgemm_beta_zero_tail:
	/* I = M mod 16.  The masked value lies in 0..15, so "equal to
	 * zero" is the only case in which the tail loop is skipped. */
	ands	I, M, #15
	b.eq	.Lgemm_beta_zero_next_column

	.align 5
.Lgemm_beta_zero_tail_store:
	str	beta0, [A01]			/* beta0 compared equal to 0.0 */
	add	A01, A01, #8
	subs	I, I, #1
	b.ne	.Lgemm_beta_zero_tail_store

	.align 5
.Lgemm_beta_zero_next_column:
	subs	N, N, #1
	b.ne	.Lgemm_beta_zero_column

	.align 5
.Lgemm_beta_zero_L999:
	mov	x0, xzr				/* return value 0 */
	RESTORE_REGS
	ret

	EPILOGUE