ARM64: Convert all labels to local labels
When applications are debugged or profiled with perf or similar tools, the kernels appear scattered across the profile report. This happens because the labels inside the kernels are not local: each one is emitted as an ordinary symbol, so the tools report every label as a separate function. To avoid this, all labels within the kernels are changed to assembler-local labels (prefixed with `.L`).
This commit is contained in:
@@ -549,11 +549,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
mov counterJ, origN
|
||||
asr counterJ, counterJ, #2 // J = J / 4
|
||||
cmp counterJ, #0
|
||||
ble dtrmm_kernel_L2_BEGIN
|
||||
ble .Ldtrmm_kernel_L2_BEGIN
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
dtrmm_kernel_L4_BEGIN:
|
||||
.Ldtrmm_kernel_L4_BEGIN:
|
||||
mov pCRow0, pC // pCRow0 = C
|
||||
add pC, pC, LDC, lsl #2
|
||||
|
||||
@@ -563,14 +563,14 @@ dtrmm_kernel_L4_BEGIN:
|
||||
|
||||
mov pA, origPA // pA = start of A array
|
||||
|
||||
dtrmm_kernel_L4_M4_BEGIN:
|
||||
.Ldtrmm_kernel_L4_M4_BEGIN:
|
||||
|
||||
mov counterI, origM
|
||||
asr counterI, counterI, #2 // counterI = counterI / 4
|
||||
cmp counterI, #0
|
||||
ble dtrmm_kernel_L4_M2_BEGIN
|
||||
ble .Ldtrmm_kernel_L4_M2_BEGIN
|
||||
|
||||
dtrmm_kernel_L4_M4_20:
|
||||
.Ldtrmm_kernel_L4_M4_20:
|
||||
|
||||
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
|
||||
mov pB, origPB
|
||||
@@ -591,57 +591,57 @@ dtrmm_kernel_L4_M4_20:
|
||||
|
||||
asr counterL , tempK, #1 // L = K / 2
|
||||
cmp counterL , #2 // is there at least 4 to do?
|
||||
blt dtrmm_kernel_L4_M4_32
|
||||
blt .Ldtrmm_kernel_L4_M4_32
|
||||
|
||||
KERNEL4x4_I // do one in the K
|
||||
KERNEL4x4_M2 // do another in the K
|
||||
|
||||
subs counterL, counterL, #2
|
||||
ble dtrmm_kernel_L4_M4_22a
|
||||
ble .Ldtrmm_kernel_L4_M4_22a
|
||||
.align 5
|
||||
|
||||
dtrmm_kernel_L4_M4_22:
|
||||
.Ldtrmm_kernel_L4_M4_22:
|
||||
|
||||
KERNEL4x4_M1
|
||||
KERNEL4x4_M2
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L4_M4_22
|
||||
bgt .Ldtrmm_kernel_L4_M4_22
|
||||
|
||||
|
||||
dtrmm_kernel_L4_M4_22a:
|
||||
.Ldtrmm_kernel_L4_M4_22a:
|
||||
|
||||
KERNEL4x4_M1
|
||||
KERNEL4x4_E
|
||||
|
||||
b dtrmm_kernel_L4_M4_44
|
||||
b .Ldtrmm_kernel_L4_M4_44
|
||||
|
||||
dtrmm_kernel_L4_M4_32:
|
||||
.Ldtrmm_kernel_L4_M4_32:
|
||||
|
||||
tst counterL, #1
|
||||
ble dtrmm_kernel_L4_M4_40
|
||||
ble .Ldtrmm_kernel_L4_M4_40
|
||||
|
||||
KERNEL4x4_I
|
||||
|
||||
KERNEL4x4_E
|
||||
|
||||
b dtrmm_kernel_L4_M4_44
|
||||
b .Ldtrmm_kernel_L4_M4_44
|
||||
|
||||
|
||||
dtrmm_kernel_L4_M4_40:
|
||||
.Ldtrmm_kernel_L4_M4_40:
|
||||
|
||||
INIT4x4
|
||||
|
||||
dtrmm_kernel_L4_M4_44:
|
||||
.Ldtrmm_kernel_L4_M4_44:
|
||||
|
||||
ands counterL , tempK, #1
|
||||
ble dtrmm_kernel_L4_M4_100
|
||||
ble .Ldtrmm_kernel_L4_M4_100
|
||||
|
||||
dtrmm_kernel_L4_M4_46:
|
||||
.Ldtrmm_kernel_L4_M4_46:
|
||||
|
||||
KERNEL4x4_SUB
|
||||
|
||||
dtrmm_kernel_L4_M4_100:
|
||||
.Ldtrmm_kernel_L4_M4_100:
|
||||
|
||||
SAVE4x4
|
||||
|
||||
@@ -660,20 +660,20 @@ dtrmm_kernel_L4_M4_100:
|
||||
add tempOffset, tempOffset, #4
|
||||
#endif
|
||||
|
||||
dtrmm_kernel_L4_M4_END:
|
||||
.Ldtrmm_kernel_L4_M4_END:
|
||||
subs counterI, counterI, #1
|
||||
bne dtrmm_kernel_L4_M4_20
|
||||
bne .Ldtrmm_kernel_L4_M4_20
|
||||
|
||||
dtrmm_kernel_L4_M2_BEGIN:
|
||||
.Ldtrmm_kernel_L4_M2_BEGIN:
|
||||
|
||||
mov counterI, origM
|
||||
tst counterI , #3
|
||||
ble dtrmm_kernel_L4_END
|
||||
ble .Ldtrmm_kernel_L4_END
|
||||
|
||||
tst counterI, #2 // counterI = counterI / 2
|
||||
ble dtrmm_kernel_L4_M1_BEGIN
|
||||
ble .Ldtrmm_kernel_L4_M1_BEGIN
|
||||
|
||||
dtrmm_kernel_L4_M2_20:
|
||||
.Ldtrmm_kernel_L4_M2_20:
|
||||
|
||||
INIT2x4
|
||||
|
||||
@@ -697,9 +697,9 @@ dtrmm_kernel_L4_M2_20:
|
||||
|
||||
asr counterL , tempK, #3 // counterL = counterL / 8
|
||||
cmp counterL , #0
|
||||
ble dtrmm_kernel_L4_M2_40
|
||||
ble .Ldtrmm_kernel_L4_M2_40
|
||||
|
||||
dtrmm_kernel_L4_M2_22:
|
||||
.Ldtrmm_kernel_L4_M2_22:
|
||||
|
||||
KERNEL2x4_SUB
|
||||
KERNEL2x4_SUB
|
||||
@@ -712,22 +712,22 @@ dtrmm_kernel_L4_M2_22:
|
||||
KERNEL2x4_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L4_M2_22
|
||||
bgt .Ldtrmm_kernel_L4_M2_22
|
||||
|
||||
|
||||
dtrmm_kernel_L4_M2_40:
|
||||
.Ldtrmm_kernel_L4_M2_40:
|
||||
|
||||
ands counterL , tempK, #7 // counterL = counterL % 8
|
||||
ble dtrmm_kernel_L4_M2_100
|
||||
ble .Ldtrmm_kernel_L4_M2_100
|
||||
|
||||
dtrmm_kernel_L4_M2_42:
|
||||
.Ldtrmm_kernel_L4_M2_42:
|
||||
|
||||
KERNEL2x4_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L4_M2_42
|
||||
bgt .Ldtrmm_kernel_L4_M2_42
|
||||
|
||||
dtrmm_kernel_L4_M2_100:
|
||||
.Ldtrmm_kernel_L4_M2_100:
|
||||
|
||||
SAVE2x4
|
||||
|
||||
@@ -747,15 +747,15 @@ dtrmm_kernel_L4_M2_100:
|
||||
add tempOffset, tempOffset, #2
|
||||
#endif
|
||||
|
||||
dtrmm_kernel_L4_M2_END:
|
||||
.Ldtrmm_kernel_L4_M2_END:
|
||||
|
||||
|
||||
dtrmm_kernel_L4_M1_BEGIN:
|
||||
.Ldtrmm_kernel_L4_M1_BEGIN:
|
||||
|
||||
tst counterI, #1 // counterI = counterI % 2
|
||||
ble dtrmm_kernel_L4_END
|
||||
ble .Ldtrmm_kernel_L4_END
|
||||
|
||||
dtrmm_kernel_L4_M1_20:
|
||||
.Ldtrmm_kernel_L4_M1_20:
|
||||
|
||||
INIT1x4
|
||||
|
||||
@@ -779,9 +779,9 @@ dtrmm_kernel_L4_M1_20:
|
||||
|
||||
asr counterL , tempK, #3 // counterL = counterL / 8
|
||||
cmp counterL , #0
|
||||
ble dtrmm_kernel_L4_M1_40
|
||||
ble .Ldtrmm_kernel_L4_M1_40
|
||||
|
||||
dtrmm_kernel_L4_M1_22:
|
||||
.Ldtrmm_kernel_L4_M1_22:
|
||||
KERNEL1x4_SUB
|
||||
KERNEL1x4_SUB
|
||||
KERNEL1x4_SUB
|
||||
@@ -793,22 +793,22 @@ dtrmm_kernel_L4_M1_22:
|
||||
KERNEL1x4_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L4_M1_22
|
||||
bgt .Ldtrmm_kernel_L4_M1_22
|
||||
|
||||
|
||||
dtrmm_kernel_L4_M1_40:
|
||||
.Ldtrmm_kernel_L4_M1_40:
|
||||
|
||||
ands counterL , tempK, #7 // counterL = counterL % 8
|
||||
ble dtrmm_kernel_L4_M1_100
|
||||
ble .Ldtrmm_kernel_L4_M1_100
|
||||
|
||||
dtrmm_kernel_L4_M1_42:
|
||||
.Ldtrmm_kernel_L4_M1_42:
|
||||
|
||||
KERNEL1x4_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L4_M1_42
|
||||
bgt .Ldtrmm_kernel_L4_M1_42
|
||||
|
||||
dtrmm_kernel_L4_M1_100:
|
||||
.Ldtrmm_kernel_L4_M1_100:
|
||||
|
||||
SAVE1x4
|
||||
|
||||
@@ -828,7 +828,7 @@ dtrmm_kernel_L4_M1_100:
|
||||
add tempOffset, tempOffset, #1
|
||||
#endif
|
||||
|
||||
dtrmm_kernel_L4_END:
|
||||
.Ldtrmm_kernel_L4_END:
|
||||
|
||||
lsl temp, origK, #5
|
||||
add origPB, origPB, temp // B = B + K * 4 * 8
|
||||
@@ -838,19 +838,19 @@ dtrmm_kernel_L4_END:
|
||||
#endif
|
||||
|
||||
subs counterJ, counterJ , #1 // j--
|
||||
bgt dtrmm_kernel_L4_BEGIN
|
||||
bgt .Ldtrmm_kernel_L4_BEGIN
|
||||
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
dtrmm_kernel_L2_BEGIN: // less than 2 left in N direction
|
||||
.Ldtrmm_kernel_L2_BEGIN: // less than 2 left in N direction
|
||||
|
||||
mov counterJ , origN
|
||||
tst counterJ , #3
|
||||
ble dtrmm_kernel_L999 // error, N was less than 4?
|
||||
ble .Ldtrmm_kernel_L999 // error, N was less than 4?
|
||||
|
||||
tst counterJ , #2
|
||||
ble dtrmm_kernel_L1_BEGIN
|
||||
ble .Ldtrmm_kernel_L1_BEGIN
|
||||
|
||||
mov pCRow0, pC // pCRow0 = pC
|
||||
|
||||
@@ -863,14 +863,14 @@ dtrmm_kernel_L2_BEGIN: // less than 2 left in N direction
|
||||
mov pA, origPA // pA = A
|
||||
|
||||
|
||||
dtrmm_kernel_L2_M4_BEGIN:
|
||||
.Ldtrmm_kernel_L2_M4_BEGIN:
|
||||
|
||||
mov counterI, origM
|
||||
asr counterI, counterI, #2 // counterI = counterI / 4
|
||||
cmp counterI,#0
|
||||
ble dtrmm_kernel_L2_M2_BEGIN
|
||||
ble .Ldtrmm_kernel_L2_M2_BEGIN
|
||||
|
||||
dtrmm_kernel_L2_M4_20:
|
||||
.Ldtrmm_kernel_L2_M4_20:
|
||||
|
||||
INIT4x2
|
||||
|
||||
@@ -894,10 +894,10 @@ dtrmm_kernel_L2_M4_20:
|
||||
|
||||
asr counterL , tempK, #3 // counterL = counterL / 8
|
||||
cmp counterL,#0
|
||||
ble dtrmm_kernel_L2_M4_40
|
||||
ble .Ldtrmm_kernel_L2_M4_40
|
||||
.align 5
|
||||
|
||||
dtrmm_kernel_L2_M4_22:
|
||||
.Ldtrmm_kernel_L2_M4_22:
|
||||
KERNEL4x2_SUB
|
||||
KERNEL4x2_SUB
|
||||
KERNEL4x2_SUB
|
||||
@@ -909,22 +909,22 @@ dtrmm_kernel_L2_M4_22:
|
||||
KERNEL4x2_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L2_M4_22
|
||||
bgt .Ldtrmm_kernel_L2_M4_22
|
||||
|
||||
|
||||
dtrmm_kernel_L2_M4_40:
|
||||
.Ldtrmm_kernel_L2_M4_40:
|
||||
|
||||
ands counterL , tempK, #7 // counterL = counterL % 8
|
||||
ble dtrmm_kernel_L2_M4_100
|
||||
ble .Ldtrmm_kernel_L2_M4_100
|
||||
|
||||
dtrmm_kernel_L2_M4_42:
|
||||
.Ldtrmm_kernel_L2_M4_42:
|
||||
|
||||
KERNEL4x2_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L2_M4_42
|
||||
bgt .Ldtrmm_kernel_L2_M4_42
|
||||
|
||||
dtrmm_kernel_L2_M4_100:
|
||||
.Ldtrmm_kernel_L2_M4_100:
|
||||
|
||||
SAVE4x2
|
||||
|
||||
@@ -944,22 +944,22 @@ dtrmm_kernel_L2_M4_100:
|
||||
add tempOffset, tempOffset, #4
|
||||
#endif
|
||||
|
||||
dtrmm_kernel_L2_M4_END:
|
||||
.Ldtrmm_kernel_L2_M4_END:
|
||||
|
||||
subs counterI, counterI, #1
|
||||
bgt dtrmm_kernel_L2_M4_20
|
||||
bgt .Ldtrmm_kernel_L2_M4_20
|
||||
|
||||
|
||||
dtrmm_kernel_L2_M2_BEGIN:
|
||||
.Ldtrmm_kernel_L2_M2_BEGIN:
|
||||
|
||||
mov counterI, origM
|
||||
tst counterI , #3
|
||||
ble dtrmm_kernel_L2_END
|
||||
ble .Ldtrmm_kernel_L2_END
|
||||
|
||||
tst counterI, #2 // counterI = counterI / 2
|
||||
ble dtrmm_kernel_L2_M1_BEGIN
|
||||
ble .Ldtrmm_kernel_L2_M1_BEGIN
|
||||
|
||||
dtrmm_kernel_L2_M2_20:
|
||||
.Ldtrmm_kernel_L2_M2_20:
|
||||
|
||||
INIT2x2
|
||||
|
||||
@@ -983,9 +983,9 @@ dtrmm_kernel_L2_M2_20:
|
||||
|
||||
asr counterL , tempK, #3 // counterL = counterL / 8
|
||||
cmp counterL,#0
|
||||
ble dtrmm_kernel_L2_M2_40
|
||||
ble .Ldtrmm_kernel_L2_M2_40
|
||||
|
||||
dtrmm_kernel_L2_M2_22:
|
||||
.Ldtrmm_kernel_L2_M2_22:
|
||||
|
||||
KERNEL2x2_SUB
|
||||
KERNEL2x2_SUB
|
||||
@@ -998,22 +998,22 @@ dtrmm_kernel_L2_M2_22:
|
||||
KERNEL2x2_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L2_M2_22
|
||||
bgt .Ldtrmm_kernel_L2_M2_22
|
||||
|
||||
|
||||
dtrmm_kernel_L2_M2_40:
|
||||
.Ldtrmm_kernel_L2_M2_40:
|
||||
|
||||
ands counterL , tempK, #7 // counterL = counterL % 8
|
||||
ble dtrmm_kernel_L2_M2_100
|
||||
ble .Ldtrmm_kernel_L2_M2_100
|
||||
|
||||
dtrmm_kernel_L2_M2_42:
|
||||
.Ldtrmm_kernel_L2_M2_42:
|
||||
|
||||
KERNEL2x2_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L2_M2_42
|
||||
bgt .Ldtrmm_kernel_L2_M2_42
|
||||
|
||||
dtrmm_kernel_L2_M2_100:
|
||||
.Ldtrmm_kernel_L2_M2_100:
|
||||
|
||||
SAVE2x2
|
||||
|
||||
@@ -1033,15 +1033,15 @@ dtrmm_kernel_L2_M2_100:
|
||||
add tempOffset, tempOffset, #2
|
||||
#endif
|
||||
|
||||
dtrmm_kernel_L2_M2_END:
|
||||
.Ldtrmm_kernel_L2_M2_END:
|
||||
|
||||
|
||||
dtrmm_kernel_L2_M1_BEGIN:
|
||||
.Ldtrmm_kernel_L2_M1_BEGIN:
|
||||
|
||||
tst counterI, #1 // counterI = counterI % 2
|
||||
ble dtrmm_kernel_L2_END
|
||||
ble .Ldtrmm_kernel_L2_END
|
||||
|
||||
dtrmm_kernel_L2_M1_20:
|
||||
.Ldtrmm_kernel_L2_M1_20:
|
||||
|
||||
INIT1x2
|
||||
|
||||
@@ -1065,9 +1065,9 @@ dtrmm_kernel_L2_M1_20:
|
||||
|
||||
asr counterL , tempK, #3 // counterL = counterL / 8
|
||||
cmp counterL, #0
|
||||
ble dtrmm_kernel_L2_M1_40
|
||||
ble .Ldtrmm_kernel_L2_M1_40
|
||||
|
||||
dtrmm_kernel_L2_M1_22:
|
||||
.Ldtrmm_kernel_L2_M1_22:
|
||||
KERNEL1x2_SUB
|
||||
KERNEL1x2_SUB
|
||||
KERNEL1x2_SUB
|
||||
@@ -1079,22 +1079,22 @@ dtrmm_kernel_L2_M1_22:
|
||||
KERNEL1x2_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L2_M1_22
|
||||
bgt .Ldtrmm_kernel_L2_M1_22
|
||||
|
||||
|
||||
dtrmm_kernel_L2_M1_40:
|
||||
.Ldtrmm_kernel_L2_M1_40:
|
||||
|
||||
ands counterL , tempK, #7 // counterL = counterL % 8
|
||||
ble dtrmm_kernel_L2_M1_100
|
||||
ble .Ldtrmm_kernel_L2_M1_100
|
||||
|
||||
dtrmm_kernel_L2_M1_42:
|
||||
.Ldtrmm_kernel_L2_M1_42:
|
||||
|
||||
KERNEL1x2_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L2_M1_42
|
||||
bgt .Ldtrmm_kernel_L2_M1_42
|
||||
|
||||
dtrmm_kernel_L2_M1_100:
|
||||
.Ldtrmm_kernel_L2_M1_100:
|
||||
|
||||
SAVE1x2
|
||||
|
||||
@@ -1114,7 +1114,7 @@ dtrmm_kernel_L2_M1_100:
|
||||
add tempOffset, tempOffset, #1
|
||||
#endif
|
||||
|
||||
dtrmm_kernel_L2_END:
|
||||
.Ldtrmm_kernel_L2_END:
|
||||
#if !defined(LEFT)
|
||||
add tempOffset, tempOffset, #2
|
||||
#endif
|
||||
@@ -1122,11 +1122,11 @@ dtrmm_kernel_L2_END:
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
dtrmm_kernel_L1_BEGIN:
|
||||
.Ldtrmm_kernel_L1_BEGIN:
|
||||
|
||||
mov counterJ , origN
|
||||
tst counterJ , #1
|
||||
ble dtrmm_kernel_L999 // done
|
||||
ble .Ldtrmm_kernel_L999 // done
|
||||
|
||||
|
||||
mov pCRow0, pC // pCRow0 = C
|
||||
@@ -1138,14 +1138,14 @@ dtrmm_kernel_L1_BEGIN:
|
||||
|
||||
mov pA, origPA // pA = A
|
||||
|
||||
dtrmm_kernel_L1_M4_BEGIN:
|
||||
.Ldtrmm_kernel_L1_M4_BEGIN:
|
||||
|
||||
mov counterI, origM
|
||||
asr counterI, counterI, #2 // counterI = counterI / 4
|
||||
cmp counterI, #0
|
||||
ble dtrmm_kernel_L1_M2_BEGIN
|
||||
ble .Ldtrmm_kernel_L1_M2_BEGIN
|
||||
|
||||
dtrmm_kernel_L1_M4_20:
|
||||
.Ldtrmm_kernel_L1_M4_20:
|
||||
|
||||
INIT4x1
|
||||
|
||||
@@ -1169,10 +1169,10 @@ dtrmm_kernel_L1_M4_20:
|
||||
|
||||
asr counterL , tempK, #3 // counterL = counterL / 8
|
||||
cmp counterL , #0
|
||||
ble dtrmm_kernel_L1_M4_40
|
||||
ble .Ldtrmm_kernel_L1_M4_40
|
||||
.align 5
|
||||
|
||||
dtrmm_kernel_L1_M4_22:
|
||||
.Ldtrmm_kernel_L1_M4_22:
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
KERNEL4x1_SUB
|
||||
@@ -1184,22 +1184,22 @@ dtrmm_kernel_L1_M4_22:
|
||||
KERNEL4x1_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L1_M4_22
|
||||
bgt .Ldtrmm_kernel_L1_M4_22
|
||||
|
||||
|
||||
dtrmm_kernel_L1_M4_40:
|
||||
.Ldtrmm_kernel_L1_M4_40:
|
||||
|
||||
ands counterL , tempK, #7 // counterL = counterL % 8
|
||||
ble dtrmm_kernel_L1_M4_100
|
||||
ble .Ldtrmm_kernel_L1_M4_100
|
||||
|
||||
dtrmm_kernel_L1_M4_42:
|
||||
.Ldtrmm_kernel_L1_M4_42:
|
||||
|
||||
KERNEL4x1_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L1_M4_42
|
||||
bgt .Ldtrmm_kernel_L1_M4_42
|
||||
|
||||
dtrmm_kernel_L1_M4_100:
|
||||
.Ldtrmm_kernel_L1_M4_100:
|
||||
|
||||
SAVE4x1
|
||||
|
||||
@@ -1220,22 +1220,22 @@ dtrmm_kernel_L1_M4_100:
|
||||
add tempOffset, tempOffset, #4
|
||||
#endif
|
||||
|
||||
dtrmm_kernel_L1_M4_END:
|
||||
.Ldtrmm_kernel_L1_M4_END:
|
||||
|
||||
subs counterI, counterI, #1
|
||||
bgt dtrmm_kernel_L1_M4_20
|
||||
bgt .Ldtrmm_kernel_L1_M4_20
|
||||
|
||||
|
||||
dtrmm_kernel_L1_M2_BEGIN:
|
||||
.Ldtrmm_kernel_L1_M2_BEGIN:
|
||||
|
||||
mov counterI, origM
|
||||
tst counterI , #3
|
||||
ble dtrmm_kernel_L1_END
|
||||
ble .Ldtrmm_kernel_L1_END
|
||||
|
||||
tst counterI, #2 // counterI = counterI / 2
|
||||
ble dtrmm_kernel_L1_M1_BEGIN
|
||||
ble .Ldtrmm_kernel_L1_M1_BEGIN
|
||||
|
||||
dtrmm_kernel_L1_M2_20:
|
||||
.Ldtrmm_kernel_L1_M2_20:
|
||||
|
||||
INIT2x1
|
||||
|
||||
@@ -1259,9 +1259,9 @@ dtrmm_kernel_L1_M2_20:
|
||||
|
||||
asr counterL , tempK, #3 // counterL = counterL / 8
|
||||
cmp counterL , #0
|
||||
ble dtrmm_kernel_L1_M2_40
|
||||
ble .Ldtrmm_kernel_L1_M2_40
|
||||
|
||||
dtrmm_kernel_L1_M2_22:
|
||||
.Ldtrmm_kernel_L1_M2_22:
|
||||
|
||||
KERNEL2x1_SUB
|
||||
KERNEL2x1_SUB
|
||||
@@ -1274,22 +1274,22 @@ dtrmm_kernel_L1_M2_22:
|
||||
KERNEL2x1_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L1_M2_22
|
||||
bgt .Ldtrmm_kernel_L1_M2_22
|
||||
|
||||
|
||||
dtrmm_kernel_L1_M2_40:
|
||||
.Ldtrmm_kernel_L1_M2_40:
|
||||
|
||||
ands counterL , tempK, #7 // counterL = counterL % 8
|
||||
ble dtrmm_kernel_L1_M2_100
|
||||
ble .Ldtrmm_kernel_L1_M2_100
|
||||
|
||||
dtrmm_kernel_L1_M2_42:
|
||||
.Ldtrmm_kernel_L1_M2_42:
|
||||
|
||||
KERNEL2x1_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L1_M2_42
|
||||
bgt .Ldtrmm_kernel_L1_M2_42
|
||||
|
||||
dtrmm_kernel_L1_M2_100:
|
||||
.Ldtrmm_kernel_L1_M2_100:
|
||||
|
||||
SAVE2x1
|
||||
|
||||
@@ -1309,15 +1309,15 @@ dtrmm_kernel_L1_M2_100:
|
||||
add tempOffset, tempOffset, #2
|
||||
#endif
|
||||
|
||||
dtrmm_kernel_L1_M2_END:
|
||||
.Ldtrmm_kernel_L1_M2_END:
|
||||
|
||||
|
||||
dtrmm_kernel_L1_M1_BEGIN:
|
||||
.Ldtrmm_kernel_L1_M1_BEGIN:
|
||||
|
||||
tst counterI, #1 // counterI = counterI % 2
|
||||
ble dtrmm_kernel_L1_END
|
||||
ble .Ldtrmm_kernel_L1_END
|
||||
|
||||
dtrmm_kernel_L1_M1_20:
|
||||
.Ldtrmm_kernel_L1_M1_20:
|
||||
|
||||
INIT1x1
|
||||
|
||||
@@ -1341,9 +1341,9 @@ dtrmm_kernel_L1_M1_20:
|
||||
|
||||
asr counterL , tempK, #3 // counterL = counterL / 8
|
||||
cmp counterL , #0
|
||||
ble dtrmm_kernel_L1_M1_40
|
||||
ble .Ldtrmm_kernel_L1_M1_40
|
||||
|
||||
dtrmm_kernel_L1_M1_22:
|
||||
.Ldtrmm_kernel_L1_M1_22:
|
||||
KERNEL1x1_SUB
|
||||
KERNEL1x1_SUB
|
||||
KERNEL1x1_SUB
|
||||
@@ -1355,30 +1355,30 @@ dtrmm_kernel_L1_M1_22:
|
||||
KERNEL1x1_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L1_M1_22
|
||||
bgt .Ldtrmm_kernel_L1_M1_22
|
||||
|
||||
|
||||
dtrmm_kernel_L1_M1_40:
|
||||
.Ldtrmm_kernel_L1_M1_40:
|
||||
|
||||
ands counterL , tempK, #7 // counterL = counterL % 8
|
||||
ble dtrmm_kernel_L1_M1_100
|
||||
ble .Ldtrmm_kernel_L1_M1_100
|
||||
|
||||
dtrmm_kernel_L1_M1_42:
|
||||
.Ldtrmm_kernel_L1_M1_42:
|
||||
|
||||
KERNEL1x1_SUB
|
||||
|
||||
subs counterL, counterL, #1
|
||||
bgt dtrmm_kernel_L1_M1_42
|
||||
bgt .Ldtrmm_kernel_L1_M1_42
|
||||
|
||||
dtrmm_kernel_L1_M1_100:
|
||||
.Ldtrmm_kernel_L1_M1_100:
|
||||
|
||||
SAVE1x1
|
||||
|
||||
|
||||
dtrmm_kernel_L1_END:
|
||||
.Ldtrmm_kernel_L1_END:
|
||||
|
||||
|
||||
dtrmm_kernel_L999:
|
||||
.Ldtrmm_kernel_L999:
|
||||
mov x0, #0 // set return value
|
||||
ldp d8, d9, [sp, #(0 * 16)]
|
||||
ldp d10, d11, [sp, #(1 * 16)]
|
||||
|
||||
Reference in New Issue
Block a user