ARM64: Convert all labels to local labels

While debugging/profiling applications using perf or other tools, the
kernels appear scattered in the profile reports. This is because the labels
within the kernels are not local: each label is emitted as a symbol, so the
tools show each label as a separate function.

To avoid this, all the labels within the kernels are changed to local
labels by adding the .L prefix.
This commit is contained in:
Ashwin Sekhar T K
2017-10-24 10:47:11 +00:00
parent 627133f9ad
commit a0128aa489
50 changed files with 4469 additions and 4469 deletions

View File

@@ -549,11 +549,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
mov counterJ, origN
asr counterJ, counterJ, #2 // J = J / 4
cmp counterJ, #0
ble dtrmm_kernel_L2_BEGIN
ble .Ldtrmm_kernel_L2_BEGIN
/******************************************************************************/
dtrmm_kernel_L4_BEGIN:
.Ldtrmm_kernel_L4_BEGIN:
mov pCRow0, pC // pCRow0 = C
add pC, pC, LDC, lsl #2
@@ -563,14 +563,14 @@ dtrmm_kernel_L4_BEGIN:
mov pA, origPA // pA = start of A array
dtrmm_kernel_L4_M4_BEGIN:
.Ldtrmm_kernel_L4_M4_BEGIN:
mov counterI, origM
asr counterI, counterI, #2 // counterI = counterI / 4
cmp counterI, #0
ble dtrmm_kernel_L4_M2_BEGIN
ble .Ldtrmm_kernel_L4_M2_BEGIN
dtrmm_kernel_L4_M4_20:
.Ldtrmm_kernel_L4_M4_20:
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
mov pB, origPB
@@ -591,57 +591,57 @@ dtrmm_kernel_L4_M4_20:
asr counterL , tempK, #1 // L = K / 2
cmp counterL , #2 // is there at least 4 to do?
blt dtrmm_kernel_L4_M4_32
blt .Ldtrmm_kernel_L4_M4_32
KERNEL4x4_I // do one in the K
KERNEL4x4_M2 // do another in the K
subs counterL, counterL, #2
ble dtrmm_kernel_L4_M4_22a
ble .Ldtrmm_kernel_L4_M4_22a
.align 5
dtrmm_kernel_L4_M4_22:
.Ldtrmm_kernel_L4_M4_22:
KERNEL4x4_M1
KERNEL4x4_M2
subs counterL, counterL, #1
bgt dtrmm_kernel_L4_M4_22
bgt .Ldtrmm_kernel_L4_M4_22
dtrmm_kernel_L4_M4_22a:
.Ldtrmm_kernel_L4_M4_22a:
KERNEL4x4_M1
KERNEL4x4_E
b dtrmm_kernel_L4_M4_44
b .Ldtrmm_kernel_L4_M4_44
dtrmm_kernel_L4_M4_32:
.Ldtrmm_kernel_L4_M4_32:
tst counterL, #1
ble dtrmm_kernel_L4_M4_40
ble .Ldtrmm_kernel_L4_M4_40
KERNEL4x4_I
KERNEL4x4_E
b dtrmm_kernel_L4_M4_44
b .Ldtrmm_kernel_L4_M4_44
dtrmm_kernel_L4_M4_40:
.Ldtrmm_kernel_L4_M4_40:
INIT4x4
dtrmm_kernel_L4_M4_44:
.Ldtrmm_kernel_L4_M4_44:
ands counterL , tempK, #1
ble dtrmm_kernel_L4_M4_100
ble .Ldtrmm_kernel_L4_M4_100
dtrmm_kernel_L4_M4_46:
.Ldtrmm_kernel_L4_M4_46:
KERNEL4x4_SUB
dtrmm_kernel_L4_M4_100:
.Ldtrmm_kernel_L4_M4_100:
SAVE4x4
@@ -660,20 +660,20 @@ dtrmm_kernel_L4_M4_100:
add tempOffset, tempOffset, #4
#endif
dtrmm_kernel_L4_M4_END:
.Ldtrmm_kernel_L4_M4_END:
subs counterI, counterI, #1
bne dtrmm_kernel_L4_M4_20
bne .Ldtrmm_kernel_L4_M4_20
dtrmm_kernel_L4_M2_BEGIN:
.Ldtrmm_kernel_L4_M2_BEGIN:
mov counterI, origM
tst counterI , #3
ble dtrmm_kernel_L4_END
ble .Ldtrmm_kernel_L4_END
tst counterI, #2 // counterI = counterI / 2
ble dtrmm_kernel_L4_M1_BEGIN
ble .Ldtrmm_kernel_L4_M1_BEGIN
dtrmm_kernel_L4_M2_20:
.Ldtrmm_kernel_L4_M2_20:
INIT2x4
@@ -697,9 +697,9 @@ dtrmm_kernel_L4_M2_20:
asr counterL , tempK, #3 // counterL = counterL / 8
cmp counterL , #0
ble dtrmm_kernel_L4_M2_40
ble .Ldtrmm_kernel_L4_M2_40
dtrmm_kernel_L4_M2_22:
.Ldtrmm_kernel_L4_M2_22:
KERNEL2x4_SUB
KERNEL2x4_SUB
@@ -712,22 +712,22 @@ dtrmm_kernel_L4_M2_22:
KERNEL2x4_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L4_M2_22
bgt .Ldtrmm_kernel_L4_M2_22
dtrmm_kernel_L4_M2_40:
.Ldtrmm_kernel_L4_M2_40:
ands counterL , tempK, #7 // counterL = counterL % 8
ble dtrmm_kernel_L4_M2_100
ble .Ldtrmm_kernel_L4_M2_100
dtrmm_kernel_L4_M2_42:
.Ldtrmm_kernel_L4_M2_42:
KERNEL2x4_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L4_M2_42
bgt .Ldtrmm_kernel_L4_M2_42
dtrmm_kernel_L4_M2_100:
.Ldtrmm_kernel_L4_M2_100:
SAVE2x4
@@ -747,15 +747,15 @@ dtrmm_kernel_L4_M2_100:
add tempOffset, tempOffset, #2
#endif
dtrmm_kernel_L4_M2_END:
.Ldtrmm_kernel_L4_M2_END:
dtrmm_kernel_L4_M1_BEGIN:
.Ldtrmm_kernel_L4_M1_BEGIN:
tst counterI, #1 // counterI = counterI % 2
ble dtrmm_kernel_L4_END
ble .Ldtrmm_kernel_L4_END
dtrmm_kernel_L4_M1_20:
.Ldtrmm_kernel_L4_M1_20:
INIT1x4
@@ -779,9 +779,9 @@ dtrmm_kernel_L4_M1_20:
asr counterL , tempK, #3 // counterL = counterL / 8
cmp counterL , #0
ble dtrmm_kernel_L4_M1_40
ble .Ldtrmm_kernel_L4_M1_40
dtrmm_kernel_L4_M1_22:
.Ldtrmm_kernel_L4_M1_22:
KERNEL1x4_SUB
KERNEL1x4_SUB
KERNEL1x4_SUB
@@ -793,22 +793,22 @@ dtrmm_kernel_L4_M1_22:
KERNEL1x4_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L4_M1_22
bgt .Ldtrmm_kernel_L4_M1_22
dtrmm_kernel_L4_M1_40:
.Ldtrmm_kernel_L4_M1_40:
ands counterL , tempK, #7 // counterL = counterL % 8
ble dtrmm_kernel_L4_M1_100
ble .Ldtrmm_kernel_L4_M1_100
dtrmm_kernel_L4_M1_42:
.Ldtrmm_kernel_L4_M1_42:
KERNEL1x4_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L4_M1_42
bgt .Ldtrmm_kernel_L4_M1_42
dtrmm_kernel_L4_M1_100:
.Ldtrmm_kernel_L4_M1_100:
SAVE1x4
@@ -828,7 +828,7 @@ dtrmm_kernel_L4_M1_100:
add tempOffset, tempOffset, #1
#endif
dtrmm_kernel_L4_END:
.Ldtrmm_kernel_L4_END:
lsl temp, origK, #5
add origPB, origPB, temp // B = B + K * 4 * 8
@@ -838,19 +838,19 @@ dtrmm_kernel_L4_END:
#endif
subs counterJ, counterJ , #1 // j--
bgt dtrmm_kernel_L4_BEGIN
bgt .Ldtrmm_kernel_L4_BEGIN
/******************************************************************************/
dtrmm_kernel_L2_BEGIN: // less than 2 left in N direction
.Ldtrmm_kernel_L2_BEGIN: // less than 2 left in N direction
mov counterJ , origN
tst counterJ , #3
ble dtrmm_kernel_L999 // error, N was less than 4?
ble .Ldtrmm_kernel_L999 // error, N was less than 4?
tst counterJ , #2
ble dtrmm_kernel_L1_BEGIN
ble .Ldtrmm_kernel_L1_BEGIN
mov pCRow0, pC // pCRow0 = pC
@@ -863,14 +863,14 @@ dtrmm_kernel_L2_BEGIN: // less than 2 left in N direction
mov pA, origPA // pA = A
dtrmm_kernel_L2_M4_BEGIN:
.Ldtrmm_kernel_L2_M4_BEGIN:
mov counterI, origM
asr counterI, counterI, #2 // counterI = counterI / 4
cmp counterI,#0
ble dtrmm_kernel_L2_M2_BEGIN
ble .Ldtrmm_kernel_L2_M2_BEGIN
dtrmm_kernel_L2_M4_20:
.Ldtrmm_kernel_L2_M4_20:
INIT4x2
@@ -894,10 +894,10 @@ dtrmm_kernel_L2_M4_20:
asr counterL , tempK, #3 // counterL = counterL / 8
cmp counterL,#0
ble dtrmm_kernel_L2_M4_40
ble .Ldtrmm_kernel_L2_M4_40
.align 5
dtrmm_kernel_L2_M4_22:
.Ldtrmm_kernel_L2_M4_22:
KERNEL4x2_SUB
KERNEL4x2_SUB
KERNEL4x2_SUB
@@ -909,22 +909,22 @@ dtrmm_kernel_L2_M4_22:
KERNEL4x2_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L2_M4_22
bgt .Ldtrmm_kernel_L2_M4_22
dtrmm_kernel_L2_M4_40:
.Ldtrmm_kernel_L2_M4_40:
ands counterL , tempK, #7 // counterL = counterL % 8
ble dtrmm_kernel_L2_M4_100
ble .Ldtrmm_kernel_L2_M4_100
dtrmm_kernel_L2_M4_42:
.Ldtrmm_kernel_L2_M4_42:
KERNEL4x2_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L2_M4_42
bgt .Ldtrmm_kernel_L2_M4_42
dtrmm_kernel_L2_M4_100:
.Ldtrmm_kernel_L2_M4_100:
SAVE4x2
@@ -944,22 +944,22 @@ dtrmm_kernel_L2_M4_100:
add tempOffset, tempOffset, #4
#endif
dtrmm_kernel_L2_M4_END:
.Ldtrmm_kernel_L2_M4_END:
subs counterI, counterI, #1
bgt dtrmm_kernel_L2_M4_20
bgt .Ldtrmm_kernel_L2_M4_20
dtrmm_kernel_L2_M2_BEGIN:
.Ldtrmm_kernel_L2_M2_BEGIN:
mov counterI, origM
tst counterI , #3
ble dtrmm_kernel_L2_END
ble .Ldtrmm_kernel_L2_END
tst counterI, #2 // counterI = counterI / 2
ble dtrmm_kernel_L2_M1_BEGIN
ble .Ldtrmm_kernel_L2_M1_BEGIN
dtrmm_kernel_L2_M2_20:
.Ldtrmm_kernel_L2_M2_20:
INIT2x2
@@ -983,9 +983,9 @@ dtrmm_kernel_L2_M2_20:
asr counterL , tempK, #3 // counterL = counterL / 8
cmp counterL,#0
ble dtrmm_kernel_L2_M2_40
ble .Ldtrmm_kernel_L2_M2_40
dtrmm_kernel_L2_M2_22:
.Ldtrmm_kernel_L2_M2_22:
KERNEL2x2_SUB
KERNEL2x2_SUB
@@ -998,22 +998,22 @@ dtrmm_kernel_L2_M2_22:
KERNEL2x2_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L2_M2_22
bgt .Ldtrmm_kernel_L2_M2_22
dtrmm_kernel_L2_M2_40:
.Ldtrmm_kernel_L2_M2_40:
ands counterL , tempK, #7 // counterL = counterL % 8
ble dtrmm_kernel_L2_M2_100
ble .Ldtrmm_kernel_L2_M2_100
dtrmm_kernel_L2_M2_42:
.Ldtrmm_kernel_L2_M2_42:
KERNEL2x2_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L2_M2_42
bgt .Ldtrmm_kernel_L2_M2_42
dtrmm_kernel_L2_M2_100:
.Ldtrmm_kernel_L2_M2_100:
SAVE2x2
@@ -1033,15 +1033,15 @@ dtrmm_kernel_L2_M2_100:
add tempOffset, tempOffset, #2
#endif
dtrmm_kernel_L2_M2_END:
.Ldtrmm_kernel_L2_M2_END:
dtrmm_kernel_L2_M1_BEGIN:
.Ldtrmm_kernel_L2_M1_BEGIN:
tst counterI, #1 // counterI = counterI % 2
ble dtrmm_kernel_L2_END
ble .Ldtrmm_kernel_L2_END
dtrmm_kernel_L2_M1_20:
.Ldtrmm_kernel_L2_M1_20:
INIT1x2
@@ -1065,9 +1065,9 @@ dtrmm_kernel_L2_M1_20:
asr counterL , tempK, #3 // counterL = counterL / 8
cmp counterL, #0
ble dtrmm_kernel_L2_M1_40
ble .Ldtrmm_kernel_L2_M1_40
dtrmm_kernel_L2_M1_22:
.Ldtrmm_kernel_L2_M1_22:
KERNEL1x2_SUB
KERNEL1x2_SUB
KERNEL1x2_SUB
@@ -1079,22 +1079,22 @@ dtrmm_kernel_L2_M1_22:
KERNEL1x2_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L2_M1_22
bgt .Ldtrmm_kernel_L2_M1_22
dtrmm_kernel_L2_M1_40:
.Ldtrmm_kernel_L2_M1_40:
ands counterL , tempK, #7 // counterL = counterL % 8
ble dtrmm_kernel_L2_M1_100
ble .Ldtrmm_kernel_L2_M1_100
dtrmm_kernel_L2_M1_42:
.Ldtrmm_kernel_L2_M1_42:
KERNEL1x2_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L2_M1_42
bgt .Ldtrmm_kernel_L2_M1_42
dtrmm_kernel_L2_M1_100:
.Ldtrmm_kernel_L2_M1_100:
SAVE1x2
@@ -1114,7 +1114,7 @@ dtrmm_kernel_L2_M1_100:
add tempOffset, tempOffset, #1
#endif
dtrmm_kernel_L2_END:
.Ldtrmm_kernel_L2_END:
#if !defined(LEFT)
add tempOffset, tempOffset, #2
#endif
@@ -1122,11 +1122,11 @@ dtrmm_kernel_L2_END:
/******************************************************************************/
dtrmm_kernel_L1_BEGIN:
.Ldtrmm_kernel_L1_BEGIN:
mov counterJ , origN
tst counterJ , #1
ble dtrmm_kernel_L999 // done
ble .Ldtrmm_kernel_L999 // done
mov pCRow0, pC // pCRow0 = C
@@ -1138,14 +1138,14 @@ dtrmm_kernel_L1_BEGIN:
mov pA, origPA // pA = A
dtrmm_kernel_L1_M4_BEGIN:
.Ldtrmm_kernel_L1_M4_BEGIN:
mov counterI, origM
asr counterI, counterI, #2 // counterI = counterI / 4
cmp counterI, #0
ble dtrmm_kernel_L1_M2_BEGIN
ble .Ldtrmm_kernel_L1_M2_BEGIN
dtrmm_kernel_L1_M4_20:
.Ldtrmm_kernel_L1_M4_20:
INIT4x1
@@ -1169,10 +1169,10 @@ dtrmm_kernel_L1_M4_20:
asr counterL , tempK, #3 // counterL = counterL / 8
cmp counterL , #0
ble dtrmm_kernel_L1_M4_40
ble .Ldtrmm_kernel_L1_M4_40
.align 5
dtrmm_kernel_L1_M4_22:
.Ldtrmm_kernel_L1_M4_22:
KERNEL4x1_SUB
KERNEL4x1_SUB
KERNEL4x1_SUB
@@ -1184,22 +1184,22 @@ dtrmm_kernel_L1_M4_22:
KERNEL4x1_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L1_M4_22
bgt .Ldtrmm_kernel_L1_M4_22
dtrmm_kernel_L1_M4_40:
.Ldtrmm_kernel_L1_M4_40:
ands counterL , tempK, #7 // counterL = counterL % 8
ble dtrmm_kernel_L1_M4_100
ble .Ldtrmm_kernel_L1_M4_100
dtrmm_kernel_L1_M4_42:
.Ldtrmm_kernel_L1_M4_42:
KERNEL4x1_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L1_M4_42
bgt .Ldtrmm_kernel_L1_M4_42
dtrmm_kernel_L1_M4_100:
.Ldtrmm_kernel_L1_M4_100:
SAVE4x1
@@ -1220,22 +1220,22 @@ dtrmm_kernel_L1_M4_100:
add tempOffset, tempOffset, #4
#endif
dtrmm_kernel_L1_M4_END:
.Ldtrmm_kernel_L1_M4_END:
subs counterI, counterI, #1
bgt dtrmm_kernel_L1_M4_20
bgt .Ldtrmm_kernel_L1_M4_20
dtrmm_kernel_L1_M2_BEGIN:
.Ldtrmm_kernel_L1_M2_BEGIN:
mov counterI, origM
tst counterI , #3
ble dtrmm_kernel_L1_END
ble .Ldtrmm_kernel_L1_END
tst counterI, #2 // counterI = counterI / 2
ble dtrmm_kernel_L1_M1_BEGIN
ble .Ldtrmm_kernel_L1_M1_BEGIN
dtrmm_kernel_L1_M2_20:
.Ldtrmm_kernel_L1_M2_20:
INIT2x1
@@ -1259,9 +1259,9 @@ dtrmm_kernel_L1_M2_20:
asr counterL , tempK, #3 // counterL = counterL / 8
cmp counterL , #0
ble dtrmm_kernel_L1_M2_40
ble .Ldtrmm_kernel_L1_M2_40
dtrmm_kernel_L1_M2_22:
.Ldtrmm_kernel_L1_M2_22:
KERNEL2x1_SUB
KERNEL2x1_SUB
@@ -1274,22 +1274,22 @@ dtrmm_kernel_L1_M2_22:
KERNEL2x1_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L1_M2_22
bgt .Ldtrmm_kernel_L1_M2_22
dtrmm_kernel_L1_M2_40:
.Ldtrmm_kernel_L1_M2_40:
ands counterL , tempK, #7 // counterL = counterL % 8
ble dtrmm_kernel_L1_M2_100
ble .Ldtrmm_kernel_L1_M2_100
dtrmm_kernel_L1_M2_42:
.Ldtrmm_kernel_L1_M2_42:
KERNEL2x1_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L1_M2_42
bgt .Ldtrmm_kernel_L1_M2_42
dtrmm_kernel_L1_M2_100:
.Ldtrmm_kernel_L1_M2_100:
SAVE2x1
@@ -1309,15 +1309,15 @@ dtrmm_kernel_L1_M2_100:
add tempOffset, tempOffset, #2
#endif
dtrmm_kernel_L1_M2_END:
.Ldtrmm_kernel_L1_M2_END:
dtrmm_kernel_L1_M1_BEGIN:
.Ldtrmm_kernel_L1_M1_BEGIN:
tst counterI, #1 // counterI = counterI % 2
ble dtrmm_kernel_L1_END
ble .Ldtrmm_kernel_L1_END
dtrmm_kernel_L1_M1_20:
.Ldtrmm_kernel_L1_M1_20:
INIT1x1
@@ -1341,9 +1341,9 @@ dtrmm_kernel_L1_M1_20:
asr counterL , tempK, #3 // counterL = counterL / 8
cmp counterL , #0
ble dtrmm_kernel_L1_M1_40
ble .Ldtrmm_kernel_L1_M1_40
dtrmm_kernel_L1_M1_22:
.Ldtrmm_kernel_L1_M1_22:
KERNEL1x1_SUB
KERNEL1x1_SUB
KERNEL1x1_SUB
@@ -1355,30 +1355,30 @@ dtrmm_kernel_L1_M1_22:
KERNEL1x1_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L1_M1_22
bgt .Ldtrmm_kernel_L1_M1_22
dtrmm_kernel_L1_M1_40:
.Ldtrmm_kernel_L1_M1_40:
ands counterL , tempK, #7 // counterL = counterL % 8
ble dtrmm_kernel_L1_M1_100
ble .Ldtrmm_kernel_L1_M1_100
dtrmm_kernel_L1_M1_42:
.Ldtrmm_kernel_L1_M1_42:
KERNEL1x1_SUB
subs counterL, counterL, #1
bgt dtrmm_kernel_L1_M1_42
bgt .Ldtrmm_kernel_L1_M1_42
dtrmm_kernel_L1_M1_100:
.Ldtrmm_kernel_L1_M1_100:
SAVE1x1
dtrmm_kernel_L1_END:
.Ldtrmm_kernel_L1_END:
dtrmm_kernel_L999:
.Ldtrmm_kernel_L999:
mov x0, #0 // set return value
ldp d8, d9, [sp, #(0 * 16)]
ldp d10, d11, [sp, #(1 * 16)]