ARM64: Convert all labels to local labels

When debugging or profiling applications with perf or similar tools, the
kernels appear scattered across the profile report. This is because the
labels within the kernels are not local, so each one is emitted as a symbol
and is shown in the report as a separate function.

To avoid this, all labels within the kernels are converted to local labels
by giving them the `.L` prefix (e.g. `dgemm_ncopy_L999` becomes
`.Ldgemm_ncopy_L999`).
This commit is contained in:
Ashwin Sekhar T K
2017-10-24 10:47:11 +00:00
parent 627133f9ad
commit a0128aa489
50 changed files with 4469 additions and 4469 deletions

View File

@@ -353,13 +353,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
lsl LDA, LDA, #3 // LDA = LDA * SIZE
dgemm_ncopy_L8_BEGIN:
.Ldgemm_ncopy_L8_BEGIN:
asr J, N, #3 // J = N / 8
cmp J, #0
ble dgemm_ncopy_L4_BEGIN
ble .Ldgemm_ncopy_L4_BEGIN
dgemm_ncopy_L8_M8_BEGIN:
.Ldgemm_ncopy_L8_M8_BEGIN:
mov A01, A00
add A02, A01, LDA
@@ -374,46 +374,46 @@ dgemm_ncopy_L8_M8_BEGIN:
asr I, M, #3 // I = M / 8
cmp I, #0
ble dgemm_ncopy_L8_M8_40
ble .Ldgemm_ncopy_L8_M8_40
dgemm_ncopy_L8_M8_20:
.Ldgemm_ncopy_L8_M8_20:
COPY8x8
subs I , I , #1
bne dgemm_ncopy_L8_M8_20
bne .Ldgemm_ncopy_L8_M8_20
dgemm_ncopy_L8_M8_40:
.Ldgemm_ncopy_L8_M8_40:
and I, M , #7
cmp I, #0
ble dgemm_ncopy_L8_M8_END
ble .Ldgemm_ncopy_L8_M8_END
dgemm_ncopy_L8_M8_60:
.Ldgemm_ncopy_L8_M8_60:
COPY1x8
subs I , I , #1
bne dgemm_ncopy_L8_M8_60
bne .Ldgemm_ncopy_L8_M8_60
dgemm_ncopy_L8_M8_END:
.Ldgemm_ncopy_L8_M8_END:
subs J , J, #1 // j--
bne dgemm_ncopy_L8_M8_BEGIN
bne .Ldgemm_ncopy_L8_M8_BEGIN
/*********************************************************************************************/
dgemm_ncopy_L4_BEGIN:
.Ldgemm_ncopy_L4_BEGIN:
tst N, #7
ble dgemm_ncopy_L999
ble .Ldgemm_ncopy_L999
tst N, #4
ble dgemm_ncopy_L2_BEGIN
ble .Ldgemm_ncopy_L2_BEGIN
dgemm_ncopy_L4_M8_BEGIN:
.Ldgemm_ncopy_L4_M8_BEGIN:
mov A01, A00
add A02, A01, LDA
@@ -423,118 +423,118 @@ dgemm_ncopy_L4_M8_BEGIN:
asr I, M, #3 // I = M / 8
cmp I, #0
ble dgemm_ncopy_L4_M8_40
ble .Ldgemm_ncopy_L4_M8_40
dgemm_ncopy_L4_M8_20:
.Ldgemm_ncopy_L4_M8_20:
COPY8x4
subs I , I , #1
bne dgemm_ncopy_L4_M8_20
bne .Ldgemm_ncopy_L4_M8_20
dgemm_ncopy_L4_M8_40:
.Ldgemm_ncopy_L4_M8_40:
and I, M , #7
cmp I, #0
ble dgemm_ncopy_L4_M8_END
ble .Ldgemm_ncopy_L4_M8_END
dgemm_ncopy_L4_M8_60:
.Ldgemm_ncopy_L4_M8_60:
COPY1x4
subs I , I , #1
bne dgemm_ncopy_L4_M8_60
bne .Ldgemm_ncopy_L4_M8_60
dgemm_ncopy_L4_M8_END:
.Ldgemm_ncopy_L4_M8_END:
/*********************************************************************************************/
dgemm_ncopy_L2_BEGIN:
.Ldgemm_ncopy_L2_BEGIN:
tst N, #3
ble dgemm_ncopy_L999
ble .Ldgemm_ncopy_L999
tst N, #2
ble dgemm_ncopy_L1_BEGIN
ble .Ldgemm_ncopy_L1_BEGIN
dgemm_ncopy_L2_M8_BEGIN:
.Ldgemm_ncopy_L2_M8_BEGIN:
mov A01, A00
add A02, A01, LDA
add A00, A02, LDA
asr I, M, #3 // I = M / 8
cmp I, #0
ble dgemm_ncopy_L2_M8_40
ble .Ldgemm_ncopy_L2_M8_40
dgemm_ncopy_L2_M8_20:
.Ldgemm_ncopy_L2_M8_20:
COPY8x2
subs I , I , #1
bne dgemm_ncopy_L2_M8_20
bne .Ldgemm_ncopy_L2_M8_20
dgemm_ncopy_L2_M8_40:
.Ldgemm_ncopy_L2_M8_40:
and I, M , #7
cmp I, #0
ble dgemm_ncopy_L2_M8_END
ble .Ldgemm_ncopy_L2_M8_END
dgemm_ncopy_L2_M8_60:
.Ldgemm_ncopy_L2_M8_60:
COPY1x2
subs I , I , #1
bne dgemm_ncopy_L2_M8_60
bne .Ldgemm_ncopy_L2_M8_60
dgemm_ncopy_L2_M8_END:
.Ldgemm_ncopy_L2_M8_END:
/*********************************************************************************************/
dgemm_ncopy_L1_BEGIN:
.Ldgemm_ncopy_L1_BEGIN:
tst N, #1
ble dgemm_ncopy_L999
ble .Ldgemm_ncopy_L999
dgemm_ncopy_L1_M8_BEGIN:
.Ldgemm_ncopy_L1_M8_BEGIN:
mov A01, A00
asr I, M, #3 // I = M / 8
cmp I, #0
ble dgemm_ncopy_L1_M8_40
ble .Ldgemm_ncopy_L1_M8_40
dgemm_ncopy_L1_M8_20:
.Ldgemm_ncopy_L1_M8_20:
COPY8x1
subs I , I , #1
bne dgemm_ncopy_L1_M8_20
bne .Ldgemm_ncopy_L1_M8_20
dgemm_ncopy_L1_M8_40:
.Ldgemm_ncopy_L1_M8_40:
and I, M , #7
cmp I, #0
ble dgemm_ncopy_L1_M8_END
ble .Ldgemm_ncopy_L1_M8_END
dgemm_ncopy_L1_M8_60:
.Ldgemm_ncopy_L1_M8_60:
COPY1x1
subs I , I , #1
bne dgemm_ncopy_L1_M8_60
bne .Ldgemm_ncopy_L1_M8_60
dgemm_ncopy_L1_M8_END:
.Ldgemm_ncopy_L1_M8_END:
dgemm_ncopy_L999:
.Ldgemm_ncopy_L999:
mov x0, #0
RESTORE_REGS