From d2bda3b56a06a30623a840408ee8874d54d1058c Mon Sep 17 00:00:00 2001 From: CodesWithWolves Date: Wed, 31 Mar 2021 15:38:07 -0400 Subject: [PATCH] Remove Unnecessary/Erroneous Reads In sgemm_tcopy_16.S COPY1x8 Macro Some code appears to have leaked into this macro when it was copied from the COPY2x8 macro above: we read 8 bytes into each of d4-d7 directly after reading 4 bytes into each of s4-s7. These 32 bytes in d4-d7 are unused, and the reads can overrun the boundary of allocated memory -- Valgrind detected this for a 128,1 copy, which is what drew my attention to it. Additionally, there is no need to update the addresses stored in A01-A08, as the only possible paths after running this macro will overwrite A01-A08 if looping to the next 8 rows, or overwrite A01-A04 if moving to 4 rows -- in which case A05-A08 are unused. --- kernel/arm64/sgemm_tcopy_16.S | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/kernel/arm64/sgemm_tcopy_16.S b/kernel/arm64/sgemm_tcopy_16.S index 12b80bdca..46198b3a2 100644 --- a/kernel/arm64/sgemm_tcopy_16.S +++ b/kernel/arm64/sgemm_tcopy_16.S @@ -270,11 +270,6 @@ All rights reserved. ldr s1, [A02] ldr s2, [A03] ldr s3, [A04] - - add A01, A01, #4 - add A02, A02, #4 - add A03, A03, #4 - add A04, A04, #4 stp s0, s1, [B04] add B04, B04, #8 @@ -285,11 +280,6 @@ All rights reserved. ldr s5, [A06] ldr s6, [A07] ldr s7, [A08] - - ldr d4, [A05], #8 - ldr d5, [A06], #8 - ldr d6, [A07], #8 - ldr d7, [A08], #8 stp s4, s5, [B04] add B04, B04, #8