Do not use vsub to clear the register values since it doesn't work with non-normal numbers (subtracting a register from itself yields NaN, not zero, when the register holds NaN or infinity).

This commit is contained in:
Yichao Yu
2016-01-04 23:36:25 -05:00
parent 5f2fa15e04
commit 594b9f4c73
33 changed files with 283 additions and 242 deletions

View File

@@ -147,7 +147,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT2x2
vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -368,7 +367,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT1x2
vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16
vmov.f32 s20, s16
vmov.f32 s21, s16
@@ -550,7 +548,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT2x1
vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16
vmov.f32 s18, s16
vmov.f32 s19, s16
@@ -730,7 +727,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT1x1
vsub.f32 s16 , s16 , s16
vmov.f32 s17, s16
vmov.f32 s24, s16
vmov.f32 s25, s16
@@ -1008,9 +1004,12 @@ cgemm_kernel_L2_M2_32:
b cgemm_kernel_L2_M2_44
cgemm_f32zero:
.word 0x00000000
cgemm_kernel_L2_M2_40:
vldr.f32 s16 , cgemm_f32zero
INIT2x2
@@ -1044,6 +1043,7 @@ cgemm_kernel_L2_M1_BEGIN:
cgemm_kernel_L2_M1_20:
vldr.f32 s16 , cgemm_f32zero
INIT1x2
mov BO, BC
@@ -1219,6 +1219,7 @@ cgemm_kernel_L1_M2_32:
cgemm_kernel_L1_M2_40:
vldr.f32 s16 , =0
INIT2x1
@@ -1252,6 +1253,7 @@ cgemm_kernel_L1_M1_BEGIN:
cgemm_kernel_L1_M1_20:
vldr.f32 s16 , =0
INIT1x1
mov BO, BC