diff --git a/kernel/arm/asum_vfp.S b/kernel/arm/asum_vfp.S index 2b6ceb191..afa936e87 100644 --- a/kernel/arm/asum_vfp.S +++ b/kernel/arm/asum_vfp.S @@ -368,11 +368,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .align 5 #if defined(DOUBLE) - vsub.f64 d0 , d0 , d0 - vsub.f64 d1 , d1 , d1 + vldr.f64 d0 , =0 + vldr.f64 d1 , =0 #else - vsub.f32 s0 , s0 , s0 - vsub.f32 s1 , s1 , s1 + vldr.f32 s0 , =0 + vldr.f32 s1 , =0 #endif cmp N, #0 diff --git a/kernel/arm/cdot_vfp.S b/kernel/arm/cdot_vfp.S index 2ccda3397..b8edb49f6 100644 --- a/kernel/arm/cdot_vfp.S +++ b/kernel/arm/cdot_vfp.S @@ -188,10 +188,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov Y, OLD_Y ldr INC_Y, OLD_INC_Y - vsub.f32 s0 , s0 , s0 - vsub.f32 s1 , s1 , s1 - vsub.f32 s2 , s2 , s2 - vsub.f32 s3 , s3 , s3 + vldr.f32 s0 , =0 + vldr.f32 s1 , =0 + vldr.f32 s2 , =0 + vldr.f32 s3 , =0 cmp N, #0 ble cdot_kernel_L999 diff --git a/kernel/arm/cgemm_kernel_2x2_vfp.S b/kernel/arm/cgemm_kernel_2x2_vfp.S index a059ef505..d2c6028e9 100644 --- a/kernel/arm/cgemm_kernel_2x2_vfp.S +++ b/kernel/arm/cgemm_kernel_2x2_vfp.S @@ -138,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -340,7 +340,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -514,7 +514,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -681,7 +681,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 .endm diff --git a/kernel/arm/cgemm_kernel_2x2_vfpv3.S b/kernel/arm/cgemm_kernel_2x2_vfpv3.S index 8bc200c9f..3b64f6ce0 100644 --- a/kernel/arm/cgemm_kernel_2x2_vfpv3.S +++ b/kernel/arm/cgemm_kernel_2x2_vfpv3.S @@ -147,7 +147,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -368,7 +367,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -550,7 +548,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -730,7 +727,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.macro INIT1x1 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s24, s16 vmov.f32 s25, s16 @@ -1008,9 +1004,12 @@ cgemm_kernel_L2_M2_32: b cgemm_kernel_L2_M2_44 +cgemm_f32zero: + .word 0x00000000 cgemm_kernel_L2_M2_40: + vldr.f32 s16 , cgemm_f32zero INIT2x2 @@ -1044,6 +1043,7 @@ cgemm_kernel_L2_M1_BEGIN: cgemm_kernel_L2_M1_20: + vldr.f32 s16 , cgemm_f32zero INIT1x2 mov BO, BC @@ -1219,6 +1219,7 @@ cgemm_kernel_L1_M2_32: cgemm_kernel_L1_M2_40: + vldr.f32 s16 , =0 INIT2x1 @@ -1252,6 +1253,7 @@ cgemm_kernel_L1_M1_BEGIN: cgemm_kernel_L1_M1_20: + vldr.f32 s16 , =0 INIT1x1 mov BO, BC diff --git a/kernel/arm/cgemv_n_vfp.S b/kernel/arm/cgemv_n_vfp.S index 712e7f0d8..cec818ef1 100644 --- a/kernel/arm/cgemv_n_vfp.S +++ b/kernel/arm/cgemv_n_vfp.S @@ -117,7 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F4 pld [ YO, #Y_PRE ] - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 .endm @@ -267,7 +267,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S4 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 .endm diff --git a/kernel/arm/cgemv_t_vfp.S b/kernel/arm/cgemv_t_vfp.S index 52276a06f..c164e95d4 100644 --- a/kernel/arm/cgemv_t_vfp.S +++ b/kernel/arm/cgemv_t_vfp.S @@ -116,10 +116,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 - vsub.f32 s14, s14, s14 - vsub.f32 s15, s15, s15 + vldr.f32 s12, =0 + vldr.f32 s13, =0 + vldr.f32 s14, =0 + vldr.f32 s15, =0 .endm @@ -172,8 +172,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 + vldr.f32 s12, =0 + vldr.f32 s13, =0 .endm @@ -215,10 +215,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 - vsub.f32 s14, s14, s14 - vsub.f32 s15, s15, s15 + vldr.f32 s12, =0 + vldr.f32 s13, =0 + vldr.f32 s14, =0 + vldr.f32 s15, =0 .endm @@ -281,8 +281,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 + vldr.f32 s12, =0 + vldr.f32 s13, =0 .endm diff --git a/kernel/arm/ctrmm_kernel_2x2_vfp.S b/kernel/arm/ctrmm_kernel_2x2_vfp.S index a48c8608d..e6b8a99f3 100644 --- a/kernel/arm/ctrmm_kernel_2x2_vfp.S +++ b/kernel/arm/ctrmm_kernel_2x2_vfp.S @@ -136,7 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -301,10 +301,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 - vsub.f32 s6, s6, s6 - vsub.f32 s7, s7, s7 + vldr.f32 s4, =0 + vldr.f32 s5, =0 + vldr.f32 s6, =0 + vldr.f32 s7, =0 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -318,10 +318,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
fstmias CO1, { s4 - s7 } - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 - vsub.f32 s6, s6, s6 - vsub.f32 s7, s7, s7 + vldr.f32 s4, =0 + vldr.f32 s5, =0 + vldr.f32 s6, =0 + vldr.f32 s7, =0 FMAC_R1 s4 , s0 , s12 FMAC_I1 s5 , s0 , s13 @@ -343,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -490,8 +490,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 + vldr.f32 s4, =0 + vldr.f32 s5, =0 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -500,8 +500,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fstmias CO1, { s4 - s5 } - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 + vldr.f32 s4, =0 + vldr.f32 s5, =0 FMAC_R1 s4 , s0 , s12 FMAC_I1 s5 , s0 , s13 @@ -519,7 +519,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -663,10 +663,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 - vsub.f32 s6, s6, s6 - vsub.f32 s7, s7, s7 + vldr.f32 s4, =0 + vldr.f32 s5, =0 + vldr.f32 s6, =0 + vldr.f32 s7, =0 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -689,7 +689,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 .endm @@ -795,8 +795,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 + vldr.f32 s4, =0 + vldr.f32 s5, =0 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 diff --git a/kernel/arm/ctrmm_kernel_2x2_vfpv3.S b/kernel/arm/ctrmm_kernel_2x2_vfpv3.S index f06e260ea..5171c6e9c 100644 --- a/kernel/arm/ctrmm_kernel_2x2_vfpv3.S +++ b/kernel/arm/ctrmm_kernel_2x2_vfpv3.S @@ -134,7 +134,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -351,7 +350,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -529,7 +527,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -706,7 +703,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s24, s16 vmov.f32 s25, s16 @@ -1020,9 +1016,12 @@ _L2_M2_32: b _L2_M2_44 +ctrmm_f32zero: + .word 0x00000000 _L2_M2_40: + vldr.f32 s16 , ctrmm_f32zero INIT2x2 @@ -1074,6 +1073,7 @@ _L2_M1_BEGIN: _L2_M1_20: + vldr.f32 s16 , ctrmm_f32zero INIT1x2 #if (defined(LEFT) && defined(TRANSA)) || \ @@ -1337,6 +1337,7 @@ _L1_M2_32: _L1_M2_40: + vldr.f32 s16 , =0 INIT2x1 @@ -1389,6 +1390,7 @@ _L1_M1_BEGIN: _L1_M1_20: + vldr.f32 s16 , =0 INIT1x1 #if (defined(LEFT) && defined(TRANSA)) || \ diff --git a/kernel/arm/ddot_vfp.S b/kernel/arm/ddot_vfp.S index 71b3c1ce8..b402bccf6 100644 --- a/kernel/arm/ddot_vfp.S +++ b/kernel/arm/ddot_vfp.S @@ -152,8 +152,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
mov Y, OLD_Y ldr INC_Y, OLD_INC_Y - vsub.f64 d0 , d0 , d0 - vsub.f64 d1 , d1 , d1 + vldr.f64 d0 , =0 + vldr.f64 d1 , =0 cmp N, #0 ble ddot_kernel_L999 diff --git a/kernel/arm/dgemm_kernel_4x2_vfp.S b/kernel/arm/dgemm_kernel_4x2_vfp.S index 9fb881d73..8bb2bd3be 100644 --- a/kernel/arm/dgemm_kernel_4x2_vfp.S +++ b/kernel/arm/dgemm_kernel_4x2_vfp.S @@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -173,7 +173,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9, d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -233,7 +233,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d12, d8 .endm @@ -283,7 +283,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -338,7 +338,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 .endm @@ -380,7 +380,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 .endm diff --git a/kernel/arm/dgemm_kernel_4x4_vfpv3.S b/kernel/arm/dgemm_kernel_4x4_vfpv3.S index 7c1dbae8a..32f3b6974 100644 --- a/kernel/arm/dgemm_kernel_4x4_vfpv3.S +++ b/kernel/arm/dgemm_kernel_4x4_vfpv3.S @@ -102,7 +102,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x4 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -376,7 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x4 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 @@ -470,7 +470,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x4 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d20, d16 vmov.f64 d24, d16 vmov.f64 d28, d16 @@ -533,7 +533,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -617,7 +617,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 @@ -678,7 +678,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d20, d16 .endm @@ -723,7 +723,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -782,7 +782,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 .endm @@ -826,7 +826,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.macro INIT1x1 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 .endm diff --git a/kernel/arm/dtrmm_kernel_4x2_vfp.S b/kernel/arm/dtrmm_kernel_4x2_vfp.S index 3528e0860..45c8c0c0b 100644 --- a/kernel/arm/dtrmm_kernel_4x2_vfp.S +++ b/kernel/arm/dtrmm_kernel_4x2_vfp.S @@ -90,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -165,7 +165,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9, d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d12, d8 .endm @@ -268,7 +268,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -318,7 +318,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 .endm @@ -357,7 +357,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 .endm diff --git a/kernel/arm/dtrmm_kernel_4x4_vfpv3.S b/kernel/arm/dtrmm_kernel_4x4_vfpv3.S index 04cc451d1..cd11b146d 100644 --- a/kernel/arm/dtrmm_kernel_4x4_vfpv3.S +++ b/kernel/arm/dtrmm_kernel_4x4_vfpv3.S @@ -89,7 +89,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x4 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -386,7 +386,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x4 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 @@ -468,7 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x4 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d20, d16 vmov.f64 d24, d16 vmov.f64 d28, d16 @@ -527,7 +527,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -601,7 +601,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 @@ -656,7 +656,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d20, d16 .endm @@ -699,7 +699,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -753,7 +753,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 .endm @@ -794,7 +794,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.macro INIT1x1 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 .endm diff --git a/kernel/arm/gemv_n_vfp.S b/kernel/arm/gemv_n_vfp.S index 505033c18..0e85e2eca 100644 --- a/kernel/arm/gemv_n_vfp.S +++ b/kernel/arm/gemv_n_vfp.S @@ -79,7 +79,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pld [ YO , #Y_PRE ] pld [ YO , #Y_PRE+32 ] - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 vmov.f64 d10 , d8 vmov.f64 d11 , d8 @@ -158,7 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d12 , d12 , d12 + vldr.f64 d12 , =0 .endm @@ -185,7 +185,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S4 - vsub.f64 d12 , d12 , d12 + vldr.f64 d12 , =0 vmov.f64 d13 , d12 vmov.f64 d14 , d12 vmov.f64 d15 , d12 @@ -245,7 +245,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d12 , d12 , d12 + vldr.f64 d12 , =0 .endm @@ -279,7 +279,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pld [ YO , #Y_PRE ] - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 vmov.f32 s10 , s8 vmov.f32 s11 , s8 @@ -357,7 +357,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s12 , s12 , s12 + vldr.f32 s12 , =0 .endm @@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S4 - vsub.f32 s12 , s12 , s12 + vldr.f32 s12 , =0 vmov.f32 s13 , s12 vmov.f32 s14 , s12 vmov.f32 s15 , s12 @@ -445,7 +445,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s12 , s12 , s12 + vldr.f32 s12 , =0 .endm diff --git a/kernel/arm/gemv_n_vfpv3.S b/kernel/arm/gemv_n_vfpv3.S index 0e9ba0c9c..84dbde4f2 100644 --- a/kernel/arm/gemv_n_vfpv3.S +++ b/kernel/arm/gemv_n_vfpv3.S @@ -79,7 +79,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pld [ YO , #Y_PRE ] pld [ YO , #Y_PRE+32 ] - vsub.f64 d24 , d24 , d24 + vldr.f64 d24 , =0 vmov.f64 d25 , d24 vmov.f64 d26 , d24 vmov.f64 d27 , d24 @@ -147,7 +147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d24 , d24 , d24 + vldr.f64 d24 , =0 .endm @@ -175,7 +175,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S8 - vsub.f64 d24 , d24 , d24 + vldr.f64 d24 , =0 vmov.f64 d25 , d24 vmov.f64 d26 , d24 vmov.f64 d27 , d24 @@ -269,7 +269,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d24 , d24 , d24 + vldr.f64 d24 , =0 .endm @@ -302,7 +302,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pld [ YO , #Y_PRE ] - vsub.f32 s24 , s24 , s24 + vldr.f32 s24 , =0 vmov.f32 s25 , s24 vmov.f32 s26 , s24 vmov.f32 s27 , s24 @@ -368,7 +368,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s24 , s24 , s24 + vldr.f32 s24 , =0 .endm @@ -396,7 +396,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S8 - vsub.f32 s24 , s24 , s24 + vldr.f32 s24 , =0 vmov.f32 s25 , s24 vmov.f32 s26 , s24 vmov.f32 s27 , s24 @@ -489,7 +489,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.macro INIT_S1 - vsub.f32 s24 , s24 , s24 + vldr.f32 s24 , =0 .endm diff --git a/kernel/arm/gemv_t_vfp.S b/kernel/arm/gemv_t_vfp.S index 6a56ae9d1..da92e4aa2 100644 --- a/kernel/arm/gemv_t_vfp.S +++ b/kernel/arm/gemv_t_vfp.S @@ -75,8 +75,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f64 d2 , d2 , d2 - vsub.f64 d3 , d3 , d3 + vldr.f64 d2 , =0 + vldr.f64 d3 , =0 .endm @@ -123,7 +123,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d2 , d2 , d2 + vldr.f64 d2 , =0 .endm @@ -160,8 +160,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f64 d2 , d2 , d2 - vsub.f64 d3 , d3 , d3 + vldr.f64 d2 , =0 + vldr.f64 d3 , =0 .endm @@ -224,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d2 , d2 , d2 + vldr.f64 d2 , =0 .endm @@ -276,8 +276,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f32 s2 , s2 , s2 - vsub.f32 s3 , s3 , s3 + vldr.f32 s2 , =0 + vldr.f32 s3 , =0 .endm @@ -321,7 +321,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s2 , s2 , s2 + vldr.f32 s2 , =0 .endm @@ -356,8 +356,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f32 s2 , s2 , s2 - vsub.f32 s3 , s3 , s3 + vldr.f32 s2 , =0 + vldr.f32 s3 , =0 .endm @@ -418,7 +418,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s2 , s2 , s2 + vldr.f32 s2 , =0 .endm diff --git a/kernel/arm/gemv_t_vfpv3.S b/kernel/arm/gemv_t_vfpv3.S index 7ae5799bc..26127870b 100644 --- a/kernel/arm/gemv_t_vfpv3.S +++ b/kernel/arm/gemv_t_vfpv3.S @@ -75,8 +75,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f64 d4 , d4 , d4 - vsub.f64 d5 , d5 , d5 + vldr.f64 d4 , =0 + vldr.f64 d5 , =0 .endm @@ -123,8 +123,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f64 d4 , d4 , d4 - vsub.f64 d5 , d5 , d5 + vldr.f64 d4 , =0 + vldr.f64 d5 , =0 .endm @@ -183,7 +183,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d4 , d4 , d4 + vldr.f64 d4 , =0 .endm @@ -220,7 +220,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d4 , d4 , d4 + vldr.f64 d4 , =0 .endm @@ -268,8 +268,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f32 s4 , s4 , s4 - vsub.f32 s5 , s5 , s5 + vldr.f32 s4 , =0 + vldr.f32 s5 , =0 .endm @@ -313,8 +313,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f32 s4 , s4 , s4 - vsub.f32 s5 , s5 , s5 + vldr.f32 s4 , =0 + vldr.f32 s5 , =0 .endm @@ -371,7 +371,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s4 , s4 , s4 + vldr.f32 s4 , =0 .endm @@ -406,7 +406,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s4 , s4 , s4 + vldr.f32 s4 , =0 .endm diff --git a/kernel/arm/iamax_vfp.S b/kernel/arm/iamax_vfp.S index f50c28e42..a77980518 100644 --- a/kernel/arm/iamax_vfp.S +++ b/kernel/arm/iamax_vfp.S @@ -342,9 +342,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
push {r4} #if defined(DOUBLE) - vsub.f64 d0 , d0 , d0 + vldr.f64 d0 , =0 #else - vsub.f32 s0 , s0 , s0 + vldr.f32 s0 , =0 #endif mov INDEX, #0 diff --git a/kernel/arm/nrm2_vfp.S b/kernel/arm/nrm2_vfp.S index d80179a11..47279df8e 100644 --- a/kernel/arm/nrm2_vfp.S +++ b/kernel/arm/nrm2_vfp.S @@ -446,12 +446,12 @@ nrm2_begin: #if defined(COMPLEX) #if defined(DOUBLE) - vsub.f64 d0 , d0 , d0 // scale=0.0 + vldr.f64 d0 , =0 // scale=0.0 vldr.64 d1 , znrm2_one // ssq=1.0 vmov.f64 d7 , d1 // value 1.0 vmov.f64 d6 , d0 // value 0.0 #else - vsub.f32 s0 , s0 , s0 // scale=0.0 + vldr.f32 s0 , =0 // scale=0.0 vldr.32 s1 , cnrm2_one // ssq=1.0 vmov.f32 s7 , s1 // value 1.0 vmov.f32 s6 , s0 // value 0.0 @@ -460,12 +460,12 @@ nrm2_begin: #else #if defined(DOUBLE) - vsub.f64 d0 , d0 , d0 // scale=0.0 + vldr.f64 d0 , =0 // scale=0.0 vldr.64 d1 , dnrm2_one // ssq=1.0 vmov.f64 d7 , d1 // value 1.0 vmov.f64 d6 , d0 // value 0.0 #else - vsub.f32 s0 , s0 , s0 // scale=0.0 + vldr.f32 s0 , =0 // scale=0.0 vldr.32 s1 , snrm2_one // ssq=1.0 vmov.f32 s7 , s1 // value 1.0 vmov.f32 s6 , s0 // value 0.0 diff --git a/kernel/arm/nrm2_vfpv3.S b/kernel/arm/nrm2_vfpv3.S index 34b251e9a..8c47a5e5f 100644 --- a/kernel/arm/nrm2_vfpv3.S +++ b/kernel/arm/nrm2_vfpv3.S @@ -400,17 +400,22 @@ KERNEL_S1_END_\@: * End of macro definitions **************************************************************************************/ + .align 5 +nrm2_zeros: + .word 0x00000000 + .word 0x00000000 + PROLOGUE .align 5 #if defined(DOUBLE) - vsub.f64 d0 , d0 , d0 // scale=0.0 + vldr.f64 d0 , nrm2_zeros // scale=0.0 vmov.f64 d1 , #1.0 // ssq=1.0 vmov.f64 d7 , d1 // value 1.0 vmov.f64 d6 , d0 // value 0.0 #else - vsub.f32 s0 , s0 , s0 // scale=0.0 + vldr.f32 s0 , nrm2_zeros // scale=0.0 vmov.f32 s1 , #1.0 // ssq=1.0 vmov.f32 s7 , s1 // value 1.0 vmov.f32 s6 , s0 // value 0.0 diff --git a/kernel/arm/sdot_vfp.S b/kernel/arm/sdot_vfp.S index a6fcf2ae6..0f350e8c6 100644 --- a/kernel/arm/sdot_vfp.S +++ b/kernel/arm/sdot_vfp.S @@ -242,13 +242,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(DSDOT) - vsub.f64 d0 , d0 , d0 - vsub.f64 d1 , d1 , d1 + vldr.f64 d0 , =0 + vldr.f64 d1 , =0 #else - vsub.f32 s0 , s0 , s0 - vsub.f32 s1 , s1 , s1 + vldr.f32 s0 , =0 + vldr.f32 s1 , =0 #endif diff --git a/kernel/arm/sgemm_kernel_4x2_vfp.S b/kernel/arm/sgemm_kernel_4x2_vfp.S index 4dfb7333d..bff8a7e39 100644 --- a/kernel/arm/sgemm_kernel_4x2_vfp.S +++ b/kernel/arm/sgemm_kernel_4x2_vfp.S @@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -161,7 +161,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9, s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -221,7 +221,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s12, s8 .endm @@ -271,7 +271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -326,7 +326,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 .endm @@ -368,7 +368,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT1x1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 .endm diff --git a/kernel/arm/sgemm_kernel_4x4_vfpv3.S b/kernel/arm/sgemm_kernel_4x4_vfpv3.S index 078f14a80..ef0b8fc90 100644 --- a/kernel/arm/sgemm_kernel_4x4_vfpv3.S +++ b/kernel/arm/sgemm_kernel_4x4_vfpv3.S @@ -102,7 +102,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x4 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -349,7 +348,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x4 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -443,7 +441,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x4 - vsub.f32 s16 , s16 , s16 vmov.f32 s20, s16 vmov.f32 s24, s16 vmov.f32 s28, s16 @@ -506,7 +503,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -590,7 +586,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -651,7 +646,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s16 , s16 , s16 vmov.f32 s20, s16 .endm @@ -696,7 +690,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -755,7 +748,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 .endm @@ -797,13 +789,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /******************************************************************************/ -.macro INIT1x1 - - vsub.f32 s16 , s16 , s16 - -.endm - - .macro KERNEL1x1_SUB @@ -930,9 +915,24 @@ sgemm_kernel_L4_M4_32: b sgemm_kernel_L4_M4_44 + /* Note on loading zero into an FP register: + * vsub rX, rX, rX doesn't work: it yields NaN, not zero, when rX holds NaN or an infinity + * vmov.Fn cannot encode 0.0 as a floating-point immediate + * vmov.In and veor are NEON instructions, unavailable on plain VFP + * vldr rX, =0 doesn't work here, since the function is larger than 2KB + * and the assembler is unable to insert a constant pool inside + * the function body. + * + * Therefore, the best way I've found is to hand-place multiple + * copies of the zero constant and `vldr` from whichever copy is + * in range of the instruction.
+ */ +sgemm_f32zero: + .word 0x00000000 sgemm_kernel_L4_M4_40: + vldr.f32 s16 , sgemm_f32zero INIT4x4 @@ -969,6 +969,7 @@ sgemm_kernel_L4_M2_BEGIN: sgemm_kernel_L4_M2_20: + vldr.f32 s16 , sgemm_f32zero INIT2x4 mov BO, BC @@ -1014,9 +1015,14 @@ sgemm_kernel_L4_M1_BEGIN: tst I, #1 // I = I % 2 ble sgemm_kernel_L4_END + b sgemm_kernel_L4_M1_20 + +sgemm_f32zero4: + .word 0x00000000 sgemm_kernel_L4_M1_20: + vldr.f32 s16 , sgemm_f32zero4 INIT1x4 mov BO, BC @@ -1100,6 +1106,7 @@ sgemm_kernel_L2_M4_BEGIN: sgemm_kernel_L2_M4_20: + vldr.f32 s16 , sgemm_f32zero3 INIT4x2 mov BO, BC @@ -1121,7 +1128,6 @@ sgemm_kernel_L2_M4_22: subs L, L, #1 bgt sgemm_kernel_L2_M4_22 - sgemm_kernel_L2_M4_40: ands L , K1, #7 // L = L % 8 @@ -1142,7 +1148,10 @@ sgemm_kernel_L2_M4_END: subs I, I, #1 bgt sgemm_kernel_L2_M4_20 + b sgemm_kernel_L2_M2_BEGIN +sgemm_f32zero3: + .word 0x00000000 sgemm_kernel_L2_M2_BEGIN: @@ -1155,6 +1164,7 @@ sgemm_kernel_L2_M2_BEGIN: sgemm_kernel_L2_M2_20: + vldr.f32 s16 , sgemm_f32zero3 INIT2x2 mov BO, BC @@ -1203,6 +1213,7 @@ sgemm_kernel_L2_M1_BEGIN: sgemm_kernel_L2_M1_20: + vldr.f32 s16 , sgemm_f32zero3 INIT1x2 mov BO, BC @@ -1278,6 +1289,7 @@ sgemm_kernel_L1_M4_BEGIN: sgemm_kernel_L1_M4_20: + vldr.f32 s16 , sgemm_f32zero3 INIT4x1 mov BO, BC @@ -1333,6 +1345,7 @@ sgemm_kernel_L1_M2_BEGIN: sgemm_kernel_L1_M2_20: + vldr.f32 s16 , sgemm_f32zero2 INIT2x1 mov BO, BC @@ -1381,7 +1394,7 @@ sgemm_kernel_L1_M1_BEGIN: sgemm_kernel_L1_M1_20: - INIT1x1 + vldr.f32 s16 , sgemm_f32zero2 mov BO, BC asrs L , K1, #3 // L = L / 8 @@ -1434,3 +1447,5 @@ sgemm_kernel_L999: EPILOGUE +sgemm_f32zero2: + .word 0x00000000 diff --git a/kernel/arm/strmm_kernel_4x2_vfp.S b/kernel/arm/strmm_kernel_4x2_vfp.S index e7511ffef..c7ae94ddd 100644 --- a/kernel/arm/strmm_kernel_4x2_vfp.S +++ b/kernel/arm/strmm_kernel_4x2_vfp.S @@ -90,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -156,7 +156,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9, s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -211,7 +211,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s12, s8 .endm @@ -259,7 +259,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -309,7 +309,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 vmov.f32 s9 , s8 .endm @@ -348,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + vldr.f32 s8 , =0 .endm diff --git a/kernel/arm/strmm_kernel_4x4_vfpv3.S b/kernel/arm/strmm_kernel_4x4_vfpv3.S index f6342a07d..f9b439e00 100644 --- a/kernel/arm/strmm_kernel_4x4_vfpv3.S +++ b/kernel/arm/strmm_kernel_4x4_vfpv3.S @@ -88,7 +88,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x4 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -322,7 +321,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.macro INIT2x4 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -405,7 +403,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x4 - vsub.f32 s16 , s16 , s16 vmov.f32 s20, s16 vmov.f32 s24, s16 vmov.f32 s28, s16 @@ -464,7 +461,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -538,7 +534,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -593,7 +588,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s16 , s16 , s16 vmov.f32 s20, s16 .endm @@ -636,7 +630,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -690,7 +683,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s16 , s16 , s16 vmov.f32 s17, s16 .endm @@ -729,13 +721,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /******************************************************************************/ -.macro INIT1x1 - - vsub.f32 s16 , s16 , s16 - -.endm - - .macro KERNEL1x1_SUB @@ -955,9 +940,24 @@ _L4_M4_32: b _L4_M4_44 + /* Note on loading zero into an FP register: + * vsub rX, rX, rX doesn't work: it yields NaN, not zero, when rX holds NaN or an infinity + * vmov.Fn cannot encode 0.0 as a floating-point immediate + * vmov.In and veor are NEON instructions, unavailable on plain VFP + * vldr rX, =0 doesn't work here, since the function is larger than 2KB + * and the assembler is unable to insert a constant pool inside + * the function body. + * + * Therefore, the best way I've found is to hand-place multiple + * copies of the zero constant and `vldr` from whichever copy is + * in range of the instruction.
+ */ +strmm_f32zero: + .word 0x00000000 _L4_M4_40: + vldr.f32 s16 , strmm_f32zero INIT4x4 @@ -1014,6 +1014,7 @@ _L4_M2_BEGIN: _L4_M2_20: + vldr.f32 s16 , strmm_f32zero INIT2x4 #if (defined(LEFT) && defined(TRANSA)) || \ @@ -1112,6 +1113,7 @@ _L4_M1_BEGIN: _L4_M1_20: + vldr.f32 s16 , strmm_f32zero3 INIT1x4 #if (defined(LEFT) && defined(TRANSA)) || \ @@ -1252,9 +1254,14 @@ _L2_M4_BEGIN: ldr I, M asrs I, I, #2 // I = I / 4 ble _L2_M2_BEGIN + b _L2_M4_20 + +strmm_f32zero3: + .word 0x00000000 _L2_M4_20: + vldr.f32 s16 , strmm_f32zero3 INIT4x2 #if (defined(LEFT) && defined(TRANSA)) || \ @@ -1360,6 +1367,7 @@ _L2_M2_BEGIN: _L2_M2_20: + vldr.f32 s16 , strmm_f32zero3 INIT2x2 #if (defined(LEFT) && defined(TRANSA)) || \ @@ -1458,6 +1466,7 @@ _L2_M1_BEGIN: _L2_M1_20: + vldr.f32 s16 , strmm_f32zero4 INIT1x2 #if (defined(LEFT) && defined(TRANSA)) || \ @@ -1591,9 +1600,14 @@ _L1_M4_BEGIN: ldr I, M asrs I, I, #2 // I = I / 4 ble _L1_M2_BEGIN + b _L1_M4_20 + +strmm_f32zero4: + .word 0x00000000 _L1_M4_20: + vldr.f32 s16 , strmm_f32zero4 INIT4x1 #if (defined(LEFT) && defined(TRANSA)) || \ @@ -1700,6 +1714,7 @@ _L1_M2_BEGIN: _L1_M2_20: + vldr.f32 s16 , strmm_f32zero2 INIT2x1 #if (defined(LEFT) && defined(TRANSA)) || \ @@ -1798,7 +1813,7 @@ _L1_M1_BEGIN: _L1_M1_20: - INIT1x1 + vldr.f32 s16 , strmm_f32zero2 #if (defined(LEFT) && defined(TRANSA)) || \ (!defined(LEFT) && !defined(TRANSA)) @@ -1882,3 +1897,5 @@ _L999: EPILOGUE +strmm_f32zero2: + .word 0x00000000 diff --git a/kernel/arm/zdot_vfp.S b/kernel/arm/zdot_vfp.S index 622169bb9..a2327db18 100644 --- a/kernel/arm/zdot_vfp.S +++ b/kernel/arm/zdot_vfp.S @@ -190,10 +190,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. mov Y, OLD_Y ldr INC_Y, OLD_INC_Y - vsub.f64 d0 , d0 , d0 - vsub.f64 d1 , d1 , d1 - vsub.f64 d2 , d2 , d2 - vsub.f64 d3 , d3 , d3 + vldr.f64 d0 , =0 + vldr.f64 d1 , =0 + vldr.f64 d2 , =0 + vldr.f64 d3 , =0 cmp N, #0 ble zdot_kernel_L999 diff --git a/kernel/arm/zgemm_kernel_2x2_vfp.S b/kernel/arm/zgemm_kernel_2x2_vfp.S index f4134eaf6..28b737a9c 100644 --- a/kernel/arm/zgemm_kernel_2x2_vfp.S +++ b/kernel/arm/zgemm_kernel_2x2_vfp.S @@ -131,7 +131,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -383,7 +383,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -557,7 +557,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -724,7 +724,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 .endm diff --git a/kernel/arm/zgemm_kernel_2x2_vfpv3.S b/kernel/arm/zgemm_kernel_2x2_vfpv3.S index 29c3f4582..7333a09f9 100644 --- a/kernel/arm/zgemm_kernel_2x2_vfpv3.S +++ b/kernel/arm/zgemm_kernel_2x2_vfpv3.S @@ -147,7 +147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -404,7 +404,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.macro INIT1x2 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 @@ -586,7 +586,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -766,7 +766,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d24, d16 vmov.f64 d25, d16 diff --git a/kernel/arm/zgemv_n_vfp.S b/kernel/arm/zgemv_n_vfp.S index d4cab090a..9369f9815 100644 --- a/kernel/arm/zgemv_n_vfp.S +++ b/kernel/arm/zgemv_n_vfp.S @@ -117,7 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F4 pld [ YO, #Y_PRE ] - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -222,7 +222,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 .endm @@ -269,7 +269,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S4 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -386,7 +386,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 .endm diff --git a/kernel/arm/zgemv_t_vfp.S b/kernel/arm/zgemv_t_vfp.S index 500a3b608..789c9d8c6 100644 --- a/kernel/arm/zgemv_t_vfp.S +++ b/kernel/arm/zgemv_t_vfp.S @@ -117,10 +117,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 - vsub.f64 d14, d14, d14 - vsub.f64 d15, d15, d15 + vldr.f64 d12, =0 + vldr.f64 d13, =0 + vldr.f64 d14, =0 + vldr.f64 d15, =0 .endm @@ -173,8 +173,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 + vldr.f64 d12, =0 + vldr.f64 d13, =0 .endm @@ -216,10 +216,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 - vsub.f64 d14, d14, d14 - vsub.f64 d15, d15, d15 + vldr.f64 d12, =0 + vldr.f64 d13, =0 + vldr.f64 d14, =0 + vldr.f64 d15, =0 .endm @@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 + vldr.f64 d12, =0 + vldr.f64 d13, =0 .endm diff --git a/kernel/arm/ztrmm_kernel_2x2_vfp.S b/kernel/arm/ztrmm_kernel_2x2_vfp.S index 109ee07a8..9b63c115f 100644 --- a/kernel/arm/ztrmm_kernel_2x2_vfp.S +++ b/kernel/arm/ztrmm_kernel_2x2_vfp.S @@ -140,7 +140,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -356,10 +356,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 - vsub.f64 d6, d6 , d6 - vsub.f64 d7, d7 , d7 + vldr.f64 d4, =0 + vldr.f64 d5, =0 + vldr.f64 d6, =0 + vldr.f64 d7, =0 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -373,10 +373,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
fstmiad CO1, { d4 - d7 } - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 - vsub.f64 d6, d6 , d6 - vsub.f64 d7, d7 , d7 + vldr.f64 d4, =0 + vldr.f64 d5, =0 + vldr.f64 d6, =0 + vldr.f64 d7, =0 FMAC_R1 d4 , d0 , d12 FMAC_I1 d5 , d0 , d13 @@ -398,7 +398,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -545,8 +545,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 + vldr.f64 d4, =0 + vldr.f64 d5, =0 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -555,8 +555,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fstmiad CO1, { d4 - d5 } - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 + vldr.f64 d4, =0 + vldr.f64 d5, =0 FMAC_R1 d4 , d0 , d12 FMAC_I1 d5 , d0 , d13 @@ -574,7 +574,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -718,10 +718,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 - vsub.f64 d6, d6 , d6 - vsub.f64 d7, d7 , d7 + vldr.f64 d4, =0 + vldr.f64 d5, =0 + vldr.f64 d6, =0 + vldr.f64 d7, =0 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -744,7 +744,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + vldr.f64 d8 , =0 vmov.f64 d9 , d8 .endm @@ -850,8 +850,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 + vldr.f64 d4, =0 + vldr.f64 d5, =0 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 diff --git a/kernel/arm/ztrmm_kernel_2x2_vfpv3.S b/kernel/arm/ztrmm_kernel_2x2_vfpv3.S index 761dbccee..db83d4351 100644 --- a/kernel/arm/ztrmm_kernel_2x2_vfpv3.S +++ b/kernel/arm/ztrmm_kernel_2x2_vfpv3.S @@ -134,7 +134,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -388,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 @@ -566,7 +566,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -743,7 +743,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d16 , d16 , d16 + vldr.f64 d16 , =0 vmov.f64 d17, d16 vmov.f64 d24, d16 vmov.f64 d25, d16 diff --git a/kernel/generic/zgemmkernel_2x2.c b/kernel/generic/zgemmkernel_2x2.c index c368111dd..11af64679 100644 --- a/kernel/generic/zgemmkernel_2x2.c +++ b/kernel/generic/zgemmkernel_2x2.c @@ -797,7 +797,7 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alphar,FLOAT alphai,FLOAT* b res1 = res1-load0*load3; #endif #if defined(RN) || defined(RT) || defined(CN) || defined(CT) - load0 = ptrba[2*0+0]; + load0 = ptrba[2*0+0]; load1 = ptrbb[2*0+0]; res0 = res0+load0*load1; load2 = ptrba[2*0+1];
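For context, this is the IEEE 754 behaviour every hunk above is fixing: x - x, which is exactly what the removed vsub dX, dX, dX computes, evaluates to NaN rather than 0.0 whenever the register happens to hold NaN or an infinity left over from earlier work, so the accumulator registers were not reliably cleared. A minimal standalone C sketch of the effect (illustrative only, not part of the patch; the file and variable names are invented):

/* vsub_zero_demo.c - why "vsub dX, dX, dX" is not a safe zeroing idiom.
 * Build: gcc vsub_zero_demo.c && ./a.out
 */
#include <math.h>
#include <stdio.h>

int main(void)
{
    /* Possible stale register contents before an INITnxm macro runs. */
    double stale[] = { 0.0, 1.5, INFINITY, -INFINITY, NAN };

    for (size_t i = 0; i < sizeof stale / sizeof stale[0]; i++) {
        /* This subtraction is what vsub dX, dX, dX performs. */
        double z = stale[i] - stale[i];
        printf("x = %8g  ->  x - x = %g\n", stale[i], z);
    }
    /* Prints 0 for the finite entries but nan for the infinities and
     * the NaN input, which is why the patch loads an explicit zero
     * from memory instead. */
    return 0;
}

Hence the two replacement strategies in the patch: kernels small enough for the assembler's literal pool use the vldr rX, =0 pseudo-instruction, while kernels whose bodies outgrow the literal's limited PC-relative reach branch over hand-placed .word 0x00000000 constants (sgemm_f32zero, strmm_f32zero, cgemm_f32zero, ctrmm_f32zero, nrm2_zeros and their numbered copies) and load from whichever copy is in range.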