diff --git a/kernel/arm/ctrmm_kernel_2x2_vfp.S b/kernel/arm/ctrmm_kernel_2x2_vfp.S index a48c8608d..8cb7ede9d 100644 --- a/kernel/arm/ctrmm_kernel_2x2_vfp.S +++ b/kernel/arm/ctrmm_kernel_2x2_vfp.S @@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-232] +#define FP_ZERO_0 [fp, #-232] +#define FP_ZERO_1 [fp, #-228] + + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -136,7 +141,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -301,10 +306,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 - vsub.f32 s6, s6, s6 - vsub.f32 s7, s7, s7 + flds s4, FP_ZERO + vmov.f32 s5, s4 + vmov.f32 s6, s4 + vmov.f32 s7, s4 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -318,10 +323,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fstmias CO1, { s4 - s7 } - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 - vsub.f32 s6, s6, s6 - vsub.f32 s7, s7, s7 + flds s4, FP_ZERO + vmov.f32 s5, s4 + vmov.f32 s6, s4 + vmov.f32 s7, s4 FMAC_R1 s4 , s0 , s12 FMAC_I1 s5 , s0 , s13 @@ -343,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -490,8 +495,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 + flds s4, FP_ZERO + vmov.f32 s5, s4 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -500,8 +505,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
fstmias CO1, { s4 - s5 } - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 + flds s4, FP_ZERO + vmov.f32 s5, s4 FMAC_R1 s4 , s0 , s12 FMAC_I1 s5 , s0 , s13 @@ -519,7 +524,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -663,10 +668,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 - vsub.f32 s6, s6, s6 - vsub.f32 s7, s7, s7 + flds s4, FP_ZERO + vmov.f32 s5, s4 + vmov.f32 s6, s4 + vmov.f32 s7, s4 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -689,7 +694,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 .endm @@ -795,8 +800,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 + flds s4, FP_ZERO + vmov.f32 s5, s4 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -831,6 +836,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s15} // store floating point registers + movs r4, #0 // r4 = 0, the word written into the FP_ZERO stack slot + str r4, FP_ZERO // word read by the flds loads of FP_ZERO, all bits zero is 0.0 + str r4, FP_ZERO_1 // following word zeroed as well. NOTE(review): the flds loads visible in this patch read only the first word, confirm this store is needed + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 4 * 2 str r3, LDC diff --git a/kernel/arm/dtrmm_kernel_4x2_vfp.S b/kernel/arm/dtrmm_kernel_4x2_vfp.S index 3528e0860..c578d2b1e 100644 --- a/kernel/arm/dtrmm_kernel_4x2_vfp.S +++ b/kernel/arm/dtrmm_kernel_4x2_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] +#define FP_ZERO [fp, #-232] +#define FP_ZERO_0 [fp, #-232] +#define FP_ZERO_1 [fp, #-228] + #define ALPHA [fp, #-276 ] #define B [fp, #4 ] @@ -90,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.macro INIT4x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -165,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9, d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -220,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d12, d8 .endm @@ -268,7 +272,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -318,7 +322,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 .endm @@ -357,7 +361,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO .endm @@ -409,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 // r4 = 0, the word written into both halves of the FP_ZERO stack slot + str r4, FP_ZERO // lower-address word of the 8 byte slot + str r4, FP_ZERO_1 // upper-address word, so an fldd of FP_ZERO reads all zero bits, which is 0.0 + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 8 str r3, LDC diff --git a/kernel/arm/strmm_kernel_4x2_vfp.S b/kernel/arm/strmm_kernel_4x2_vfp.S index e7511ffef..8f97644ec 100644 --- a/kernel/arm/strmm_kernel_4x2_vfp.S +++ b/kernel/arm/strmm_kernel_4x2_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] +#define FP_ZERO [fp, #-232] +#define FP_ZERO_0 [fp, #-232] +#define FP_ZERO_1 [fp, #-228] + #define ALPHA [fp, #-276 ] #define B [fp, #4 ] @@ -90,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.macro INIT4x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -156,7 +160,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9, s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -211,7 +215,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s12, s8 .endm @@ -259,7 +263,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -309,7 +313,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 .endm @@ -348,7 +352,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO .endm @@ -400,6 +404,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s15} // store floating point registers + movs r4, #0 // r4 = 0, the word written into the FP_ZERO stack slot + str r4, FP_ZERO // word read by the flds loads of FP_ZERO in the INIT macros, all bits zero is 0.0 + str r4, FP_ZERO_1 // following word zeroed as well. NOTE(review): the flds loads visible in this patch read only the first word, confirm this store is needed + ldr r3, OLD_LDC lsl r3, r3, #2 // ldc = ldc * 4 str r3, LDC diff --git a/kernel/arm/ztrmm_kernel_2x2_vfp.S b/kernel/arm/ztrmm_kernel_2x2_vfp.S index 109ee07a8..dc80b17b8 100644 --- a/kernel/arm/ztrmm_kernel_2x2_vfp.S +++ b/kernel/arm/ztrmm_kernel_2x2_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-232] +#define FP_ZERO_0 [fp, #-232] +#define FP_ZERO_1 [fp, #-228] + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -140,7 +144,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
.macro INIT2x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -356,10 +360,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 - vsub.f64 d6, d6 , d6 - vsub.f64 d7, d7 , d7 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 + vmov.f64 d6 , d4 + vmov.f64 d7 , d4 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -373,10 +377,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fstmiad CO1, { d4 - d7 } - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 - vsub.f64 d6, d6 , d6 - vsub.f64 d7, d7 , d7 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 + vmov.f64 d6 , d4 + vmov.f64 d7 , d4 FMAC_R1 d4 , d0 , d12 FMAC_I1 d5 , d0 , d13 @@ -398,7 +402,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -545,8 +549,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -555,8 +559,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fstmiad CO1, { d4 - d5 } - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 FMAC_R1 d4 , d0 , d12 FMAC_I1 d5 , d0 , d13 @@ -574,7 +578,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -718,10 +722,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 - vsub.f64 d6, d6 , d6 - vsub.f64 d7, d7 , d7 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 + vmov.f64 d6 , d4 + vmov.f64 d7 , d4 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -744,7 +748,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 .endm @@ -850,8 +854,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -888,6 +892,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 // r4 = 0, the word written into both halves of the FP_ZERO stack slot + str r4, FP_ZERO // lower-address word of the 8 byte slot + str r4, FP_ZERO_1 // upper-address word, so an fldd of FP_ZERO reads all zero bits, which is 0.0 + ldr r3, OLD_LDC lsl r3, r3, #4 // ldc = ldc * 8 * 2 str r3, LDC