From eaa63165df9354560380d945ce6f47eb47196b62 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Sun, 24 Jan 2016 14:42:38 +0100 Subject: [PATCH] updated cgemv and zgemv kernels for armv6 --- kernel/arm/cgemv_n_vfp.S | 16 ++++++++++++---- kernel/arm/cgemv_t_vfp.S | 32 ++++++++++++++++++++------------ kernel/arm/zgemv_n_vfp.S | 17 +++++++++++++---- kernel/arm/zgemv_t_vfp.S | 32 ++++++++++++++++++++------------ 4 files changed, 65 insertions(+), 32 deletions(-) diff --git a/kernel/arm/cgemv_n_vfp.S b/kernel/arm/cgemv_n_vfp.S index 712e7f0d8..5d2748644 100644 --- a/kernel/arm/cgemv_n_vfp.S +++ b/kernel/arm/cgemv_n_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I r12 +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + #define ALPHA_I [fp, #-236] #define ALPHA_R [fp, #-244] @@ -117,7 +121,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F4 pld [ YO, #Y_PRE ] - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -220,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 .endm @@ -267,7 +271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S4 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -384,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 .endm @@ -448,6 +452,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s15 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp OLD_M, #0 ble cgemvn_kernel_L999 diff --git a/kernel/arm/cgemv_t_vfp.S b/kernel/arm/cgemv_t_vfp.S index 52276a06f..76c8a8f18 100644 --- a/kernel/arm/cgemv_t_vfp.S +++ b/kernel/arm/cgemv_t_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I r12 +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + #define N [fp, #-252 ] #define A [fp, #-256 ] @@ -116,10 +120,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 - vsub.f32 s14, s14, s14 - vsub.f32 s15, s15, s15 + flds s12, FP_ZERO + vmov.f32 s13, s12 + vmov.f32 s14, s12 + vmov.f32 s15, s12 .endm @@ -172,8 +176,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 + flds s12, FP_ZERO + vmov.f32 s13, s12 .endm @@ -215,10 +219,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 - vsub.f32 s14, s14, s14 - vsub.f32 s15, s15, s15 + flds s12, FP_ZERO + vmov.f32 s13, s12 + vmov.f32 s14, s12 + vmov.f32 s15, s12 .endm @@ -281,8 +285,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 + flds s12, FP_ZERO + vmov.f32 s13, s12 .endm @@ -345,6 +349,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s15 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp M, #0 ble cgemvt_kernel_L999 diff --git a/kernel/arm/zgemv_n_vfp.S b/kernel/arm/zgemv_n_vfp.S index d4cab090a..da9a91043 100644 --- a/kernel/arm/zgemv_n_vfp.S +++ b/kernel/arm/zgemv_n_vfp.S @@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I r12 +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + + #define ALPHA_I [fp, #-236] #define ALPHA_R [fp, #-244] @@ -117,7 +122,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F4 pld [ YO, #Y_PRE ] - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -222,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9 , d8 .endm @@ -269,7 +274,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S4 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -386,7 +391,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9 , d8 .endm @@ -450,6 +455,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s15 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp OLD_M, #0 ble zgemvn_kernel_L999 diff --git a/kernel/arm/zgemv_t_vfp.S b/kernel/arm/zgemv_t_vfp.S index 500a3b608..211fa0701 100644 --- a/kernel/arm/zgemv_t_vfp.S +++ b/kernel/arm/zgemv_t_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I r12 +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + #define N [fp, #-252 ] #define A [fp, #-256 ] @@ -117,10 +121,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 - vsub.f64 d14, d14, d14 - vsub.f64 d15, d15, d15 + fldd d12, FP_ZERO + vmov.f64 d13, d12 + vmov.f64 d14, d12 + vmov.f64 d15, d12 .endm @@ -173,8 +177,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 + fldd d12, FP_ZERO + vmov.f64 d13, d12 .endm @@ -216,10 +220,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 - vsub.f64 d14, d14, d14 - vsub.f64 d15, d15, d15 + fldd d12, FP_ZERO + vmov.f64 d13, d12 + vmov.f64 d14, d12 + vmov.f64 d15, d12 .endm @@ -282,8 +286,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 + fldd d12, FP_ZERO + vmov.f64 d13, d12 .endm @@ -346,6 +350,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s15 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp M, #0 ble zgemvt_kernel_L999