From e63e9f9f266a9869f1bc09d471701ca35f6e26de Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Sun, 24 Jan 2016 11:55:50 +0100 Subject: [PATCH] updated gemm_kernels for armv6 --- kernel/arm/cgemm_kernel_2x2_vfp.S | 16 ++++++++++++---- kernel/arm/dgemm_kernel_4x2_vfp.S | 21 +++++++++++++++------ kernel/arm/sgemm_kernel_4x2_vfp.S | 20 ++++++++++++++------ kernel/arm/zgemm_kernel_2x2_vfp.S | 16 ++++++++++++---- 4 files changed, 53 insertions(+), 20 deletions(-) diff --git a/kernel/arm/cgemm_kernel_2x2_vfp.S b/kernel/arm/cgemm_kernel_2x2_vfp.S index a059ef505..f0517cb47 100644 --- a/kernel/arm/cgemm_kernel_2x2_vfp.S +++ b/kernel/arm/cgemm_kernel_2x2_vfp.S @@ -57,6 +57,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -138,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -340,7 +344,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -514,7 +518,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -681,7 +685,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 .endm @@ -822,6 +826,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 4 * 2 str r3, LDC diff --git a/kernel/arm/dgemm_kernel_4x2_vfp.S b/kernel/arm/dgemm_kernel_4x2_vfp.S index 9fb881d73..183269d1b 100644 --- a/kernel/arm/dgemm_kernel_4x2_vfp.S +++ b/kernel/arm/dgemm_kernel_4x2_vfp.S @@ -56,8 +56,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA [fp, #-280] + #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] @@ -85,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -173,7 +178,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9, d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -233,7 +238,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d12, d8 .endm @@ -283,7 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -338,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9 , d8 .endm @@ -380,7 +385,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO .endm @@ -433,6 +438,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 8 str r3, LDC diff --git a/kernel/arm/sgemm_kernel_4x2_vfp.S b/kernel/arm/sgemm_kernel_4x2_vfp.S index 4dfb7333d..e8b44b742 100644 --- a/kernel/arm/sgemm_kernel_4x2_vfp.S +++ b/kernel/arm/sgemm_kernel_4x2_vfp.S @@ -56,6 +56,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA [fp, #-280] #define B [fp, #4 ] @@ -85,7 +89,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -161,7 +165,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO vmov.f32 s9, s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -221,7 +225,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO vmov.f32 s12, s8 .endm @@ -271,7 +275,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -326,7 +330,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO vmov.f32 s9 , s8 .endm @@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO .endm @@ -421,6 +425,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #2 // ldc = ldc * 4 str r3, LDC diff --git a/kernel/arm/zgemm_kernel_2x2_vfp.S b/kernel/arm/zgemm_kernel_2x2_vfp.S index f4134eaf6..46507c4d2 100644 --- a/kernel/arm/zgemm_kernel_2x2_vfp.S +++ b/kernel/arm/zgemm_kernel_2x2_vfp.S @@ -57,6 +57,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -131,7 +135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -383,7 +387,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -557,7 +561,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -724,7 +728,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 .endm @@ -869,6 +873,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #4 // ldc = ldc * 8 * 2 str r3, LDC