From aafd3ab60e5da6e564a0b7a87458db7e16d08565 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Sun, 24 Jan 2016 10:56:49 +0100 Subject: [PATCH 01/10] updated cdot and zdot on arm --- kernel/arm/cdot_vfp.S | 11 ++++++----- kernel/arm/zdot_vfp.S | 11 +++++++---- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/kernel/arm/cdot_vfp.S b/kernel/arm/cdot_vfp.S index 2ccda3397..0497b6d83 100644 --- a/kernel/arm/cdot_vfp.S +++ b/kernel/arm/cdot_vfp.S @@ -185,14 +185,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r4, fp, #128 vstm r4, { s8 - s15} // store floating point registers + movs r4, #0 // clear floating point register + vmov s0, r4 + vmov s1, s0 + vmov s2, s0 + vmov s3, s0 + mov Y, OLD_Y ldr INC_Y, OLD_INC_Y - vsub.f32 s0 , s0 , s0 - vsub.f32 s1 , s1 , s1 - vsub.f32 s2 , s2 , s2 - vsub.f32 s3 , s3 , s3 - cmp N, #0 ble cdot_kernel_L999 diff --git a/kernel/arm/zdot_vfp.S b/kernel/arm/zdot_vfp.S index 622169bb9..936ce9f60 100644 --- a/kernel/arm/zdot_vfp.S +++ b/kernel/arm/zdot_vfp.S @@ -187,13 +187,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r4, fp, #128 vstm r4, { d8 - d15} // store floating point registers + movs r4, #0 // clear floating point register + vmov s0, r4 + vcvt.f64.f32 d0, s0 + vcvt.f64.f32 d1, s0 + vcvt.f64.f32 d2, s0 + vcvt.f64.f32 d3, s0 + mov Y, OLD_Y ldr INC_Y, OLD_INC_Y - vsub.f64 d0 , d0 , d0 - vsub.f64 d1 , d1 , d1 - vsub.f64 d2 , d2 , d2 - vsub.f64 d3 , d3 , d3 cmp N, #0 ble zdot_kernel_L999 From e63e9f9f266a9869f1bc09d471701ca35f6e26de Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Sun, 24 Jan 2016 11:55:50 +0100 Subject: [PATCH 02/10] updated gemm_kernels for armv6 --- kernel/arm/cgemm_kernel_2x2_vfp.S | 16 ++++++++++++---- kernel/arm/dgemm_kernel_4x2_vfp.S | 21 +++++++++++++++------ kernel/arm/sgemm_kernel_4x2_vfp.S | 20 ++++++++++++++------ kernel/arm/zgemm_kernel_2x2_vfp.S | 16 ++++++++++++---- 4 files changed, 53 insertions(+), 20 deletions(-) diff --git a/kernel/arm/cgemm_kernel_2x2_vfp.S b/kernel/arm/cgemm_kernel_2x2_vfp.S index a059ef505..f0517cb47 100644 --- a/kernel/arm/cgemm_kernel_2x2_vfp.S +++ b/kernel/arm/cgemm_kernel_2x2_vfp.S @@ -57,6 +57,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -138,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -340,7 +344,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -514,7 +518,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -681,7 +685,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 .endm @@ -822,6 +826,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 4 * 2 str r3, LDC diff --git a/kernel/arm/dgemm_kernel_4x2_vfp.S b/kernel/arm/dgemm_kernel_4x2_vfp.S index 9fb881d73..183269d1b 100644 --- a/kernel/arm/dgemm_kernel_4x2_vfp.S +++ b/kernel/arm/dgemm_kernel_4x2_vfp.S @@ -56,8 +56,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA [fp, #-280] + #define B [fp, #4 ] #define C [fp, #8 ] #define OLD_LDC [fp, #12 ] @@ -85,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -173,7 +178,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9, d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -233,7 +238,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d12, d8 .endm @@ -283,7 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -338,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9 , d8 .endm @@ -380,7 +385,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO .endm @@ -433,6 +438,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 8 str r3, LDC diff --git a/kernel/arm/sgemm_kernel_4x2_vfp.S b/kernel/arm/sgemm_kernel_4x2_vfp.S index 4dfb7333d..e8b44b742 100644 --- a/kernel/arm/sgemm_kernel_4x2_vfp.S +++ b/kernel/arm/sgemm_kernel_4x2_vfp.S @@ -56,6 +56,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA [fp, #-280] #define B [fp, #4 ] @@ -85,7 +89,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -161,7 +165,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO vmov.f32 s9, s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -221,7 +225,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO vmov.f32 s12, s8 .endm @@ -271,7 +275,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -326,7 +330,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO vmov.f32 s9 , s8 .endm @@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + flds s8, FP_ZERO .endm @@ -421,6 +425,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #2 // ldc = ldc * 4 str r3, LDC diff --git a/kernel/arm/zgemm_kernel_2x2_vfp.S b/kernel/arm/zgemm_kernel_2x2_vfp.S index f4134eaf6..46507c4d2 100644 --- a/kernel/arm/zgemm_kernel_2x2_vfp.S +++ b/kernel/arm/zgemm_kernel_2x2_vfp.S @@ -57,6 +57,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -131,7 +135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -383,7 +387,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -557,7 +561,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -724,7 +728,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 .endm @@ -869,6 +873,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #4 // ldc = ldc * 8 * 2 str r3, LDC From c65357c56685be1e6f8b1abc1b5328327ab948a5 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Sun, 24 Jan 2016 13:03:33 +0100 Subject: [PATCH 03/10] updated trmm_kernels for armv6 --- kernel/arm/ctrmm_kernel_2x2_vfp.S | 53 ++++++++++++++++++------------- kernel/arm/dtrmm_kernel_4x2_vfp.S | 20 ++++++++---- kernel/arm/strmm_kernel_4x2_vfp.S | 20 ++++++++---- kernel/arm/ztrmm_kernel_2x2_vfp.S | 52 +++++++++++++++++------------- 4 files changed, 89 insertions(+), 56 deletions(-) diff --git a/kernel/arm/ctrmm_kernel_2x2_vfp.S b/kernel/arm/ctrmm_kernel_2x2_vfp.S index a48c8608d..8cb7ede9d 100644 --- a/kernel/arm/ctrmm_kernel_2x2_vfp.S +++ b/kernel/arm/ctrmm_kernel_2x2_vfp.S @@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-232] +#define FP_ZERO_0 [fp, #-232] +#define FP_ZERO_1 [fp, #-228] + + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -136,7 +141,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -301,10 +306,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 - vsub.f32 s6, s6, s6 - vsub.f32 s7, s7, s7 + flds s4, FP_ZERO + vmov.f32 s5, s4 + vmov.f32 s6, s4 + vmov.f32 s7, s4 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -318,10 +323,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fstmias CO1, { s4 - s7 } - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 - vsub.f32 s6, s6, s6 - vsub.f32 s7, s7, s7 + flds s4, FP_ZERO + vmov.f32 s5, s4 + vmov.f32 s6, s4 + vmov.f32 s7, s4 FMAC_R1 s4 , s0 , s12 FMAC_I1 s5 , s0 , s13 @@ -343,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -490,8 +495,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 + flds s4, FP_ZERO + vmov.f32 s5, s4 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -500,8 +505,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fstmias CO1, { s4 - s5 } - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 + flds s4, FP_ZERO + vmov.f32 s5, s4 FMAC_R1 s4 , s0 , s12 FMAC_I1 s5 , s0 , s13 @@ -519,7 +524,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -663,10 +668,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 - vsub.f32 s6, s6, s6 - vsub.f32 s7, s7, s7 + flds s4, FP_ZERO + vmov.f32 s5, s4 + vmov.f32 s6, s4 + vmov.f32 s7, s4 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -689,7 +694,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 .endm @@ -795,8 +800,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. flds s0, ALPHA_R flds s1, ALPHA_I - vsub.f32 s4, s4, s4 - vsub.f32 s5, s5, s5 + flds s4, FP_ZERO + vmov.f32 s5, s4 FMAC_R1 s4 , s0 , s8 FMAC_I1 s5 , s0 , s9 @@ -831,6 +836,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 4 * 2 str r3, LDC diff --git a/kernel/arm/dtrmm_kernel_4x2_vfp.S b/kernel/arm/dtrmm_kernel_4x2_vfp.S index 3528e0860..c578d2b1e 100644 --- a/kernel/arm/dtrmm_kernel_4x2_vfp.S +++ b/kernel/arm/dtrmm_kernel_4x2_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] +#define FP_ZERO [fp, #-232] +#define FP_ZERO_0 [fp, #-232] +#define FP_ZERO_1 [fp, #-228] + #define ALPHA [fp, #-276 ] #define B [fp, #4 ] @@ -90,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -165,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9, d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -220,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d12, d8 .endm @@ -268,7 +272,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9, d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -318,7 +322,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 .endm @@ -357,7 +361,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO .endm @@ -409,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 8 str r3, LDC diff --git a/kernel/arm/strmm_kernel_4x2_vfp.S b/kernel/arm/strmm_kernel_4x2_vfp.S index e7511ffef..8f97644ec 100644 --- a/kernel/arm/strmm_kernel_4x2_vfp.S +++ b/kernel/arm/strmm_kernel_4x2_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] +#define FP_ZERO [fp, #-232] +#define FP_ZERO_0 [fp, #-232] +#define FP_ZERO_1 [fp, #-228] + #define ALPHA [fp, #-276 ] #define B [fp, #4 ] @@ -90,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -156,7 +160,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9, s8 vmov.f32 s12, s8 vmov.f32 s13, s8 @@ -211,7 +215,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s12, s8 .endm @@ -259,7 +263,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9, s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -309,7 +313,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 .endm @@ -348,7 +352,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO .endm @@ -400,6 +404,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #2 // ldc = ldc * 4 str r3, LDC diff --git a/kernel/arm/ztrmm_kernel_2x2_vfp.S b/kernel/arm/ztrmm_kernel_2x2_vfp.S index 109ee07a8..dc80b17b8 100644 --- a/kernel/arm/ztrmm_kernel_2x2_vfp.S +++ b/kernel/arm/ztrmm_kernel_2x2_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-232] +#define FP_ZERO_0 [fp, #-232] +#define FP_ZERO_1 [fp, #-228] + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -140,7 +144,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -356,10 +360,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 - vsub.f64 d6, d6 , d6 - vsub.f64 d7, d7 , d7 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 + vmov.f64 d6 , d4 + vmov.f64 d7 , d4 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -373,10 +377,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fstmiad CO1, { d4 - d7 } - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 - vsub.f64 d6, d6 , d6 - vsub.f64 d7, d7 , d7 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 + vmov.f64 d6 , d4 + vmov.f64 d7 , d4 FMAC_R1 d4 , d0 , d12 FMAC_I1 d5 , d0 , d13 @@ -398,7 +402,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d12, d8 vmov.f64 d13, d8 @@ -545,8 +549,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -555,8 +559,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fstmiad CO1, { d4 - d5 } - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 FMAC_R1 d4 , d0 , d12 FMAC_I1 d5 , d0 , d13 @@ -574,7 +578,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -718,10 +722,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 - vsub.f64 d6, d6 , d6 - vsub.f64 d7, d7 , d7 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 + vmov.f64 d6 , d4 + vmov.f64 d7 , d4 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -744,7 +748,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 .endm @@ -850,8 +854,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. fldd d0, ALPHA_R fldd d1, ALPHA_I - vsub.f64 d4, d4 , d4 - vsub.f64 d5, d5 , d5 + fldd d4 , FP_ZERO + vmov.f64 d5 , d4 FMAC_R1 d4 , d0 , d8 FMAC_I1 d5 , d0 , d9 @@ -888,6 +892,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #4 // ldc = ldc * 8 * 2 str r3, LDC From eaa63165df9354560380d945ce6f47eb47196b62 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Sun, 24 Jan 2016 14:42:38 +0100 Subject: [PATCH 04/10] updated cgemv and zgemv kernels for armv6 --- kernel/arm/cgemv_n_vfp.S | 16 ++++++++++++---- kernel/arm/cgemv_t_vfp.S | 32 ++++++++++++++++++++------------ kernel/arm/zgemv_n_vfp.S | 17 +++++++++++++---- kernel/arm/zgemv_t_vfp.S | 32 ++++++++++++++++++++------------ 4 files changed, 65 insertions(+), 32 deletions(-) diff --git a/kernel/arm/cgemv_n_vfp.S b/kernel/arm/cgemv_n_vfp.S index 712e7f0d8..5d2748644 100644 --- a/kernel/arm/cgemv_n_vfp.S +++ b/kernel/arm/cgemv_n_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I r12 +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + #define ALPHA_I [fp, #-236] #define ALPHA_R [fp, #-244] @@ -117,7 +121,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F4 pld [ YO, #Y_PRE ] - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -220,7 +224,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 .endm @@ -267,7 +271,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S4 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10, s8 vmov.f32 s11, s8 @@ -384,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 .endm @@ -448,6 +452,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s15 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp OLD_M, #0 ble cgemvn_kernel_L999 diff --git a/kernel/arm/cgemv_t_vfp.S b/kernel/arm/cgemv_t_vfp.S index 52276a06f..76c8a8f18 100644 --- a/kernel/arm/cgemv_t_vfp.S +++ b/kernel/arm/cgemv_t_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I r12 +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + #define N [fp, #-252 ] #define A [fp, #-256 ] @@ -116,10 +120,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 - vsub.f32 s14, s14, s14 - vsub.f32 s15, s15, s15 + flds s12, FP_ZERO + vmov.f32 s13, s12 + vmov.f32 s14, s12 + vmov.f32 s15, s12 .endm @@ -172,8 +176,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 + flds s12, FP_ZERO + vmov.f32 s13, s12 .endm @@ -215,10 +219,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 - vsub.f32 s14, s14, s14 - vsub.f32 s15, s15, s15 + flds s12, FP_ZERO + vmov.f32 s13, s12 + vmov.f32 s14, s12 + vmov.f32 s15, s12 .endm @@ -281,8 +285,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s12, s12, s12 - vsub.f32 s13, s13, s13 + flds s12, FP_ZERO + vmov.f32 s13, s12 .endm @@ -345,6 +349,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s15 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp M, #0 ble cgemvt_kernel_L999 diff --git a/kernel/arm/zgemv_n_vfp.S b/kernel/arm/zgemv_n_vfp.S index d4cab090a..da9a91043 100644 --- a/kernel/arm/zgemv_n_vfp.S +++ b/kernel/arm/zgemv_n_vfp.S @@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I r12 +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + + #define ALPHA_I [fp, #-236] #define ALPHA_R [fp, #-244] @@ -117,7 +122,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F4 pld [ YO, #Y_PRE ] - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -222,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9 , d8 .endm @@ -269,7 +274,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S4 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10, d8 vmov.f64 d11, d8 @@ -386,7 +391,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d8 , d8 , d8 + fldd d8, FP_ZERO vmov.f64 d9 , d8 .endm @@ -450,6 +455,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s15 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp OLD_M, #0 ble zgemvn_kernel_L999 diff --git a/kernel/arm/zgemv_t_vfp.S b/kernel/arm/zgemv_t_vfp.S index 500a3b608..211fa0701 100644 --- a/kernel/arm/zgemv_t_vfp.S +++ b/kernel/arm/zgemv_t_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I r12 +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + #define N [fp, #-252 ] #define A [fp, #-256 ] @@ -117,10 +121,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 - vsub.f64 d14, d14, d14 - vsub.f64 d15, d15, d15 + fldd d12, FP_ZERO + vmov.f64 d13, d12 + vmov.f64 d14, d12 + vmov.f64 d15, d12 .endm @@ -173,8 +177,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 + fldd d12, FP_ZERO + vmov.f64 d13, d12 .endm @@ -216,10 +220,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 - vsub.f64 d14, d14, d14 - vsub.f64 d15, d15, d15 + fldd d12, FP_ZERO + vmov.f64 d13, d12 + vmov.f64 d14, d12 + vmov.f64 d15, d12 .endm @@ -282,8 +286,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d12, d12, d12 - vsub.f64 d13, d13, d13 + fldd d12, FP_ZERO + vmov.f64 d13, d12 .endm @@ -346,6 +350,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s15 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp M, #0 ble zgemvt_kernel_L999 From ceaa931e484477a9cebff1ee0ae4a0a9257757d2 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Sun, 24 Jan 2016 16:31:19 +0100 Subject: [PATCH 05/10] updated gemv kernel for armv6 --- kernel/arm/gemv_n_vfp.S | 24 ++++++++++++++++-------- kernel/arm/gemv_t_vfp.S | 35 +++++++++++++++++++++++------------ lapack-netlib/TESTING/nep.in | 2 +- 3 files changed, 40 insertions(+), 21 deletions(-) diff --git a/kernel/arm/gemv_n_vfp.S b/kernel/arm/gemv_n_vfp.S index 505033c18..385370b7f 100644 --- a/kernel/arm/gemv_n_vfp.S +++ b/kernel/arm/gemv_n_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I r12 +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + #define M [fp, #-252 ] #define A [fp, #-256 ] @@ -79,7 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pld [ YO , #Y_PRE ] pld [ YO , #Y_PRE+32 ] - vsub.f64 d8 , d8 , d8 + fldd d8 , FP_ZERO vmov.f64 d9 , d8 vmov.f64 d10 , d8 vmov.f64 d11 , d8 @@ -158,7 +162,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d12 , d12 , d12 + fldd d12 , FP_ZERO .endm @@ -185,7 +189,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S4 - vsub.f64 d12 , d12 , d12 + fldd d12 , FP_ZERO vmov.f64 d13 , d12 vmov.f64 d14 , d12 vmov.f64 d15 , d12 @@ -245,7 +249,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d12 , d12 , d12 + fldd d12 , FP_ZERO .endm @@ -279,7 +283,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pld [ YO , #Y_PRE ] - vsub.f32 s8 , s8 , s8 + flds s8 , FP_ZERO vmov.f32 s9 , s8 vmov.f32 s10 , s8 vmov.f32 s11 , s8 @@ -357,7 +361,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s12 , s12 , s12 + flds s12 , FP_ZERO .endm @@ -384,7 +388,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S4 - vsub.f32 s12 , s12 , s12 + flds s12 , FP_ZERO vmov.f32 s13 , s12 vmov.f32 s14 , s12 vmov.f32 s15 , s12 @@ -445,7 +449,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s12 , s12 , s12 + flds s12 , FP_ZERO .endm @@ -494,6 +498,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s15 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp OLD_M, #0 ble gemvn_kernel_L999 diff --git a/kernel/arm/gemv_t_vfp.S b/kernel/arm/gemv_t_vfp.S index 6a56ae9d1..c3b4e0525 100644 --- a/kernel/arm/gemv_t_vfp.S +++ b/kernel/arm/gemv_t_vfp.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I r12 +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + #define N [fp, #-252 ] #define A [fp, #-256 ] @@ -75,8 +79,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f64 d2 , d2 , d2 - vsub.f64 d3 , d3 , d3 + fldd d2, FP_ZERO + vmov.f64 d3 , d2 .endm @@ -123,7 +127,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d2 , d2 , d2 + fldd d2, FP_ZERO + vmov.f64 d3 , d2 .endm @@ -160,8 +165,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f64 d2 , d2 , d2 - vsub.f64 d3 , d3 , d3 + fldd d2, FP_ZERO + vmov.f64 d3 , d2 .endm @@ -224,7 +229,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d2 , d2 , d2 + fldd d2, FP_ZERO + vmov.f64 d3 , d2 .endm @@ -276,8 +282,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F2 - vsub.f32 s2 , s2 , s2 - vsub.f32 s3 , s3 , s3 + flds s2 , FP_ZERO + vmov.f32 s3 , s2 + .endm @@ -321,7 +328,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s2 , s2 , s2 + flds s2 , FP_ZERO .endm @@ -356,8 +363,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S2 - vsub.f32 s2 , s2 , s2 - vsub.f32 s3 , s3 , s3 + flds s2 , FP_ZERO + vmov.f32 s3 , s2 .endm @@ -418,7 +425,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s2 , s2 , s2 + flds s2 , FP_ZERO .endm @@ -488,6 +495,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s15 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp M, #0 ble gemvt_kernel_L999 diff --git a/lapack-netlib/TESTING/nep.in b/lapack-netlib/TESTING/nep.in index c4a414910..ed6869b80 100644 --- a/lapack-netlib/TESTING/nep.in +++ b/lapack-netlib/TESTING/nep.in @@ -10,7 +10,7 @@ NEP: Data file for testing Nonsymmetric Eigenvalue Problem routines 0 5 7 3 200 Values of INIBL (nibble crossover point) 1 2 4 2 1 Values of ISHFTS (number of simultaneous shifts) 0 1 2 0 1 Values of IACC22 (select structured matrix multiply: 0, 1 or 2) -20.0 Threshold value +30.0 Threshold value T Put T to test the error exits 1 Code to interpret the seed NEP 21 From c2891330bc4f01a9454710dfe0033611fdb5d233 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Sun, 24 Jan 2016 17:12:07 +0100 Subject: [PATCH 06/10] updated KERNEL.ARMV6 --- kernel/arm/KERNEL.ARMV6 | 42 ++++++++++------------------------------- 1 file changed, 10 insertions(+), 32 deletions(-) diff --git a/kernel/arm/KERNEL.ARMV6 b/kernel/arm/KERNEL.ARMV6 index 7132ca7b8..16bde105b 100644 --- a/kernel/arm/KERNEL.ARMV6 +++ b/kernel/arm/KERNEL.ARMV6 @@ -1,26 +1,4 @@ -SGEMVNKERNEL = ../arm/gemv_n.c -SGEMVTKERNEL = ../arm/gemv_t.c -CGEMVNKERNEL = ../arm/zgemv_n.c -CGEMVTKERNEL = ../arm/zgemv_t.c -DGEMVNKERNEL = ../arm/gemv_n.c -DGEMVTKERNEL = ../arm/gemv_t.c - -CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c -CGEMMKERNEL = ../generic/zgemmkernel_2x2.c -CGEMMONCOPY = ../generic/zgemm_ncopy_2.c -CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c - -#ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c -#ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c -#ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c -#ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c - - -#STRMMKERNEL = ../generic/trmmkernel_2x2.c -#SGEMMKERNEL = ../generic/gemmkernel_2x2.c -#SGEMMONCOPY = ../generic/gemm_ncopy_2.c -#SGEMMOTCOPY = ../generic/gemm_tcopy_2.c ############################################################################### @@ -96,19 +74,19 @@ DSWAPKERNEL = swap_vfp.S CSWAPKERNEL = swap_vfp.S ZSWAPKERNEL = swap_vfp.S -# BAD SGEMVNKERNEL = gemv_n_vfp.S -# BAD DGEMVNKERNEL = gemv_n_vfp.S -# CGEMVNKERNEL = cgemv_n_vfp.S +SGEMVNKERNEL = gemv_n_vfp.S +DGEMVNKERNEL = gemv_n_vfp.S +CGEMVNKERNEL = cgemv_n_vfp.S ZGEMVNKERNEL = zgemv_n_vfp.S -# BAD SGEMVTKERNEL = gemv_t_vfp.S -# BAD DGEMVTKERNEL = gemv_t_vfp.S -# CGEMVTKERNEL = cgemv_t_vfp.S +SGEMVTKERNEL = gemv_t_vfp.S +DGEMVTKERNEL = gemv_t_vfp.S +CGEMVTKERNEL = cgemv_t_vfp.S ZGEMVTKERNEL = zgemv_t_vfp.S STRMMKERNEL = strmm_kernel_4x2_vfp.S DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S -#CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S +CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S SGEMMKERNEL = sgemm_kernel_4x2_vfp.S @@ -131,9 +109,9 @@ DGEMMOTCOPY = ../generic/gemm_tcopy_2.c DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMOTCOPYOBJ = dgemm_otcopy.o -#CGEMMKERNEL = cgemm_kernel_2x2_vfp.S -#CGEMMONCOPY = cgemm_ncopy_2_vfp.S -#CGEMMOTCOPY = cgemm_tcopy_2_vfp.S +CGEMMKERNEL = cgemm_kernel_2x2_vfp.S +CGEMMONCOPY = cgemm_ncopy_2_vfp.S +CGEMMOTCOPY = cgemm_tcopy_2_vfp.S CGEMMONCOPYOBJ = cgemm_oncopy.o CGEMMOTCOPYOBJ = cgemm_otcopy.o From a0e51e96f16b5c3a12d0d3405f087aa5e18673f8 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Mon, 25 Jan 2016 10:46:10 +0100 Subject: [PATCH 07/10] updated gemm kernels for armv7 --- kernel/arm/cgemm_kernel_2x2_vfpv3.S | 16 ++++++++++++---- kernel/arm/sgemm_kernel_4x4_vfpv3.S | 28 ++++++++++++++++++---------- kernel/arm/zgemm_kernel_2x2_vfpv3.S | 16 ++++++++++++---- 3 files changed, 42 insertions(+), 18 deletions(-) diff --git a/kernel/arm/cgemm_kernel_2x2_vfpv3.S b/kernel/arm/cgemm_kernel_2x2_vfpv3.S index 8bc200c9f..cf132a184 100644 --- a/kernel/arm/cgemm_kernel_2x2_vfpv3.S +++ b/kernel/arm/cgemm_kernel_2x2_vfpv3.S @@ -73,6 +73,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -550,7 +554,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -730,7 +734,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s24, s16 vmov.f32 s25, s16 @@ -879,6 +883,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s31} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 4 * 2 str r3, LDC diff --git a/kernel/arm/sgemm_kernel_4x4_vfpv3.S b/kernel/arm/sgemm_kernel_4x4_vfpv3.S index 078f14a80..18527263d 100644 --- a/kernel/arm/sgemm_kernel_4x4_vfpv3.S +++ b/kernel/arm/sgemm_kernel_4x4_vfpv3.S @@ -73,7 +73,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] -#define ALPHA [fp, #-280] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, #-240] +#define FP_ZERO_1 [fp, #-236] + +#define ALPHA [fp, #-280] #define B [fp, #4 ] #define C [fp, #8 ] @@ -102,7 +106,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x4 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -349,7 +353,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x4 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -443,7 +447,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x4 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s20, s16 vmov.f32 s24, s16 vmov.f32 s28, s16 @@ -506,7 +510,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -590,7 +594,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -651,7 +655,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s20, s16 .endm @@ -696,7 +700,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -755,7 +759,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO vmov.f32 s17, s16 .endm @@ -799,7 +803,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s16 , s16 , s16 + flds s16, FP_ZERO .endm @@ -856,6 +860,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s31} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #2 // ldc = ldc * 4 str r3, LDC diff --git a/kernel/arm/zgemm_kernel_2x2_vfpv3.S b/kernel/arm/zgemm_kernel_2x2_vfpv3.S index 29c3f4582..5a99f792f 100644 --- a/kernel/arm/zgemm_kernel_2x2_vfpv3.S +++ b/kernel/arm/zgemm_kernel_2x2_vfpv3.S @@ -73,6 +73,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -404,7 +408,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d17, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 @@ -586,7 +590,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -766,7 +770,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d17, d16 vmov.f64 d24, d16 vmov.f64 d25, d16 @@ -915,6 +919,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #4 // ldc = ldc * 8 * 2 str r3, LDC From de3e2d4349a5a4171dd2e0ecdf3606a19695359c Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Mon, 25 Jan 2016 11:08:56 +0100 Subject: [PATCH 08/10] updated trmm kernels for armv7 --- kernel/arm/KERNEL.ARMV7 | 14 ++++---------- kernel/arm/ctrmm_kernel_2x2_vfpv3.S | 16 ++++++++++++---- kernel/arm/dtrmm_kernel_4x4_vfpv3.S | 27 ++++++++++++++++++--------- kernel/arm/strmm_kernel_4x4_vfpv3.S | 26 +++++++++++++++++--------- kernel/arm/ztrmm_kernel_2x2_vfpv3.S | 16 ++++++++++++---- 5 files changed, 63 insertions(+), 36 deletions(-) diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7 index c4354864f..18231b966 100644 --- a/kernel/arm/KERNEL.ARMV7 +++ b/kernel/arm/KERNEL.ARMV7 @@ -1,8 +1,3 @@ -SGEMVNKERNEL = ../arm/gemv_n.c -SGEMVTKERNEL = ../arm/gemv_t.c -CGEMVNKERNEL = ../arm/zgemv_n.c -CGEMVTKERNEL = ../arm/zgemv_t.c - ################################################################################# SAMAXKERNEL = iamax_vfp.S @@ -77,14 +72,14 @@ DSCALKERNEL = scal.c CSCALKERNEL = zscal.c ZSCALKERNEL = zscal.c -# BAD SGEMVNKERNEL = gemv_n_vfp.S +SGEMVNKERNEL = gemv_n_vfp.S DGEMVNKERNEL = gemv_n_vfp.S -#CGEMVNKERNEL = cgemv_n_vfp.S +CGEMVNKERNEL = cgemv_n_vfp.S ZGEMVNKERNEL = zgemv_n_vfp.S -# BAD SGEMVTKERNEL = gemv_t_vfp.S +SGEMVTKERNEL = gemv_t_vfp.S DGEMVTKERNEL = gemv_t_vfp.S -#CGEMVTKERNEL = cgemv_t_vfp.S +CGEMVTKERNEL = cgemv_t_vfp.S ZGEMVTKERNEL = zgemv_t_vfp.S STRMMKERNEL = strmm_kernel_4x4_vfpv3.S @@ -92,7 +87,6 @@ DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S -#SGEMMKERNEL = ../generic/gemmkernel_2x2.c SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S SGEMMINCOPY = SGEMMITCOPY = diff --git a/kernel/arm/ctrmm_kernel_2x2_vfpv3.S b/kernel/arm/ctrmm_kernel_2x2_vfpv3.S index f06e260ea..97bd88c69 100644 --- a/kernel/arm/ctrmm_kernel_2x2_vfpv3.S +++ b/kernel/arm/ctrmm_kernel_2x2_vfpv3.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-236] +#define FP_ZERO_0 [fp, #-236] +#define FP_ZERO_1 [fp, #-232] + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -134,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s16 , s16 , s16 + flds s16 , FP_ZERO vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -351,7 +355,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s16 , s16 , s16 + flds s16 , FP_ZERO vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -529,7 +533,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s16 , s16 , s16 + flds s16 , FP_ZERO vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -706,7 +710,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s16 , s16 , s16 + flds s16 , FP_ZERO vmov.f32 s17, s16 vmov.f32 s24, s16 vmov.f32 s25, s16 @@ -852,6 +856,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s31} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 4 * 2 str r3, LDC diff --git a/kernel/arm/dtrmm_kernel_4x4_vfpv3.S b/kernel/arm/dtrmm_kernel_4x4_vfpv3.S index 04cc451d1..c7e455f16 100644 --- a/kernel/arm/dtrmm_kernel_4x4_vfpv3.S +++ b/kernel/arm/dtrmm_kernel_4x4_vfpv3.S @@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] +#define FP_ZERO [fp, #-236] +#define FP_ZERO_0 [fp, #-236] +#define FP_ZERO_1 [fp, #-232] + + #define ALPHA [fp, #-276 ] #define B [fp, #4 ] @@ -89,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x4 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -386,7 +391,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x4 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d17, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 @@ -468,7 +473,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x4 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d20, d16 vmov.f64 d24, d16 vmov.f64 d28, d16 @@ -527,7 +532,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -601,7 +606,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d17, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 @@ -656,7 +661,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d20, d16 .endm @@ -699,7 +704,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -753,7 +758,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO vmov.f64 d17, d16 .endm @@ -794,7 +799,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d16 , d16 , d16 + fldd d16, FP_ZERO .endm @@ -850,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #3 // ldc = ldc * 8 str r3, LDC diff --git a/kernel/arm/strmm_kernel_4x4_vfpv3.S b/kernel/arm/strmm_kernel_4x4_vfpv3.S index f6342a07d..0dd03ac85 100644 --- a/kernel/arm/strmm_kernel_4x4_vfpv3.S +++ b/kernel/arm/strmm_kernel_4x4_vfpv3.S @@ -58,6 +58,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define K [fp, #-264 ] #define A [fp, #-268 ] +#define FP_ZERO [fp, #-240] +#define FP_ZERO_0 [fp, # -240] +#define FP_ZERO_1 [fp, # -236] + #define ALPHA [fp, #-280] #define B [fp, #4 ] @@ -88,7 +92,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x4 - vsub.f32 s16 , s16 , s16 + flds S16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -322,7 +326,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x4 - vsub.f32 s16 , s16 , s16 + flds S16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -405,7 +409,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x4 - vsub.f32 s16 , s16 , s16 + flds S16, FP_ZERO vmov.f32 s20, s16 vmov.f32 s24, s16 vmov.f32 s28, s16 @@ -464,7 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x2 - vsub.f32 s16 , s16 , s16 + flds S16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -538,7 +542,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f32 s16 , s16 , s16 + flds S16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s20, s16 vmov.f32 s21, s16 @@ -593,7 +597,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f32 s16 , s16 , s16 + flds S16, FP_ZERO vmov.f32 s20, s16 .endm @@ -636,7 +640,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT4x1 - vsub.f32 s16 , s16 , s16 + flds S16, FP_ZERO vmov.f32 s17, s16 vmov.f32 s18, s16 vmov.f32 s19, s16 @@ -690,7 +694,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f32 s16 , s16 , s16 + flds S16, FP_ZERO vmov.f32 s17, s16 .endm @@ -731,7 +735,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f32 s16 , s16 , s16 + flds S16, FP_ZERO .endm @@ -787,6 +791,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { s8 - s31} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #2 // ldc = ldc * 4 str r3, LDC diff --git a/kernel/arm/ztrmm_kernel_2x2_vfpv3.S b/kernel/arm/ztrmm_kernel_2x2_vfpv3.S index 761dbccee..5a808ccbc 100644 --- a/kernel/arm/ztrmm_kernel_2x2_vfpv3.S +++ b/kernel/arm/ztrmm_kernel_2x2_vfpv3.S @@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define N [fp, #-260 ] #define K [fp, #-264 ] +#define FP_ZERO [fp, #-236] +#define FP_ZERO_0 [fp, #-236] +#define FP_ZERO_1 [fp, #-232] + #define ALPHA_I [fp, #-272] #define ALPHA_R [fp, #-280] @@ -134,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x2 - vsub.f64 d16 , d16 , d16 + fldd d16 , FP_ZERO vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -388,7 +392,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x2 - vsub.f64 d16 , d16 , d16 + fldd d16 , FP_ZERO vmov.f64 d17, d16 vmov.f64 d20, d16 vmov.f64 d21, d16 @@ -566,7 +570,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT2x1 - vsub.f64 d16 , d16 , d16 + fldd d16 , FP_ZERO vmov.f64 d17, d16 vmov.f64 d18, d16 vmov.f64 d19, d16 @@ -743,7 +747,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT1x1 - vsub.f64 d16 , d16 , d16 + fldd d16 , FP_ZERO vmov.f64 d17, d16 vmov.f64 d24, d16 vmov.f64 d25, d16 @@ -889,6 +893,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. sub r3, fp, #128 vstm r3, { d8 - d15} // store floating point registers + movs r4, #0 + str r4, FP_ZERO + str r4, FP_ZERO_1 + ldr r3, OLD_LDC lsl r3, r3, #4 // ldc = ldc * 8 * 2 str r3, LDC From b4ede558a5c7e782400b42532b5f817c6932f924 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Mon, 25 Jan 2016 11:55:25 +0100 Subject: [PATCH 09/10] updated nrm2 kernel for armv7 --- kernel/arm/nrm2_vfpv3.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/arm/nrm2_vfpv3.S b/kernel/arm/nrm2_vfpv3.S index 34b251e9a..f676f514a 100644 --- a/kernel/arm/nrm2_vfpv3.S +++ b/kernel/arm/nrm2_vfpv3.S @@ -405,12 +405,15 @@ KERNEL_S1_END_\@: .align 5 #if defined(DOUBLE) - vsub.f64 d0 , d0 , d0 // scale=0.0 + movs r12 , #0 + vmov.f32 s0 , r12 // scale=0.0 + vcvt.f64.f32 d0, s0 vmov.f64 d1 , #1.0 // ssq=1.0 vmov.f64 d7 , d1 // value 1.0 vmov.f64 d6 , d0 // value 0.0 #else - vsub.f32 s0 , s0 , s0 // scale=0.0 + movs r12 , #0 + vmov.f32 s0 , r12 // scale=0.0 vmov.f32 s1 , #1.0 // ssq=1.0 vmov.f32 s7 , s1 // value 1.0 vmov.f32 s6 , s0 // value 0.0 From 63a7d7fb2427c7dd4f3c01720b0636ede0260665 Mon Sep 17 00:00:00 2001 From: Werner Saar Date: Mon, 25 Jan 2016 15:00:13 +0100 Subject: [PATCH 10/10] updated gemv_n_vfpv3.S for armv7 --- kernel/arm/KERNEL.ARMV7 | 12 ++---------- kernel/arm/gemv_n_vfpv3.S | 24 ++++++++++++++++-------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7 index 18231b966..d5cd94fbd 100644 --- a/kernel/arm/KERNEL.ARMV7 +++ b/kernel/arm/KERNEL.ARMV7 @@ -72,8 +72,8 @@ DSCALKERNEL = scal.c CSCALKERNEL = zscal.c ZSCALKERNEL = zscal.c -SGEMVNKERNEL = gemv_n_vfp.S -DGEMVNKERNEL = gemv_n_vfp.S +SGEMVNKERNEL = gemv_n_vfpv3.S +DGEMVNKERNEL = gemv_n_vfpv3.S CGEMVNKERNEL = cgemv_n_vfp.S ZGEMVNKERNEL = zgemv_n_vfp.S @@ -88,22 +88,14 @@ CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S -SGEMMINCOPY = -SGEMMITCOPY = SGEMMONCOPY = sgemm_ncopy_4_vfp.S SGEMMOTCOPY = sgemm_tcopy_4_vfp.S -SGEMMINCOPYOBJ = -SGEMMITCOPYOBJ = SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMOTCOPYOBJ = sgemm_otcopy.o DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S -DGEMMINCOPY = -DGEMMITCOPY = DGEMMONCOPY = dgemm_ncopy_4_vfp.S DGEMMOTCOPY = dgemm_tcopy_4_vfp.S -DGEMMINCOPYOBJ = -DGEMMITCOPYOBJ = DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMOTCOPYOBJ = dgemm_otcopy.o diff --git a/kernel/arm/gemv_n_vfpv3.S b/kernel/arm/gemv_n_vfpv3.S index 0e9ba0c9c..e7938e81c 100644 --- a/kernel/arm/gemv_n_vfpv3.S +++ b/kernel/arm/gemv_n_vfpv3.S @@ -62,6 +62,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define M [fp, #-252 ] #define A [fp, #-256 ] +#define FP_ZERO [fp, #-228] +#define FP_ZERO_0 [fp, #-228] +#define FP_ZERO_1 [fp, #-224] + #define X_PRE 64 #define Y_PRE 0 @@ -79,7 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pld [ YO , #Y_PRE ] pld [ YO , #Y_PRE+32 ] - vsub.f64 d24 , d24 , d24 + fldd d24 , FP_ZERO vmov.f64 d25 , d24 vmov.f64 d26 , d24 vmov.f64 d27 , d24 @@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f64 d24 , d24 , d24 + fldd d24 , FP_ZERO .endm @@ -175,7 +179,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S8 - vsub.f64 d24 , d24 , d24 + fldd d24 , FP_ZERO vmov.f64 d25 , d24 vmov.f64 d26 , d24 vmov.f64 d27 , d24 @@ -269,7 +273,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f64 d24 , d24 , d24 + fldd d24 , FP_ZERO .endm @@ -302,7 +306,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pld [ YO , #Y_PRE ] - vsub.f32 s24 , s24 , s24 + flds s24 , FP_ZERO vmov.f32 s25 , s24 vmov.f32 s26 , s24 vmov.f32 s27 , s24 @@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_F1 - vsub.f32 s24 , s24 , s24 + flds s24 , FP_ZERO .endm @@ -396,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S8 - vsub.f32 s24 , s24 , s24 + flds s24 , FP_ZERO vmov.f32 s25 , s24 vmov.f32 s26 , s24 vmov.f32 s27 , s24 @@ -489,7 +493,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro INIT_S1 - vsub.f32 s24 , s24 , s24 + flds s24 , FP_ZERO .endm @@ -538,6 +542,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vstm r12, { s8 - s31 } // store floating point registers #endif + movs r12, #0 + str r12, FP_ZERO + str r12, FP_ZERO_1 + cmp OLD_M, #0 ble gemvn_kernel_L999