updated gemm_kernels for armv6: zero the accumulators by loading a zeroed stack slot (FP_ZERO) instead of using vsub
This commit is contained in:
@@ -57,6 +57,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#define N [fp, #-260 ]
|
#define N [fp, #-260 ]
|
||||||
#define K [fp, #-264 ]
|
#define K [fp, #-264 ]
|
||||||
|
|
||||||
|
#define FP_ZERO [fp, #-240]
|
||||||
|
#define FP_ZERO_0 [fp, # -240]
|
||||||
|
#define FP_ZERO_1 [fp, # -236]
|
||||||
|
|
||||||
#define ALPHA_I [fp, #-272]
|
#define ALPHA_I [fp, #-272]
|
||||||
#define ALPHA_R [fp, #-280]
|
#define ALPHA_R [fp, #-280]
|
||||||
|
|
||||||
@@ -138,7 +142,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT2x2
|
.macro INIT2x2
|
||||||
|
|
||||||
vsub.f32 s8 , s8 , s8
|
flds s8 , FP_ZERO
|
||||||
vmov.f32 s9 , s8
|
vmov.f32 s9 , s8
|
||||||
vmov.f32 s10, s8
|
vmov.f32 s10, s8
|
||||||
vmov.f32 s11, s8
|
vmov.f32 s11, s8
|
||||||
@@ -340,7 +344,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT1x2
|
.macro INIT1x2
|
||||||
|
|
||||||
vsub.f32 s8 , s8 , s8
|
flds s8 , FP_ZERO
|
||||||
vmov.f32 s9 , s8
|
vmov.f32 s9 , s8
|
||||||
vmov.f32 s12, s8
|
vmov.f32 s12, s8
|
||||||
vmov.f32 s13, s8
|
vmov.f32 s13, s8
|
||||||
@@ -514,7 +518,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT2x1
|
.macro INIT2x1
|
||||||
|
|
||||||
vsub.f32 s8 , s8 , s8
|
flds s8 , FP_ZERO
|
||||||
vmov.f32 s9 , s8
|
vmov.f32 s9 , s8
|
||||||
vmov.f32 s10, s8
|
vmov.f32 s10, s8
|
||||||
vmov.f32 s11, s8
|
vmov.f32 s11, s8
|
||||||
@@ -681,7 +685,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT1x1
|
.macro INIT1x1
|
||||||
|
|
||||||
vsub.f32 s8 , s8 , s8
|
flds s8 , FP_ZERO
|
||||||
vmov.f32 s9 , s8
|
vmov.f32 s9 , s8
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
@@ -822,6 +826,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
sub r3, fp, #128
|
sub r3, fp, #128
|
||||||
vstm r3, { s8 - s15} // store floating point registers
|
vstm r3, { s8 - s15} // store floating point registers
|
||||||
|
|
||||||
|
movs r4, #0
|
||||||
|
str r4, FP_ZERO
|
||||||
|
str r4, FP_ZERO_1
|
||||||
|
|
||||||
ldr r3, OLD_LDC
|
ldr r3, OLD_LDC
|
||||||
lsl r3, r3, #3 // ldc = ldc * 4 * 2
|
lsl r3, r3, #3 // ldc = ldc * 4 * 2
|
||||||
str r3, LDC
|
str r3, LDC
|
||||||
|
|||||||
@@ -56,8 +56,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#define K [fp, #-264 ]
|
#define K [fp, #-264 ]
|
||||||
#define A [fp, #-268 ]
|
#define A [fp, #-268 ]
|
||||||
|
|
||||||
|
#define FP_ZERO [fp, #-240]
|
||||||
|
#define FP_ZERO_0 [fp, # -240]
|
||||||
|
#define FP_ZERO_1 [fp, # -236]
|
||||||
|
|
||||||
#define ALPHA [fp, #-280]
|
#define ALPHA [fp, #-280]
|
||||||
|
|
||||||
|
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
@@ -85,7 +90,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT4x2
|
.macro INIT4x2
|
||||||
|
|
||||||
vsub.f64 d8 , d8 , d8
|
fldd d8, FP_ZERO
|
||||||
vmov.f64 d9, d8
|
vmov.f64 d9, d8
|
||||||
vmov.f64 d10, d8
|
vmov.f64 d10, d8
|
||||||
vmov.f64 d11, d8
|
vmov.f64 d11, d8
|
||||||
@@ -173,7 +178,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT2x2
|
.macro INIT2x2
|
||||||
|
|
||||||
vsub.f64 d8 , d8 , d8
|
fldd d8, FP_ZERO
|
||||||
vmov.f64 d9, d8
|
vmov.f64 d9, d8
|
||||||
vmov.f64 d12, d8
|
vmov.f64 d12, d8
|
||||||
vmov.f64 d13, d8
|
vmov.f64 d13, d8
|
||||||
@@ -233,7 +238,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT1x2
|
.macro INIT1x2
|
||||||
|
|
||||||
vsub.f64 d8 , d8 , d8
|
fldd d8, FP_ZERO
|
||||||
vmov.f64 d12, d8
|
vmov.f64 d12, d8
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
@@ -283,7 +288,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT4x1
|
.macro INIT4x1
|
||||||
|
|
||||||
vsub.f64 d8 , d8 , d8
|
fldd d8, FP_ZERO
|
||||||
vmov.f64 d9, d8
|
vmov.f64 d9, d8
|
||||||
vmov.f64 d10, d8
|
vmov.f64 d10, d8
|
||||||
vmov.f64 d11, d8
|
vmov.f64 d11, d8
|
||||||
@@ -338,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT2x1
|
.macro INIT2x1
|
||||||
|
|
||||||
vsub.f64 d8 , d8 , d8
|
fldd d8, FP_ZERO
|
||||||
vmov.f64 d9 , d8
|
vmov.f64 d9 , d8
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
@@ -380,7 +385,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT1x1
|
.macro INIT1x1
|
||||||
|
|
||||||
vsub.f64 d8 , d8 , d8
|
fldd d8, FP_ZERO
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
@@ -433,6 +438,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
sub r3, fp, #128
|
sub r3, fp, #128
|
||||||
vstm r3, { d8 - d15} // store floating point registers
|
vstm r3, { d8 - d15} // store floating point registers
|
||||||
|
|
||||||
|
movs r4, #0
|
||||||
|
str r4, FP_ZERO
|
||||||
|
str r4, FP_ZERO_1
|
||||||
|
|
||||||
ldr r3, OLD_LDC
|
ldr r3, OLD_LDC
|
||||||
lsl r3, r3, #3 // ldc = ldc * 8
|
lsl r3, r3, #3 // ldc = ldc * 8
|
||||||
str r3, LDC
|
str r3, LDC
|
||||||
|
|||||||
@@ -56,6 +56,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#define K [fp, #-264 ]
|
#define K [fp, #-264 ]
|
||||||
#define A [fp, #-268 ]
|
#define A [fp, #-268 ]
|
||||||
|
|
||||||
|
#define FP_ZERO [fp, #-240]
|
||||||
|
#define FP_ZERO_0 [fp, # -240]
|
||||||
|
#define FP_ZERO_1 [fp, # -236]
|
||||||
|
|
||||||
#define ALPHA [fp, #-280]
|
#define ALPHA [fp, #-280]
|
||||||
|
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
@@ -85,7 +89,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT4x2
|
.macro INIT4x2
|
||||||
|
|
||||||
vsub.f32 s8 , s8 , s8
|
flds s8, FP_ZERO
|
||||||
vmov.f32 s9, s8
|
vmov.f32 s9, s8
|
||||||
vmov.f32 s10, s8
|
vmov.f32 s10, s8
|
||||||
vmov.f32 s11, s8
|
vmov.f32 s11, s8
|
||||||
@@ -161,7 +165,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT2x2
|
.macro INIT2x2
|
||||||
|
|
||||||
vsub.f32 s8 , s8 , s8
|
flds s8, FP_ZERO
|
||||||
vmov.f32 s9, s8
|
vmov.f32 s9, s8
|
||||||
vmov.f32 s12, s8
|
vmov.f32 s12, s8
|
||||||
vmov.f32 s13, s8
|
vmov.f32 s13, s8
|
||||||
@@ -221,7 +225,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT1x2
|
.macro INIT1x2
|
||||||
|
|
||||||
vsub.f32 s8 , s8 , s8
|
flds s8, FP_ZERO
|
||||||
vmov.f32 s12, s8
|
vmov.f32 s12, s8
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
@@ -271,7 +275,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT4x1
|
.macro INIT4x1
|
||||||
|
|
||||||
vsub.f32 s8 , s8 , s8
|
flds s8, FP_ZERO
|
||||||
vmov.f32 s9, s8
|
vmov.f32 s9, s8
|
||||||
vmov.f32 s10, s8
|
vmov.f32 s10, s8
|
||||||
vmov.f32 s11, s8
|
vmov.f32 s11, s8
|
||||||
@@ -326,7 +330,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT2x1
|
.macro INIT2x1
|
||||||
|
|
||||||
vsub.f32 s8 , s8 , s8
|
flds s8, FP_ZERO
|
||||||
vmov.f32 s9 , s8
|
vmov.f32 s9 , s8
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
@@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT1x1
|
.macro INIT1x1
|
||||||
|
|
||||||
vsub.f32 s8 , s8 , s8
|
flds s8, FP_ZERO
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
@@ -421,6 +425,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
sub r3, fp, #128
|
sub r3, fp, #128
|
||||||
vstm r3, { s8 - s15} // store floating point registers
|
vstm r3, { s8 - s15} // store floating point registers
|
||||||
|
|
||||||
|
movs r4, #0
|
||||||
|
str r4, FP_ZERO
|
||||||
|
str r4, FP_ZERO_1
|
||||||
|
|
||||||
ldr r3, OLD_LDC
|
ldr r3, OLD_LDC
|
||||||
lsl r3, r3, #2 // ldc = ldc * 4
|
lsl r3, r3, #2 // ldc = ldc * 4
|
||||||
str r3, LDC
|
str r3, LDC
|
||||||
|
|||||||
@@ -57,6 +57,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
#define N [fp, #-260 ]
|
#define N [fp, #-260 ]
|
||||||
#define K [fp, #-264 ]
|
#define K [fp, #-264 ]
|
||||||
|
|
||||||
|
#define FP_ZERO [fp, #-240]
|
||||||
|
#define FP_ZERO_0 [fp, # -240]
|
||||||
|
#define FP_ZERO_1 [fp, # -236]
|
||||||
|
|
||||||
#define ALPHA_I [fp, #-272]
|
#define ALPHA_I [fp, #-272]
|
||||||
#define ALPHA_R [fp, #-280]
|
#define ALPHA_R [fp, #-280]
|
||||||
|
|
||||||
@@ -131,7 +135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT2x2
|
.macro INIT2x2
|
||||||
|
|
||||||
vsub.f64 d8 , d8 , d8
|
fldd d8 , FP_ZERO
|
||||||
vmov.f64 d9 , d8
|
vmov.f64 d9 , d8
|
||||||
vmov.f64 d10, d8
|
vmov.f64 d10, d8
|
||||||
vmov.f64 d11, d8
|
vmov.f64 d11, d8
|
||||||
@@ -383,7 +387,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT1x2
|
.macro INIT1x2
|
||||||
|
|
||||||
vsub.f64 d8 , d8 , d8
|
fldd d8 , FP_ZERO
|
||||||
vmov.f64 d9 , d8
|
vmov.f64 d9 , d8
|
||||||
vmov.f64 d12, d8
|
vmov.f64 d12, d8
|
||||||
vmov.f64 d13, d8
|
vmov.f64 d13, d8
|
||||||
@@ -557,7 +561,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT2x1
|
.macro INIT2x1
|
||||||
|
|
||||||
vsub.f64 d8 , d8 , d8
|
fldd d8 , FP_ZERO
|
||||||
vmov.f64 d9 , d8
|
vmov.f64 d9 , d8
|
||||||
vmov.f64 d10, d8
|
vmov.f64 d10, d8
|
||||||
vmov.f64 d11, d8
|
vmov.f64 d11, d8
|
||||||
@@ -724,7 +728,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
|
|
||||||
.macro INIT1x1
|
.macro INIT1x1
|
||||||
|
|
||||||
vsub.f64 d8 , d8 , d8
|
fldd d8 , FP_ZERO
|
||||||
vmov.f64 d9 , d8
|
vmov.f64 d9 , d8
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
@@ -869,6 +873,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||||||
sub r3, fp, #128
|
sub r3, fp, #128
|
||||||
vstm r3, { d8 - d15} // store floating point registers
|
vstm r3, { d8 - d15} // store floating point registers
|
||||||
|
|
||||||
|
movs r4, #0
|
||||||
|
str r4, FP_ZERO
|
||||||
|
str r4, FP_ZERO_1
|
||||||
|
|
||||||
ldr r3, OLD_LDC
|
ldr r3, OLD_LDC
|
||||||
lsl r3, r3, #4 // ldc = ldc * 8 * 2
|
lsl r3, r3, #4 // ldc = ldc * 8 * 2
|
||||||
str r3, LDC
|
str r3, LDC
|
||||||
|
|||||||
Reference in New Issue
Block a user