updated gemm kernels for armv7
This commit is contained in:
parent
c2891330bc
commit
a0e51e96f1
|
@ -73,6 +73,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define N [fp, #-260 ]
|
||||
#define K [fp, #-264 ]
|
||||
|
||||
#define FP_ZERO [fp, #-240]
|
||||
#define FP_ZERO_0 [fp, # -240]
|
||||
#define FP_ZERO_1 [fp, # -236]
|
||||
|
||||
#define ALPHA_I [fp, #-272]
|
||||
#define ALPHA_R [fp, #-280]
|
||||
|
||||
|
@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -550,7 +554,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -730,7 +734,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s24, s16
|
||||
vmov.f32 s25, s16
|
||||
|
@ -879,6 +883,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
sub r3, fp, #128
|
||||
vstm r3, { s8 - s31} // store floating point registers
|
||||
|
||||
movs r4, #0
|
||||
str r4, FP_ZERO
|
||||
str r4, FP_ZERO_1
|
||||
|
||||
ldr r3, OLD_LDC
|
||||
lsl r3, r3, #3 // ldc = ldc * 4 * 2
|
||||
str r3, LDC
|
||||
|
|
|
@ -73,7 +73,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define K [fp, #-264 ]
|
||||
#define A [fp, #-268 ]
|
||||
|
||||
#define ALPHA [fp, #-280]
|
||||
#define FP_ZERO [fp, #-240]
|
||||
#define FP_ZERO_0 [fp, #-240]
|
||||
#define FP_ZERO_1 [fp, #-236]
|
||||
|
||||
#define ALPHA [fp, #-280]
|
||||
|
||||
#define B [fp, #4 ]
|
||||
#define C [fp, #8 ]
|
||||
|
@ -102,7 +106,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -349,7 +353,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -443,7 +447,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16, FP_ZERO
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s24, s16
|
||||
vmov.f32 s28, s16
|
||||
|
@ -506,7 +510,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -590,7 +594,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -651,7 +655,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT1x2: set s16 and s20 to the 32-bit value stored at FP_ZERO.
// FP_ZERO is a stack slot ([fp, #-240]) that the function prologue fills
// with integer 0, whose bit pattern is +0.0f.
// s16/s20 are presumably the accumulators of the 1x2 tile - TODO confirm
// against the matching KERNEL/SAVE macros.
//
// The zero is loaded from memory rather than computed with
// "vsub.f32 s16, s16, s16": x - x yields NaN, not 0, when the register
// still holds NaN or Inf left over from earlier code.
.macro INIT1x2

	flds			s16, FP_ZERO		// s16 = 0.0f loaded from the stack slot
	vmov.f32		s20, s16		// s20 = 0.0f

.endm
|
||||
|
@ -696,7 +700,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -755,7 +759,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT2x1: set s16 and s17 to the 32-bit value stored at FP_ZERO.
// FP_ZERO is a stack slot ([fp, #-240]) that the function prologue fills
// with integer 0, whose bit pattern is +0.0f.
// s16/s17 are presumably the accumulators of the 2x1 tile - TODO confirm
// against the matching KERNEL/SAVE macros.
//
// The zero is loaded from memory rather than computed with
// "vsub.f32 s16, s16, s16": x - x yields NaN, not 0, when the register
// still holds NaN or Inf left over from earlier code.
.macro INIT2x1

	flds			s16, FP_ZERO		// s16 = 0.0f loaded from the stack slot
	vmov.f32		s17, s16		// s17 = 0.0f

.endm
|
||||
|
@ -799,7 +803,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
// INIT1x1: set s16 to the 32-bit value stored at FP_ZERO.
// FP_ZERO is a stack slot ([fp, #-240]) that the function prologue fills
// with integer 0, whose bit pattern is +0.0f.
// s16 is presumably the single accumulator of the 1x1 tile - TODO confirm
// against the matching KERNEL/SAVE macros.
//
// The zero is loaded from memory rather than computed with
// "vsub.f32 s16, s16, s16": x - x yields NaN, not 0, when the register
// still holds NaN or Inf left over from earlier code.
.macro INIT1x1

	flds			s16, FP_ZERO		// s16 = 0.0f loaded from the stack slot

.endm
|
||||
|
||||
|
@ -856,6 +860,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
sub r3, fp, #128
|
||||
vstm r3, { s8 - s31} // store floating point registers
|
||||
|
||||
movs r4, #0
|
||||
str r4, FP_ZERO
|
||||
str r4, FP_ZERO_1
|
||||
|
||||
ldr r3, OLD_LDC
|
||||
lsl r3, r3, #2 // ldc = ldc * 4
|
||||
str r3, LDC
|
||||
|
|
|
@ -73,6 +73,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define N [fp, #-260 ]
|
||||
#define K [fp, #-264 ]
|
||||
|
||||
#define FP_ZERO [fp, #-240]
|
||||
#define FP_ZERO_0 [fp, # -240]
|
||||
#define FP_ZERO_1 [fp, # -236]
|
||||
|
||||
#define ALPHA_I [fp, #-272]
|
||||
#define ALPHA_R [fp, #-280]
|
||||
|
||||
|
@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -404,7 +408,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d21, d16
|
||||
|
@ -586,7 +590,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -766,7 +770,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d24, d16
|
||||
vmov.f64 d25, d16
|
||||
|
@ -915,6 +919,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
sub r3, fp, #128
|
||||
vstm r3, { d8 - d15} // store floating point registers
|
||||
|
||||
movs r4, #0
|
||||
str r4, FP_ZERO
|
||||
str r4, FP_ZERO_1
|
||||
|
||||
ldr r3, OLD_LDC
|
||||
lsl r3, r3, #4 // ldc = ldc * 8 * 2
|
||||
str r3, LDC
|
||||
|
|
Loading…
Reference in New Issue