Updated TRMM kernels for ARMv7
This commit is contained in:
parent
a0e51e96f1
commit
de3e2d4349
|
@ -1,8 +1,3 @@
|
|||
SGEMVNKERNEL = ../arm/gemv_n.c
|
||||
SGEMVTKERNEL = ../arm/gemv_t.c
|
||||
CGEMVNKERNEL = ../arm/zgemv_n.c
|
||||
CGEMVTKERNEL = ../arm/zgemv_t.c
|
||||
|
||||
|
||||
#################################################################################
|
||||
SAMAXKERNEL = iamax_vfp.S
|
||||
|
@ -77,14 +72,14 @@ DSCALKERNEL = scal.c
|
|||
CSCALKERNEL = zscal.c
|
||||
ZSCALKERNEL = zscal.c
|
||||
|
||||
# BAD SGEMVNKERNEL = gemv_n_vfp.S
|
||||
SGEMVNKERNEL = gemv_n_vfp.S
|
||||
DGEMVNKERNEL = gemv_n_vfp.S
|
||||
#CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||
|
||||
# BAD SGEMVTKERNEL = gemv_t_vfp.S
|
||||
SGEMVTKERNEL = gemv_t_vfp.S
|
||||
DGEMVTKERNEL = gemv_t_vfp.S
|
||||
#CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||
|
||||
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
|
||||
|
@ -92,7 +87,6 @@ DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
|
|||
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
|
||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
|
||||
|
||||
#SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
|
||||
SGEMMINCOPY =
|
||||
SGEMMITCOPY =
|
||||
|
|
|
@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define N [fp, #-260 ]
|
||||
#define K [fp, #-264 ]
|
||||
|
||||
#define FP_ZERO [fp, #-236]
|
||||
#define FP_ZERO_0 [fp, #-236]
|
||||
#define FP_ZERO_1 [fp, #-232]
|
||||
|
||||
#define ALPHA_I [fp, #-272]
|
||||
#define ALPHA_R [fp, #-280]
|
||||
|
||||
|
@ -134,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16 , FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -351,7 +355,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16 , FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -529,7 +533,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16 , FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -706,7 +710,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds s16 , FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s24, s16
|
||||
vmov.f32 s25, s16
|
||||
|
@ -852,6 +856,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
sub r3, fp, #128
|
||||
vstm r3, { s8 - s31} // store floating point registers
|
||||
|
||||
movs r4, #0
|
||||
str r4, FP_ZERO
|
||||
str r4, FP_ZERO_1
|
||||
|
||||
ldr r3, OLD_LDC
|
||||
lsl r3, r3, #3 // ldc = ldc * 4 * 2
|
||||
str r3, LDC
|
||||
|
|
|
@ -59,6 +59,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define K [fp, #-264 ]
|
||||
#define A [fp, #-268 ]
|
||||
|
||||
#define FP_ZERO [fp, #-236]
|
||||
#define FP_ZERO_0 [fp, #-236]
|
||||
#define FP_ZERO_1 [fp, #-232]
|
||||
|
||||
|
||||
#define ALPHA [fp, #-276 ]
|
||||
|
||||
#define B [fp, #4 ]
|
||||
|
@ -89,7 +94,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x4
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -386,7 +391,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x4
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d21, d16
|
||||
|
@ -468,7 +473,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x4
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d24, d16
|
||||
vmov.f64 d28, d16
|
||||
|
@ -527,7 +532,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -601,7 +606,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d21, d16
|
||||
|
@ -656,7 +661,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d20, d16
|
||||
|
||||
.endm
|
||||
|
@ -699,7 +704,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -753,7 +758,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
|
||||
.endm
|
||||
|
@ -794,7 +799,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16, FP_ZERO
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -850,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
sub r3, fp, #128
|
||||
vstm r3, { d8 - d15} // store floating point registers
|
||||
|
||||
movs r4, #0
|
||||
str r4, FP_ZERO
|
||||
str r4, FP_ZERO_1
|
||||
|
||||
ldr r3, OLD_LDC
|
||||
lsl r3, r3, #3 // ldc = ldc * 8
|
||||
str r3, LDC
|
||||
|
|
|
@ -58,6 +58,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define K [fp, #-264 ]
|
||||
#define A [fp, #-268 ]
|
||||
|
||||
#define FP_ZERO [fp, #-240]
|
||||
#define FP_ZERO_0 [fp, # -240]
|
||||
#define FP_ZERO_1 [fp, # -236]
|
||||
|
||||
#define ALPHA [fp, #-280]
|
||||
|
||||
#define B [fp, #4 ]
|
||||
|
@ -88,7 +92,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds S16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -322,7 +326,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds S16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -405,7 +409,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x4
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds S16, FP_ZERO
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s24, s16
|
||||
vmov.f32 s28, s16
|
||||
|
@ -464,7 +468,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds S16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -538,7 +542,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds S16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s20, s16
|
||||
vmov.f32 s21, s16
|
||||
|
@ -593,7 +597,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds S16, FP_ZERO
|
||||
vmov.f32 s20, s16
|
||||
|
||||
.endm
|
||||
|
@ -636,7 +640,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT4x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds S16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
vmov.f32 s18, s16
|
||||
vmov.f32 s19, s16
|
||||
|
@ -690,7 +694,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds S16, FP_ZERO
|
||||
vmov.f32 s17, s16
|
||||
|
||||
.endm
|
||||
|
@ -731,7 +735,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f32 s16 , s16 , s16
|
||||
flds S16, FP_ZERO
|
||||
|
||||
.endm
|
||||
|
||||
|
@ -787,6 +791,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
sub r3, fp, #128
|
||||
vstm r3, { s8 - s31} // store floating point registers
|
||||
|
||||
movs r4, #0
|
||||
str r4, FP_ZERO
|
||||
str r4, FP_ZERO_1
|
||||
|
||||
ldr r3, OLD_LDC
|
||||
lsl r3, r3, #2 // ldc = ldc * 4
|
||||
str r3, LDC
|
||||
|
|
|
@ -59,6 +59,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define N [fp, #-260 ]
|
||||
#define K [fp, #-264 ]
|
||||
|
||||
#define FP_ZERO [fp, #-236]
|
||||
#define FP_ZERO_0 [fp, #-236]
|
||||
#define FP_ZERO_1 [fp, #-232]
|
||||
|
||||
#define ALPHA_I [fp, #-272]
|
||||
#define ALPHA_R [fp, #-280]
|
||||
|
||||
|
@ -134,7 +138,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16 , FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -388,7 +392,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x2
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16 , FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d20, d16
|
||||
vmov.f64 d21, d16
|
||||
|
@ -566,7 +570,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT2x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16 , FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d18, d16
|
||||
vmov.f64 d19, d16
|
||||
|
@ -743,7 +747,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT1x1
|
||||
|
||||
vsub.f64 d16 , d16 , d16
|
||||
fldd d16 , FP_ZERO
|
||||
vmov.f64 d17, d16
|
||||
vmov.f64 d24, d16
|
||||
vmov.f64 d25, d16
|
||||
|
@ -889,6 +893,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
sub r3, fp, #128
|
||||
vstm r3, { d8 - d15} // store floating point registers
|
||||
|
||||
movs r4, #0
|
||||
str r4, FP_ZERO
|
||||
str r4, FP_ZERO_1
|
||||
|
||||
ldr r3, OLD_LDC
|
||||
lsl r3, r3, #4 // ldc = ldc * 8 * 2
|
||||
str r3, LDC
|
||||
|
|
Loading…
Reference in New Issue