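Updated gemv_n_vfpv3.S for ARMv7: accumulators are now zeroed by loading a stored zero (FP_ZERO) instead of using vsub, and the SGEMVN/DGEMVN kernels are switched to gemv_n_vfpv3.S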
This commit is contained in:
parent
b4ede558a5
commit
63a7d7fb24
|
@ -72,8 +72,8 @@ DSCALKERNEL = scal.c
|
||||||
CSCALKERNEL = zscal.c
|
CSCALKERNEL = zscal.c
|
||||||
ZSCALKERNEL = zscal.c
|
ZSCALKERNEL = zscal.c
|
||||||
|
|
||||||
SGEMVNKERNEL = gemv_n_vfp.S
|
SGEMVNKERNEL = gemv_n_vfpv3.S
|
||||||
DGEMVNKERNEL = gemv_n_vfp.S
|
DGEMVNKERNEL = gemv_n_vfpv3.S
|
||||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||||
|
|
||||||
|
@ -88,22 +88,14 @@ CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
|
||||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
|
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
|
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
|
||||||
SGEMMINCOPY =
|
|
||||||
SGEMMITCOPY =
|
|
||||||
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
|
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
|
||||||
SGEMMOTCOPY = sgemm_tcopy_4_vfp.S
|
SGEMMOTCOPY = sgemm_tcopy_4_vfp.S
|
||||||
SGEMMINCOPYOBJ =
|
|
||||||
SGEMMITCOPYOBJ =
|
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
|
DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
|
||||||
DGEMMINCOPY =
|
|
||||||
DGEMMITCOPY =
|
|
||||||
DGEMMONCOPY = dgemm_ncopy_4_vfp.S
|
DGEMMONCOPY = dgemm_ncopy_4_vfp.S
|
||||||
DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
|
DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
|
||||||
DGEMMINCOPYOBJ =
|
|
||||||
DGEMMITCOPYOBJ =
|
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
|
|
|
@ -62,6 +62,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define M [fp, #-252 ]
|
#define M [fp, #-252 ]
|
||||||
#define A [fp, #-256 ]
|
#define A [fp, #-256 ]
|
||||||
|
|
||||||
|
#define FP_ZERO [fp, #-228]
|
||||||
|
#define FP_ZERO_0 [fp, #-228]
|
||||||
|
#define FP_ZERO_1 [fp, #-224]
|
||||||
|
|
||||||
|
|
||||||
#define X_PRE 64
|
#define X_PRE 64
|
||||||
#define Y_PRE 0
|
#define Y_PRE 0
|
||||||
|
@ -79,7 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
pld [ YO , #Y_PRE ]
|
pld [ YO , #Y_PRE ]
|
||||||
pld [ YO , #Y_PRE+32 ]
|
pld [ YO , #Y_PRE+32 ]
|
||||||
|
|
||||||
vsub.f64 d24 , d24 , d24
|
fldd d24 , FP_ZERO
|
||||||
vmov.f64 d25 , d24
|
vmov.f64 d25 , d24
|
||||||
vmov.f64 d26 , d24
|
vmov.f64 d26 , d24
|
||||||
vmov.f64 d27 , d24
|
vmov.f64 d27 , d24
|
||||||
|
@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
.macro INIT_F1
|
.macro INIT_F1
|
||||||
|
|
||||||
vsub.f64 d24 , d24 , d24
|
fldd d24 , FP_ZERO
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
@ -175,7 +179,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
.macro INIT_S8
|
.macro INIT_S8
|
||||||
|
|
||||||
vsub.f64 d24 , d24 , d24
|
fldd d24 , FP_ZERO
|
||||||
vmov.f64 d25 , d24
|
vmov.f64 d25 , d24
|
||||||
vmov.f64 d26 , d24
|
vmov.f64 d26 , d24
|
||||||
vmov.f64 d27 , d24
|
vmov.f64 d27 , d24
|
||||||
|
@ -269,7 +273,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
.macro INIT_S1
|
.macro INIT_S1
|
||||||
|
|
||||||
vsub.f64 d24 , d24 , d24
|
fldd d24 , FP_ZERO
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
@ -302,7 +306,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
pld [ YO , #Y_PRE ]
|
pld [ YO , #Y_PRE ]
|
||||||
|
|
||||||
vsub.f32 s24 , s24 , s24
|
flds s24 , FP_ZERO
|
||||||
vmov.f32 s25 , s24
|
vmov.f32 s25 , s24
|
||||||
vmov.f32 s26 , s24
|
vmov.f32 s26 , s24
|
||||||
vmov.f32 s27 , s24
|
vmov.f32 s27 , s24
|
||||||
|
@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
.macro INIT_F1
|
.macro INIT_F1
|
||||||
|
|
||||||
vsub.f32 s24 , s24 , s24
|
flds s24 , FP_ZERO
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
@ -396,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
.macro INIT_S8
|
.macro INIT_S8
|
||||||
|
|
||||||
vsub.f32 s24 , s24 , s24
|
flds s24 , FP_ZERO
|
||||||
vmov.f32 s25 , s24
|
vmov.f32 s25 , s24
|
||||||
vmov.f32 s26 , s24
|
vmov.f32 s26 , s24
|
||||||
vmov.f32 s27 , s24
|
vmov.f32 s27 , s24
|
||||||
|
@ -489,7 +493,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
.macro INIT_S1
|
.macro INIT_S1
|
||||||
|
|
||||||
vsub.f32 s24 , s24 , s24
|
flds s24 , FP_ZERO
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
@ -538,6 +542,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
vstm r12, { s8 - s31 } // store floating point registers
|
vstm r12, { s8 - s31 } // store floating point registers
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
movs r12, #0
|
||||||
|
str r12, FP_ZERO
|
||||||
|
str r12, FP_ZERO_1
|
||||||
|
|
||||||
cmp OLD_M, #0
|
cmp OLD_M, #0
|
||||||
ble gemvn_kernel_L999
|
ble gemvn_kernel_L999
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue