updated gemv_n_vfpv3.S for armv7

This commit is contained in:
Werner Saar 2016-01-25 15:00:13 +01:00
parent b4ede558a5
commit 63a7d7fb24
2 changed files with 18 additions and 18 deletions

View File

@@ -72,8 +72,8 @@ DSCALKERNEL = scal.c
CSCALKERNEL = zscal.c CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c ZSCALKERNEL = zscal.c
SGEMVNKERNEL = gemv_n_vfp.S SGEMVNKERNEL = gemv_n_vfpv3.S
DGEMVNKERNEL = gemv_n_vfp.S DGEMVNKERNEL = gemv_n_vfpv3.S
CGEMVNKERNEL = cgemv_n_vfp.S CGEMVNKERNEL = cgemv_n_vfp.S
ZGEMVNKERNEL = zgemv_n_vfp.S ZGEMVNKERNEL = zgemv_n_vfp.S
@@ -88,22 +88,14 @@ CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
SGEMMINCOPY =
SGEMMITCOPY =
SGEMMONCOPY = sgemm_ncopy_4_vfp.S SGEMMONCOPY = sgemm_ncopy_4_vfp.S
SGEMMOTCOPY = sgemm_tcopy_4_vfp.S SGEMMOTCOPY = sgemm_tcopy_4_vfp.S
SGEMMINCOPYOBJ =
SGEMMITCOPYOBJ =
SGEMMONCOPYOBJ = sgemm_oncopy.o SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = dgemm_ncopy_4_vfp.S DGEMMONCOPY = dgemm_ncopy_4_vfp.S
DGEMMOTCOPY = dgemm_tcopy_4_vfp.S DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy.o DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o DGEMMOTCOPYOBJ = dgemm_otcopy.o

View File

@@ -62,6 +62,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define M [fp, #-252 ] #define M [fp, #-252 ]
#define A [fp, #-256 ] #define A [fp, #-256 ]
#define FP_ZERO [fp, #-228]
#define FP_ZERO_0 [fp, #-228]
#define FP_ZERO_1 [fp, #-224]
#define X_PRE 64 #define X_PRE 64
#define Y_PRE 0 #define Y_PRE 0
@@ -79,7 +83,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ YO , #Y_PRE ] pld [ YO , #Y_PRE ]
pld [ YO , #Y_PRE+32 ] pld [ YO , #Y_PRE+32 ]
vsub.f64 d24 , d24 , d24 fldd d24 , FP_ZERO
vmov.f64 d25 , d24 vmov.f64 d25 , d24
vmov.f64 d26 , d24 vmov.f64 d26 , d24
vmov.f64 d27 , d24 vmov.f64 d27 , d24
@@ -147,7 +151,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_F1 .macro INIT_F1
vsub.f64 d24 , d24 , d24 fldd d24 , FP_ZERO
.endm .endm
@@ -175,7 +179,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_S8 .macro INIT_S8
vsub.f64 d24 , d24 , d24 fldd d24 , FP_ZERO
vmov.f64 d25 , d24 vmov.f64 d25 , d24
vmov.f64 d26 , d24 vmov.f64 d26 , d24
vmov.f64 d27 , d24 vmov.f64 d27 , d24
@@ -269,7 +273,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_S1 .macro INIT_S1
vsub.f64 d24 , d24 , d24 fldd d24 , FP_ZERO
.endm .endm
@@ -302,7 +306,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ YO , #Y_PRE ] pld [ YO , #Y_PRE ]
vsub.f32 s24 , s24 , s24 flds s24 , FP_ZERO
vmov.f32 s25 , s24 vmov.f32 s25 , s24
vmov.f32 s26 , s24 vmov.f32 s26 , s24
vmov.f32 s27 , s24 vmov.f32 s27 , s24
@@ -368,7 +372,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_F1 .macro INIT_F1
vsub.f32 s24 , s24 , s24 flds s24 , FP_ZERO
.endm .endm
@@ -396,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_S8 .macro INIT_S8
vsub.f32 s24 , s24 , s24 flds s24 , FP_ZERO
vmov.f32 s25 , s24 vmov.f32 s25 , s24
vmov.f32 s26 , s24 vmov.f32 s26 , s24
vmov.f32 s27 , s24 vmov.f32 s27 , s24
@@ -489,7 +493,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_S1 .macro INIT_S1
vsub.f32 s24 , s24 , s24 flds s24 , FP_ZERO
.endm .endm
@@ -538,6 +542,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vstm r12, { s8 - s31 } // store floating point registers vstm r12, { s8 - s31 } // store floating point registers
#endif #endif
movs r12, #0
str r12, FP_ZERO
str r12, FP_ZERO_1
cmp OLD_M, #0 cmp OLD_M, #0
ble gemvn_kernel_L999 ble gemvn_kernel_L999