arm: add softfp support in vfp gemv kernels
This commit is contained in:
parent
83bd547517
commit
8f83d3f961
|
@@ -62,6 +62,16 @@ DSWAPKERNEL = swap_vfp.S
|
|||
CSWAPKERNEL = swap_vfp.S
|
||||
ZSWAPKERNEL = swap_vfp.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vfp.S
|
||||
DGEMVNKERNEL = gemv_n_vfp.S
|
||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t_vfp.S
|
||||
DGEMVTKERNEL = gemv_t_vfp.S
|
||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||
|
||||
SGEMMKERNEL = ../generic/gemmkernel_4x2.c
|
||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
|
||||
|
@@ -101,16 +111,6 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
|||
|
||||
ifeq ($(ARM_ABI),hard)
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vfp.S
|
||||
DGEMVNKERNEL = gemv_n_vfp.S
|
||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
||||
|
||||
SGEMVTKERNEL = gemv_t_vfp.S
|
||||
DGEMVTKERNEL = gemv_t_vfp.S
|
||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||
|
||||
STRMMKERNEL = strmm_kernel_4x2_vfp.S
|
||||
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
|
||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
||||
|
|
|
@@ -5,6 +5,9 @@ DNRM2KERNEL = nrm2_vfpv3.S
|
|||
CNRM2KERNEL = nrm2_vfpv3.S
|
||||
ZNRM2KERNEL = nrm2_vfpv3.S
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vfpv3.S
|
||||
DGEMVNKERNEL = gemv_n_vfpv3.S
|
||||
|
||||
STRMMKERNEL = ../generic/trmmkernel_4x4.c
|
||||
DTRMMKERNEL = ../generic/trmmkernel_4x4.c
|
||||
|
||||
|
@@ -22,9 +25,6 @@ DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
|||
|
||||
ifeq ($(ARM_ABI),hard)
|
||||
|
||||
SGEMVNKERNEL = gemv_n_vfpv3.S
|
||||
DGEMVNKERNEL = gemv_n_vfpv3.S
|
||||
|
||||
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
|
||||
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
|
||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
|
||||
|
|
|
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
#define OLD_ALPHAR r3
|
||||
#define OLD_ALPHAI [fp, #0 ]
|
||||
#define OLD_A_SOFTFP [fp, #4 ]
|
||||
#define OLD_LDA [fp, #8 ]
|
||||
#define X [fp, #12 ]
|
||||
#define OLD_INC_X [fp, #16 ]
|
||||
#define Y [fp, #20 ]
|
||||
#define OLD_INC_Y [fp, #24 ]
|
||||
#else
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#endif
|
||||
|
||||
#define OLD_A r3
|
||||
#define OLD_M r0
|
||||
|
||||
|
@@ -462,6 +474,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
cmp N, #0
|
||||
ble cgemvn_kernel_L999
|
||||
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
vmov s0, OLD_ALPHAR
|
||||
vldr s1, OLD_ALPHAI
|
||||
ldr OLD_A, OLD_A_SOFTFP
|
||||
#endif
|
||||
|
||||
str OLD_A, A
|
||||
str OLD_M, M
|
||||
vstr s0 , ALPHA_R
|
||||
|
|
|
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
#define OLD_ALPHAR r3
|
||||
#define OLD_ALPHAI [fp, #0 ]
|
||||
#define OLD_A_SOFTFP [fp, #4 ]
|
||||
#define OLD_LDA [fp, #8 ]
|
||||
#define X [fp, #12 ]
|
||||
#define OLD_INC_X [fp, #16 ]
|
||||
#define Y [fp, #20 ]
|
||||
#define OLD_INC_Y [fp, #24 ]
|
||||
#else
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#endif
|
||||
|
||||
#define OLD_A r3
|
||||
#define OLD_N r1
|
||||
|
||||
|
@@ -359,6 +371,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
cmp OLD_N, #0
|
||||
ble cgemvt_kernel_L999
|
||||
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
vmov s0, OLD_ALPHAR
|
||||
vldr s1, OLD_ALPHAI
|
||||
ldr OLD_A, OLD_A_SOFTFP
|
||||
#endif
|
||||
|
||||
str OLD_A, A
|
||||
str OLD_N, N
|
||||
|
||||
|
|
|
@@ -38,11 +38,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define OLD_ALPHA r3
|
||||
#define OLD_A_SOFTFP [fp, #0 ]
|
||||
#define OLD_LDA [fp, #4 ]
|
||||
#define X [fp, #8 ]
|
||||
#define OLD_INC_X [fp, #12 ]
|
||||
#define Y [fp, #16 ]
|
||||
#define OLD_INC_Y [fp, #20 ]
|
||||
#else
|
||||
#define OLD_ALPHA [fp, #0 ]
|
||||
#define OLD_A_SOFTFP [fp, #8 ]
|
||||
#define OLD_LDA [fp, #12]
|
||||
#define X [fp, #16]
|
||||
#define OLD_INC_X [fp, #20]
|
||||
#define Y [fp, #24]
|
||||
#define OLD_INC_Y [fp, #28]
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
|
||||
#endif
|
||||
|
||||
#define OLD_A r3
|
||||
#define OLD_M r0
|
||||
|
||||
|
@@ -508,6 +533,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
cmp N, #0
|
||||
ble gemvn_kernel_L999
|
||||
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
#if !defined(DOUBLE)
|
||||
vmov s0, OLD_ALPHA
|
||||
#else
|
||||
vldr d0, OLD_ALPHA
|
||||
#endif
|
||||
ldr OLD_A, OLD_A_SOFTFP
|
||||
#endif
|
||||
|
||||
str OLD_A, A
|
||||
str OLD_M, M
|
||||
|
||||
|
|
|
@@ -38,25 +38,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#ifndef ARM_SOFTFP_ABI
|
||||
//hard
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#define OLD_A r3
|
||||
#else
|
||||
#define OLD_A_SOFTFP [fp, #0 ]
|
||||
#define OLD_LDA [fp, #4 ]
|
||||
#define X [fp, #8 ]
|
||||
#define OLD_INC_X [fp, #12 ]
|
||||
#define Y [fp, #16 ]
|
||||
#define OLD_INC_Y [fp, #20 ]
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define OLD_ALPHA r3
|
||||
#define OLD_A r3
|
||||
#define OLD_A_SOFTFP [fp, #0 ]
|
||||
#define OLD_LDA [fp, #4 ]
|
||||
#define X [fp, #8 ]
|
||||
#define OLD_INC_X [fp, #12 ]
|
||||
#define Y [fp, #16 ]
|
||||
#define OLD_INC_Y [fp, #20 ]
|
||||
#else
|
||||
#define OLD_ALPHA [fp, #0 ]
|
||||
#define OLD_A_SOFTFP [fp, #8 ]
|
||||
#define OLD_LDA [fp, #12]
|
||||
#define X [fp, #16]
|
||||
#define OLD_INC_X [fp, #20]
|
||||
#define Y [fp, #24]
|
||||
#define OLD_INC_Y [fp, #28]
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
|
||||
#endif
|
||||
|
||||
#define OLD_A r3
|
||||
#define OLD_M r0
|
||||
|
||||
#define AO1 r0
|
||||
|
@@ -565,18 +577,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
cmp N, #0
|
||||
ble gemvn_kernel_L999
|
||||
|
||||
#ifndef DOUBLE
|
||||
#ifdef ARM_SOFTFP_ABI
|
||||
|
||||
vmov s0, OLD_ALPHA
|
||||
ldr OLD_A, OLD_A_SOFTFP
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
#if !defined(DOUBLE)
|
||||
vmov s0, OLD_ALPHA
|
||||
#else
|
||||
vldr d0, OLD_ALPHA
|
||||
#endif
|
||||
ldr OLD_A, OLD_A_SOFTFP
|
||||
#endif
|
||||
|
||||
str OLD_A, A
|
||||
str OLD_M, M
|
||||
|
||||
|
||||
|
||||
ldr INC_X , OLD_INC_X
|
||||
ldr INC_Y , OLD_INC_Y
|
||||
|
||||
|
|
|
@@ -38,25 +38,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#ifndef ARM_SOFTFP_ABI
|
||||
//hard abi
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#define OLD_A r3
|
||||
#else
|
||||
#define OLD_A_SOFTFP [fp, #0 ]
|
||||
#define OLD_LDA [fp, #4 ]
|
||||
#define X [fp, #8 ]
|
||||
#define OLD_INC_X [fp, #12 ]
|
||||
#define Y [fp, #16 ]
|
||||
#define OLD_INC_Y [fp, #20 ]
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define OLD_ALPHA r3
|
||||
#define OLD_A r3
|
||||
#define OLD_A_SOFTFP [fp, #0 ]
|
||||
#define OLD_LDA [fp, #4 ]
|
||||
#define X [fp, #8 ]
|
||||
#define OLD_INC_X [fp, #12 ]
|
||||
#define Y [fp, #16 ]
|
||||
#define OLD_INC_Y [fp, #20 ]
|
||||
#else
|
||||
#define OLD_ALPHA [fp, #0 ]
|
||||
#define OLD_A_SOFTFP [fp, #8 ]
|
||||
#define OLD_LDA [fp, #12]
|
||||
#define X [fp, #16]
|
||||
#define OLD_INC_X [fp, #20]
|
||||
#define Y [fp, #24]
|
||||
#define OLD_INC_Y [fp, #28]
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
|
||||
#endif
|
||||
|
||||
#define OLD_A r3
|
||||
#define OLD_N r1
|
||||
|
||||
#define M r0
|
||||
|
@@ -518,11 +530,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
cmp OLD_N, #0
|
||||
ble gemvt_kernel_L999
|
||||
|
||||
#ifndef DOUBLE
|
||||
#ifdef ARM_SOFTFP_ABI
|
||||
vmov s0, OLD_ALPHA
|
||||
ldr OLD_A, OLD_A_SOFTFP
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
#if !defined(DOUBLE)
|
||||
vmov s0, OLD_ALPHA
|
||||
#else
|
||||
vldr d0, OLD_ALPHA
|
||||
#endif
|
||||
ldr OLD_A, OLD_A_SOFTFP
|
||||
#endif
|
||||
|
||||
str OLD_A, A
|
||||
|
|
|
@@ -38,11 +38,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define OLD_ALPHA r3
|
||||
#define OLD_A_SOFTFP [fp, #0 ]
|
||||
#define OLD_LDA [fp, #4 ]
|
||||
#define X [fp, #8 ]
|
||||
#define OLD_INC_X [fp, #12 ]
|
||||
#define Y [fp, #16 ]
|
||||
#define OLD_INC_Y [fp, #20 ]
|
||||
#else
|
||||
#define OLD_ALPHA [fp, #0 ]
|
||||
#define OLD_A_SOFTFP [fp, #8 ]
|
||||
#define OLD_LDA [fp, #12]
|
||||
#define X [fp, #16]
|
||||
#define OLD_INC_X [fp, #20]
|
||||
#define Y [fp, #24]
|
||||
#define OLD_INC_Y [fp, #28]
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
|
||||
#endif
|
||||
|
||||
#define OLD_A r3
|
||||
#define OLD_N r1
|
||||
|
||||
|
@@ -476,6 +501,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
cmp OLD_N, #0
|
||||
ble gemvt_kernel_L999
|
||||
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
#if !defined(DOUBLE)
|
||||
vmov s0, OLD_ALPHA
|
||||
#else
|
||||
vldr d0, OLD_ALPHA
|
||||
#endif
|
||||
ldr OLD_A, OLD_A_SOFTFP
|
||||
#endif
|
||||
|
||||
str OLD_A, A
|
||||
str OLD_N, N
|
||||
|
||||
|
|
|
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
#define OLD_ALPHAR [fp, #0 ]
|
||||
#define OLD_ALPHAI [fp, #8 ]
|
||||
#define OLD_A_SOFTFP [fp, #16]
|
||||
#define OLD_LDA [fp, #20]
|
||||
#define X [fp, #24]
|
||||
#define OLD_INC_X [fp, #28]
|
||||
#define Y [fp, #32]
|
||||
#define OLD_INC_Y [fp, #36]
|
||||
#else
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#endif
|
||||
|
||||
#define OLD_A r3
|
||||
#define OLD_M r0
|
||||
|
||||
|
@@ -465,6 +477,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
cmp N, #0
|
||||
ble zgemvn_kernel_L999
|
||||
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
vldr d0, OLD_ALPHAR
|
||||
vldr d1, OLD_ALPHAI
|
||||
ldr OLD_A, OLD_A_SOFTFP
|
||||
#endif
|
||||
|
||||
str OLD_A, A
|
||||
str OLD_M, M
|
||||
vstr d0 , ALPHA_R
|
||||
|
|
|
@@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#define STACKSIZE 256
|
||||
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
#define OLD_ALPHAR [fp, #0 ]
|
||||
#define OLD_ALPHAI [fp, #8 ]
|
||||
#define OLD_A_SOFTFP [fp, #16]
|
||||
#define OLD_LDA [fp, #20]
|
||||
#define X [fp, #24]
|
||||
#define OLD_INC_X [fp, #28]
|
||||
#define Y [fp, #32]
|
||||
#define OLD_INC_Y [fp, #36]
|
||||
#else
|
||||
#define OLD_LDA [fp, #0 ]
|
||||
#define X [fp, #4 ]
|
||||
#define OLD_INC_X [fp, #8 ]
|
||||
#define Y [fp, #12 ]
|
||||
#define OLD_INC_Y [fp, #16 ]
|
||||
#endif
|
||||
|
||||
#define OLD_A r3
|
||||
#define OLD_N r1
|
||||
|
||||
|
@@ -360,6 +372,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
cmp OLD_N, #0
|
||||
ble zgemvt_kernel_L999
|
||||
|
||||
#if !defined(__ARM_PCS_VFP)
|
||||
vldr d0, OLD_ALPHAR
|
||||
vldr d1, OLD_ALPHAI
|
||||
ldr OLD_A, OLD_A_SOFTFP
|
||||
#endif
|
||||
|
||||
str OLD_A, A
|
||||
str OLD_N, N
|
||||
|
||||
|
|
Loading…
Reference in New Issue