arm: add softfp support in dgemm/dtrmm vfp kernels
This commit is contained in:
parent
872a11a2bf
commit
09bc6ebe5b
|
@ -72,7 +72,6 @@ DGEMVTKERNEL = gemv_t_vfp.S
|
||||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||||
|
|
||||||
SGEMMKERNEL = ../generic/gemmkernel_4x2.c
|
|
||||||
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
|
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
|
||||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||||
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
|
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
|
||||||
|
@ -85,7 +84,7 @@ SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
DGEMMKERNEL = ../generic/gemmkernel_4x2.c
|
DGEMMKERNEL = dgemm_kernel_4x2_vfp.S
|
||||||
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
|
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
|
||||||
DGEMMINCOPY = dgemm_ncopy_4_vfp.S
|
DGEMMINCOPY = dgemm_ncopy_4_vfp.S
|
||||||
DGEMMITCOPY = dgemm_tcopy_4_vfp.S
|
DGEMMITCOPY = dgemm_tcopy_4_vfp.S
|
||||||
|
@ -97,9 +96,8 @@ DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
STRMMKERNEL = ../generic/trmmkernel_4x2.c
|
|
||||||
STRMMKERNEL = strmm_kernel_4x2_vfp.S
|
STRMMKERNEL = strmm_kernel_4x2_vfp.S
|
||||||
DTRMMKERNEL = ../generic/trmmkernel_4x2.c
|
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
|
||||||
|
|
||||||
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
|
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
|
||||||
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
|
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
|
||||||
|
@ -113,11 +111,9 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||||
|
|
||||||
ifeq ($(ARM_ABI),hard)
|
ifeq ($(ARM_ABI),hard)
|
||||||
|
|
||||||
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
|
|
||||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
||||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
|
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_4x2_vfp.S
|
|
||||||
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
|
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
|
||||||
ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S
|
ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S
|
||||||
|
|
||||||
|
|
|
@ -8,8 +8,8 @@ ZNRM2KERNEL = nrm2_vfpv3.S
|
||||||
SGEMVNKERNEL = gemv_n_vfpv3.S
|
SGEMVNKERNEL = gemv_n_vfpv3.S
|
||||||
DGEMVNKERNEL = gemv_n_vfpv3.S
|
DGEMVNKERNEL = gemv_n_vfpv3.S
|
||||||
|
|
||||||
STRMMKERNEL = ../generic/trmmkernel_4x4.c
|
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
|
||||||
DTRMMKERNEL = ../generic/trmmkernel_4x4.c
|
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
|
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
|
||||||
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
|
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
|
||||||
|
@ -17,7 +17,7 @@ SGEMMOTCOPY = sgemm_tcopy_4_vfp.S
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
DGEMMKERNEL = ../generic/gemmkernel_4x4.c
|
DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
|
||||||
DGEMMONCOPY = dgemm_ncopy_4_vfp.S
|
DGEMMONCOPY = dgemm_ncopy_4_vfp.S
|
||||||
DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
|
DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
|
@ -25,13 +25,9 @@ DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
ifeq ($(ARM_ABI),hard)
|
ifeq ($(ARM_ABI),hard)
|
||||||
|
|
||||||
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
|
|
||||||
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
|
|
||||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
|
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
|
||||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
|
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
|
|
||||||
|
|
||||||
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
|
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
|
||||||
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
|
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
|
||||||
|
|
||||||
|
|
|
@ -62,10 +62,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-280]
|
#define ALPHA [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #12 ]
|
||||||
|
#define B [fp, #16 ]
|
||||||
|
#define C [fp, #20 ]
|
||||||
|
#define OLD_LDC [fp, #24 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -429,6 +436,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -79,9 +79,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-280]
|
#define ALPHA [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #12 ]
|
||||||
|
#define B [fp, #16 ]
|
||||||
|
#define C [fp, #20 ]
|
||||||
|
#define OLD_LDC [fp, #24 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -878,6 +886,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -65,10 +65,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-276 ]
|
#define ALPHA [fp, #-276 ]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #12 ]
|
||||||
|
#define B [fp, #16 ]
|
||||||
|
#define OLD_C [fp, #20 ]
|
||||||
|
#define OLD_LDC [fp, #24 ]
|
||||||
|
#define OFFSET [fp, #28 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define OLD_C [fp, #8 ]
|
#define OLD_C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#define OFFSET [fp, #16 ]
|
#define OFFSET [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -404,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -66,10 +66,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-276 ]
|
#define ALPHA [fp, #-276 ]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #12 ]
|
||||||
|
#define B [fp, #16 ]
|
||||||
|
#define OLD_C [fp, #20 ]
|
||||||
|
#define OLD_LDC [fp, #24 ]
|
||||||
|
#define OFFSET [fp, #28 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define OLD_C [fp, #8 ]
|
#define OLD_C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#define OFFSET [fp, #16 ]
|
#define OFFSET [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -846,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
Loading…
Reference in New Issue