arm: add softfp support in cgemm/ctrmm vfp kernels

This commit is contained in:
Ashwin Sekhar T K 2017-07-02 02:42:32 +05:30
parent 09bc6ebe5b
commit 305cd2e8b4
6 changed files with 63 additions and 4 deletions

View File

@ -98,7 +98,9 @@ DGEMMOTCOPYOBJ = dgemm_otcopy.o
STRMMKERNEL = strmm_kernel_4x2_vfp.S
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
CGEMMONCOPYOBJ = cgemm_oncopy.o
@ -111,10 +113,8 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy.o
ifeq ($(ARM_ABI),hard)
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
CGEMMKERNEL = cgemm_kernel_2x2_vfp.S
ZGEMMKERNEL = zgemm_kernel_2x2_vfp.S
endif

View File

@ -10,6 +10,7 @@ DGEMVNKERNEL = gemv_n_vfpv3.S
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
@ -23,12 +24,12 @@ DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
ifeq ($(ARM_ABI),hard)
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
endif

View File

@ -64,9 +64,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
#if !defined(__ARM_PCS_VFP)
#define OLD_ALPHAR_SOFTFP r3
#define OLD_ALPHAI_SOFTFP [fp, #4]
#define OLD_A_SOFTFP [fp, #8 ]
#define B [fp, #12 ]
#define C [fp, #16 ]
#define OLD_LDC [fp, #20 ]
#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#endif
#define I r0
#define J r1
@ -816,6 +825,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
#if !defined(__ARM_PCS_VFP)
vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
ldr OLD_A, OLD_A_SOFTFP
#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K

View File

@ -80,9 +80,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
#if !defined(__ARM_PCS_VFP)
#define OLD_ALPHAR_SOFTFP r3
#define OLD_ALPHAI_SOFTFP [fp, #4]
#define OLD_A_SOFTFP [fp, #8 ]
#define B [fp, #12 ]
#define C [fp, #16 ]
#define OLD_LDC [fp, #20 ]
#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#endif
#define I r0
#define J r1
@ -873,6 +882,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
#if !defined(__ARM_PCS_VFP)
vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
ldr OLD_A, OLD_A_SOFTFP
#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K

View File

@ -67,10 +67,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
#if !defined(__ARM_PCS_VFP)
#define OLD_ALPHAR_SOFTFP r3
#define OLD_ALPHAI_SOFTFP [fp, #4]
#define OLD_A_SOFTFP [fp, #8 ]
#define B [fp, #12 ]
#define C [fp, #16 ]
#define OLD_LDC [fp, #20 ]
#define OFFSET [fp, #24 ]
#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
#endif
#define I r0
#define J r1
@ -826,6 +836,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
#if !defined(__ARM_PCS_VFP)
vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
ldr OLD_A, OLD_A_SOFTFP
#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K

View File

@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ALPHA_I [fp, #-272]
#define ALPHA_R [fp, #-280]
#if !defined(__ARM_PCS_VFP)
#define OLD_ALPHAR_SOFTFP r3
#define OLD_ALPHAI_SOFTFP [fp, #4]
#define OLD_A_SOFTFP [fp, #8 ]
#define B [fp, #12 ]
#define C [fp, #16 ]
#define OLD_LDC [fp, #20 ]
#define OFFSET [fp, #24 ]
#else
#define B [fp, #4 ]
#define C [fp, #8 ]
#define OLD_LDC [fp, #12 ]
#define OFFSET [fp, #16 ]
#endif
#define I r0
#define J r1
@ -846,6 +856,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add fp, sp, #24
sub sp, sp, #STACKSIZE // reserve stack
#if !defined(__ARM_PCS_VFP)
vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
ldr OLD_A, OLD_A_SOFTFP
#endif
str OLD_M, M
str OLD_N, N
str OLD_K, K