Merge pull request #1221 from ashwinyes/develop_arm_softfp
arm: add support for softfp in arm vfp assembly files
This commit is contained in:
commit
a590e6135c
23
Makefile.arm
23
Makefile.arm
|
@ -1,5 +1,4 @@
|
||||||
#ifeq logical or
|
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
|
||||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15))
|
|
||||||
ifeq ($(OSNAME), Android)
|
ifeq ($(OSNAME), Android)
|
||||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||||
FCOMMON_OPT += -mfpu=neon -march=armv7-a
|
FCOMMON_OPT += -mfpu=neon -march=armv7-a
|
||||||
|
@ -9,28 +8,12 @@ FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(CORE), ARMV7)
|
|
||||||
ifeq ($(OSNAME), Android)
|
|
||||||
ifeq ($(ARM_SOFTFP_ABI), 1)
|
|
||||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a
|
|
||||||
FCOMMON_OPT += -mfpu=neon -march=armv7-a
|
|
||||||
else
|
|
||||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a -Wl,--no-warn-mismatch
|
|
||||||
FCOMMON_OPT += -mfpu=neon -march=armv7-a -Wl,--no-warn-mismatch
|
|
||||||
endif
|
|
||||||
else
|
|
||||||
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
|
||||||
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
|
|
||||||
endif
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifeq ($(CORE), ARMV6)
|
ifeq ($(CORE), ARMV6)
|
||||||
CCOMMON_OPT += -mfpu=vfp -march=armv6
|
CCOMMON_OPT += -mfpu=vfp -march=armv6
|
||||||
FCOMMON_OPT += -mfpu=vfp -march=armv6
|
FCOMMON_OPT += -mfpu=vfp -march=armv6
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
ifeq ($(CORE), ARMV5)
|
ifeq ($(CORE), ARMV5)
|
||||||
CCOMMON_OPT += -marm -march=armv5
|
CCOMMON_OPT += -march=armv5
|
||||||
FCOMMON_OPT += -marm -march=armv5
|
FCOMMON_OPT += -march=armv5
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -242,6 +242,10 @@ EXTRALIB += -lm
|
||||||
NO_EXPRECISION = 1
|
NO_EXPRECISION = 1
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(OSNAME), Android)
|
||||||
|
EXTRALIB += -lm
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), AIX)
|
ifeq ($(OSNAME), AIX)
|
||||||
EXTRALIB += -lm
|
EXTRALIB += -lm
|
||||||
endif
|
endif
|
||||||
|
@ -486,12 +490,10 @@ BINARY_DEFINED = 1
|
||||||
CCOMMON_OPT += -marm
|
CCOMMON_OPT += -marm
|
||||||
FCOMMON_OPT += -marm
|
FCOMMON_OPT += -marm
|
||||||
|
|
||||||
|
# If softfp abi is mentioned on the command line, force it.
|
||||||
ifeq ($(ARM_SOFTFP_ABI), 1)
|
ifeq ($(ARM_SOFTFP_ABI), 1)
|
||||||
CCOMMON_OPT += -mfloat-abi=softfp -DARM_SOFTFP_ABI
|
CCOMMON_OPT += -mfloat-abi=softfp
|
||||||
FCOMMON_OPT += -mfloat-abi=softfp -DARM_SOFTFP_ABI
|
FCOMMON_OPT += -mfloat-abi=softfp
|
||||||
else
|
|
||||||
CCOMMON_OPT += -mfloat-abi=hard
|
|
||||||
FCOMMON_OPT += -mfloat-abi=hard
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq ($(OSNAME), Android)
|
ifeq ($(OSNAME), Android)
|
||||||
|
|
|
@ -111,11 +111,6 @@ REALNAME:
|
||||||
|
|
||||||
#define PROFCODE
|
#define PROFCODE
|
||||||
|
|
||||||
#ifdef __ARM_PCS
|
|
||||||
//-mfloat-abi=softfp
|
|
||||||
#define SOFT_FLOAT_ABI
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
|
include $(KERNELDIR)/KERNEL.ARMV5
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
SAMAXKERNEL = iamax_vfp.S
|
SAMAXKERNEL = iamax_vfp.S
|
||||||
DAMAXKERNEL = iamax_vfp.S
|
DAMAXKERNEL = iamax_vfp.S
|
||||||
CAMAXKERNEL = iamax_vfp.S
|
CAMAXKERNEL = iamax_vfp.S
|
||||||
|
@ -44,10 +42,10 @@ DAXPYKERNEL = axpy_vfp.S
|
||||||
CAXPYKERNEL = axpy_vfp.S
|
CAXPYKERNEL = axpy_vfp.S
|
||||||
ZAXPYKERNEL = axpy_vfp.S
|
ZAXPYKERNEL = axpy_vfp.S
|
||||||
|
|
||||||
SCOPYKERNEL = copy.c
|
SROTKERNEL = rot_vfp.S
|
||||||
DCOPYKERNEL = copy.c
|
DROTKERNEL = rot_vfp.S
|
||||||
CCOPYKERNEL = zcopy.c
|
CROTKERNEL = rot_vfp.S
|
||||||
ZCOPYKERNEL = zcopy.c
|
ZROTKERNEL = rot_vfp.S
|
||||||
|
|
||||||
SDOTKERNEL = sdot_vfp.S
|
SDOTKERNEL = sdot_vfp.S
|
||||||
DDOTKERNEL = ddot_vfp.S
|
DDOTKERNEL = ddot_vfp.S
|
||||||
|
@ -59,16 +57,6 @@ DNRM2KERNEL = nrm2_vfp.S
|
||||||
CNRM2KERNEL = nrm2_vfp.S
|
CNRM2KERNEL = nrm2_vfp.S
|
||||||
ZNRM2KERNEL = nrm2_vfp.S
|
ZNRM2KERNEL = nrm2_vfp.S
|
||||||
|
|
||||||
SROTKERNEL = rot_vfp.S
|
|
||||||
DROTKERNEL = rot_vfp.S
|
|
||||||
CROTKERNEL = rot_vfp.S
|
|
||||||
ZROTKERNEL = rot_vfp.S
|
|
||||||
|
|
||||||
SSCALKERNEL = scal.c
|
|
||||||
DSCALKERNEL = scal.c
|
|
||||||
CSCALKERNEL = zscal.c
|
|
||||||
ZSCALKERNEL = zscal.c
|
|
||||||
|
|
||||||
SSWAPKERNEL = swap_vfp.S
|
SSWAPKERNEL = swap_vfp.S
|
||||||
DSWAPKERNEL = swap_vfp.S
|
DSWAPKERNEL = swap_vfp.S
|
||||||
CSWAPKERNEL = swap_vfp.S
|
CSWAPKERNEL = swap_vfp.S
|
||||||
|
@ -84,26 +72,25 @@ DGEMVTKERNEL = gemv_t_vfp.S
|
||||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
CGEMVTKERNEL = cgemv_t_vfp.S
|
||||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
ZGEMVTKERNEL = zgemv_t_vfp.S
|
||||||
|
|
||||||
STRMMKERNEL = strmm_kernel_4x2_vfp.S
|
|
||||||
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
|
|
||||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
|
||||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
|
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
|
SGEMMKERNEL = sgemm_kernel_4x2_vfp.S
|
||||||
|
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||||
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
|
SGEMMINCOPY = sgemm_ncopy_4_vfp.S
|
||||||
SGEMMITCOPY = sgemm_tcopy_4_vfp.S
|
SGEMMITCOPY = sgemm_tcopy_4_vfp.S
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
SGEMMINCOPYOBJ = sgemm_incopy.o
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
||||||
|
endif
|
||||||
SGEMMONCOPY = sgemm_ncopy_2_vfp.S
|
SGEMMONCOPY = sgemm_ncopy_2_vfp.S
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_4x2_vfp.S
|
DGEMMKERNEL = dgemm_kernel_4x2_vfp.S
|
||||||
|
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
|
||||||
DGEMMINCOPY = dgemm_ncopy_4_vfp.S
|
DGEMMINCOPY = dgemm_ncopy_4_vfp.S
|
||||||
DGEMMITCOPY = dgemm_tcopy_4_vfp.S
|
DGEMMITCOPY = dgemm_tcopy_4_vfp.S
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||||
|
endif
|
||||||
DGEMMONCOPY = dgemm_ncopy_2_vfp.S
|
DGEMMONCOPY = dgemm_ncopy_2_vfp.S
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
|
@ -121,26 +108,8 @@ ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||||
|
|
||||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
STRMMKERNEL = strmm_kernel_4x2_vfp.S
|
||||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
DTRMMKERNEL = dtrmm_kernel_4x2_vfp.S
|
||||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
CTRMMKERNEL = ctrmm_kernel_2x2_vfp.S
|
||||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
ZTRMMKERNEL = ztrmm_kernel_2x2_vfp.S
|
||||||
|
|
||||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
|
||||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
|
||||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
|
||||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
|
||||||
|
|
||||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
|
||||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
|
||||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
|
||||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
|
||||||
|
|
||||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
|
||||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
|
||||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
|
||||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,91 +1,12 @@
|
||||||
|
include $(KERNELDIR)/KERNEL.ARMV6
|
||||||
#################################################################################
|
|
||||||
SAMAXKERNEL = iamax_vfp.S
|
|
||||||
DAMAXKERNEL = iamax_vfp.S
|
|
||||||
CAMAXKERNEL = iamax_vfp.S
|
|
||||||
ZAMAXKERNEL = iamax_vfp.S
|
|
||||||
|
|
||||||
SAMINKERNEL = iamax_vfp.S
|
|
||||||
DAMINKERNEL = iamax_vfp.S
|
|
||||||
CAMINKERNEL = iamax_vfp.S
|
|
||||||
ZAMINKERNEL = iamax_vfp.S
|
|
||||||
|
|
||||||
SMAXKERNEL = iamax_vfp.S
|
|
||||||
DMAXKERNEL = iamax_vfp.S
|
|
||||||
|
|
||||||
SMINKERNEL = iamax_vfp.S
|
|
||||||
DMINKERNEL = iamax_vfp.S
|
|
||||||
|
|
||||||
ISAMAXKERNEL = iamax_vfp.S
|
|
||||||
IDAMAXKERNEL = iamax_vfp.S
|
|
||||||
ICAMAXKERNEL = iamax_vfp.S
|
|
||||||
IZAMAXKERNEL = iamax_vfp.S
|
|
||||||
|
|
||||||
ISAMINKERNEL = iamax_vfp.S
|
|
||||||
IDAMINKERNEL = iamax_vfp.S
|
|
||||||
ICAMINKERNEL = iamax_vfp.S
|
|
||||||
IZAMINKERNEL = iamax_vfp.S
|
|
||||||
|
|
||||||
ISMAXKERNEL = iamax_vfp.S
|
|
||||||
IDMAXKERNEL = iamax_vfp.S
|
|
||||||
|
|
||||||
ISMINKERNEL = iamax_vfp.S
|
|
||||||
IDMINKERNEL = iamax_vfp.S
|
|
||||||
|
|
||||||
SSWAPKERNEL = swap_vfp.S
|
|
||||||
DSWAPKERNEL = swap_vfp.S
|
|
||||||
CSWAPKERNEL = swap_vfp.S
|
|
||||||
ZSWAPKERNEL = swap_vfp.S
|
|
||||||
|
|
||||||
SASUMKERNEL = asum_vfp.S
|
|
||||||
DASUMKERNEL = asum_vfp.S
|
|
||||||
CASUMKERNEL = asum_vfp.S
|
|
||||||
ZASUMKERNEL = asum_vfp.S
|
|
||||||
|
|
||||||
SAXPYKERNEL = axpy_vfp.S
|
|
||||||
DAXPYKERNEL = axpy_vfp.S
|
|
||||||
CAXPYKERNEL = axpy_vfp.S
|
|
||||||
ZAXPYKERNEL = axpy_vfp.S
|
|
||||||
|
|
||||||
SCOPYKERNEL = copy.c
|
|
||||||
DCOPYKERNEL = copy.c
|
|
||||||
CCOPYKERNEL = zcopy.c
|
|
||||||
ZCOPYKERNEL = zcopy.c
|
|
||||||
|
|
||||||
SDOTKERNEL = sdot_vfp.S
|
|
||||||
DDOTKERNEL = ddot_vfp.S
|
|
||||||
CDOTKERNEL = cdot_vfp.S
|
|
||||||
ZDOTKERNEL = zdot_vfp.S
|
|
||||||
|
|
||||||
SNRM2KERNEL = nrm2_vfpv3.S
|
SNRM2KERNEL = nrm2_vfpv3.S
|
||||||
DNRM2KERNEL = nrm2_vfpv3.S
|
DNRM2KERNEL = nrm2_vfpv3.S
|
||||||
CNRM2KERNEL = nrm2_vfpv3.S
|
CNRM2KERNEL = nrm2_vfpv3.S
|
||||||
ZNRM2KERNEL = nrm2_vfpv3.S
|
ZNRM2KERNEL = nrm2_vfpv3.S
|
||||||
|
|
||||||
SROTKERNEL = rot_vfp.S
|
|
||||||
DROTKERNEL = rot_vfp.S
|
|
||||||
CROTKERNEL = rot_vfp.S
|
|
||||||
ZROTKERNEL = rot_vfp.S
|
|
||||||
|
|
||||||
SSCALKERNEL = scal.c
|
|
||||||
DSCALKERNEL = scal.c
|
|
||||||
CSCALKERNEL = zscal.c
|
|
||||||
ZSCALKERNEL = zscal.c
|
|
||||||
|
|
||||||
SGEMVNKERNEL = gemv_n_vfpv3.S
|
SGEMVNKERNEL = gemv_n_vfpv3.S
|
||||||
DGEMVNKERNEL = gemv_n_vfpv3.S
|
DGEMVNKERNEL = gemv_n_vfpv3.S
|
||||||
CGEMVNKERNEL = cgemv_n_vfp.S
|
|
||||||
ZGEMVNKERNEL = zgemv_n_vfp.S
|
|
||||||
|
|
||||||
SGEMVTKERNEL = gemv_t_vfp.S
|
|
||||||
DGEMVTKERNEL = gemv_t_vfp.S
|
|
||||||
CGEMVTKERNEL = cgemv_t_vfp.S
|
|
||||||
ZGEMVTKERNEL = zgemv_t_vfp.S
|
|
||||||
|
|
||||||
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
|
|
||||||
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
|
|
||||||
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
|
|
||||||
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
|
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
|
SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
|
||||||
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
|
SGEMMONCOPY = sgemm_ncopy_4_vfp.S
|
||||||
|
@ -100,35 +21,10 @@ DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
|
CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
|
||||||
CGEMMONCOPY = cgemm_ncopy_2_vfp.S
|
|
||||||
CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
|
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
|
||||||
|
|
||||||
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
|
ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
|
||||||
ZGEMMONCOPY = zgemm_ncopy_2_vfp.S
|
|
||||||
ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
|
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
|
||||||
|
|
||||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
|
||||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
|
||||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
|
||||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
|
||||||
|
|
||||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
|
||||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
|
||||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
|
||||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
|
||||||
|
|
||||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
|
||||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
|
||||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
|
||||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
|
||||||
|
|
||||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
|
||||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
|
||||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
|
||||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
|
||||||
|
|
||||||
|
STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
|
||||||
|
DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
|
||||||
|
CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
|
||||||
|
ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
|
||||||
|
|
||||||
|
|
|
@ -475,6 +475,14 @@ asum_kernel_L999:
|
||||||
vadd.f32 s0 , s0, s1 // set return value
|
vadd.f32 s0 , s0, s1 // set return value
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
vmov r0, s0
|
||||||
|
#else
|
||||||
|
vmov r0, r1, d0
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
|
@ -38,18 +38,52 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
|
||||||
#ifndef ARM_SOFTFP_ABI
|
#if !defined(__ARM_PCS_VFP)
|
||||||
//hard
|
|
||||||
#define OLD_INC_X [fp, #0 ]
|
#if !defined(COMPLEX)
|
||||||
#define OLD_Y [fp, #4 ]
|
|
||||||
#define OLD_INC_Y [fp, #8 ]
|
#if !defined(DOUBLE)
|
||||||
#else
|
#define OLD_ALPHA r3
|
||||||
#define OLD_X [fp, #0 ]
|
#define OLD_X [fp, #0 ]
|
||||||
#define OLD_INC_X [fp, #4 ]
|
#define OLD_INC_X [fp, #4 ]
|
||||||
#define OLD_Y [fp, #8 ]
|
#define OLD_Y [fp, #8 ]
|
||||||
#define OLD_INC_Y [fp, #12 ]
|
#define OLD_INC_Y [fp, #12 ]
|
||||||
|
#else
|
||||||
|
#define OLD_ALPHA [fp, #0]
|
||||||
|
#define OLD_X [fp, #8 ]
|
||||||
|
#define OLD_INC_X [fp, #12 ]
|
||||||
|
#define OLD_Y [fp, #16 ]
|
||||||
|
#define OLD_INC_Y [fp, #20 ]
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#else //COMPLEX
|
||||||
|
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
#define OLD_ALPHAR r3
|
||||||
|
#define OLD_ALPHAI [fp, #0 ]
|
||||||
|
#define OLD_X [fp, #4 ]
|
||||||
|
#define OLD_INC_X [fp, #8 ]
|
||||||
|
#define OLD_Y [fp, #12 ]
|
||||||
|
#define OLD_INC_Y [fp, #16 ]
|
||||||
|
#else
|
||||||
|
#define OLD_ALPHAR [fp, #0]
|
||||||
|
#define OLD_ALPHAI [fp, #8]
|
||||||
|
#define OLD_X [fp, #16 ]
|
||||||
|
#define OLD_INC_X [fp, #20 ]
|
||||||
|
#define OLD_Y [fp, #24 ]
|
||||||
|
#define OLD_INC_Y [fp, #28 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif //!defined(COMPLEX)
|
||||||
|
|
||||||
|
#else //__ARM_PCS_VFP
|
||||||
|
|
||||||
|
#define OLD_INC_X [fp, #0 ]
|
||||||
|
#define OLD_Y [fp, #4 ]
|
||||||
|
#define OLD_INC_Y [fp, #8 ]
|
||||||
|
|
||||||
|
#endif //!defined(__ARM_PCS_VFP)
|
||||||
|
|
||||||
#define N r0
|
#define N r0
|
||||||
#define Y r1
|
#define Y r1
|
||||||
#define INC_X r2
|
#define INC_X r2
|
||||||
|
@ -370,13 +404,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #8
|
add fp, sp, #8
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
#ifdef ARM_SOFTFP_ABI
|
#if !defined(__ARM_PCS_VFP)
|
||||||
#ifndef DOUBLE
|
#if !defined(COMPLEX)
|
||||||
vmov s0, r3 //move alpha to s0
|
#if !defined(DOUBLE)
|
||||||
|
vmov s0, OLD_ALPHA
|
||||||
|
ldr X, OLD_X
|
||||||
|
#else
|
||||||
|
vldr d0, OLD_ALPHA
|
||||||
ldr X, OLD_X
|
ldr X, OLD_X
|
||||||
#endif
|
#endif
|
||||||
|
#else //COMPLEX
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
vmov s0, OLD_ALPHAR
|
||||||
|
vldr s1, OLD_ALPHAI
|
||||||
|
ldr X, OLD_X
|
||||||
|
#else
|
||||||
|
vldr d0, OLD_ALPHAR
|
||||||
|
vldr d1, OLD_ALPHAI
|
||||||
|
ldr X, OLD_X
|
||||||
#endif
|
#endif
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
ldr INC_X , OLD_INC_X
|
ldr INC_X , OLD_INC_X
|
||||||
ldr Y, OLD_Y
|
ldr Y, OLD_Y
|
||||||
ldr INC_Y , OLD_INC_Y
|
ldr INC_Y , OLD_INC_Y
|
||||||
|
|
|
@ -41,8 +41,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define N r0
|
#define N r0
|
||||||
#define X r1
|
#define X r1
|
||||||
#define INC_X r2
|
#define INC_X r2
|
||||||
#define OLD_Y r3
|
|
||||||
|
|
||||||
|
|
||||||
/******************************************************
|
/******************************************************
|
||||||
* [fp, #-128] - [fp, #-64] is reserved
|
* [fp, #-128] - [fp, #-64] is reserved
|
||||||
|
@ -50,7 +48,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
* registers
|
* registers
|
||||||
*******************************************************/
|
*******************************************************/
|
||||||
|
|
||||||
#define OLD_INC_Y [fp, #4 ]
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_RETURN_ADDR r0
|
||||||
|
#define OLD_N r1
|
||||||
|
#define OLD_X r2
|
||||||
|
#define OLD_INC_X r3
|
||||||
|
#define OLD_Y [fp, #0 ]
|
||||||
|
#define OLD_INC_Y [fp, #4 ]
|
||||||
|
#define RETURN_ADDR r8
|
||||||
|
#else
|
||||||
|
#define OLD_Y r3
|
||||||
|
#define OLD_INC_Y [fp, #0 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r5
|
#define I r5
|
||||||
#define Y r6
|
#define Y r6
|
||||||
|
@ -179,7 +188,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
push {r4 - r9, fp}
|
push {r4 - r9, fp}
|
||||||
add fp, sp, #24
|
add fp, sp, #28
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
sub r4, fp, #128
|
sub r4, fp, #128
|
||||||
|
@ -191,8 +200,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
vmov s2, s0
|
vmov s2, s0
|
||||||
vmov s3, s0
|
vmov s3, s0
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
mov RETURN_ADDR, OLD_RETURN_ADDR
|
||||||
|
mov N, OLD_N
|
||||||
|
mov X, OLD_X
|
||||||
|
mov INC_X, OLD_INC_X
|
||||||
|
ldr Y, OLD_Y
|
||||||
|
ldr INC_Y, OLD_INC_Y
|
||||||
|
#else
|
||||||
mov Y, OLD_Y
|
mov Y, OLD_Y
|
||||||
ldr INC_Y, OLD_INC_Y
|
ldr INC_Y, OLD_INC_Y
|
||||||
|
#endif
|
||||||
|
|
||||||
cmp N, #0
|
cmp N, #0
|
||||||
ble cdot_kernel_L999
|
ble cdot_kernel_L999
|
||||||
|
@ -265,7 +283,6 @@ cdot_kernel_S10:
|
||||||
|
|
||||||
|
|
||||||
cdot_kernel_L999:
|
cdot_kernel_L999:
|
||||||
|
|
||||||
sub r3, fp, #128
|
sub r3, fp, #128
|
||||||
vldm r3, { s8 - s15} // restore floating point registers
|
vldm r3, { s8 - s15} // restore floating point registers
|
||||||
|
|
||||||
|
@ -276,8 +293,11 @@ cdot_kernel_L999:
|
||||||
vadd.f32 s0 , s0, s2
|
vadd.f32 s0 , s0, s2
|
||||||
vsub.f32 s1 , s1, s3
|
vsub.f32 s1 , s1, s3
|
||||||
#endif
|
#endif
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vstm RETURN_ADDR, {s0 - s1}
|
||||||
|
#endif
|
||||||
|
|
||||||
sub sp, fp, #24
|
sub sp, fp, #28
|
||||||
pop {r4 - r9, fp}
|
pop {r4 - r9, fp}
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
|
|
|
@ -64,9 +64,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ALPHA_I [fp, #-272]
|
#define ALPHA_I [fp, #-272]
|
||||||
#define ALPHA_R [fp, #-280]
|
#define ALPHA_R [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHAR_SOFTFP r3
|
||||||
|
#define OLD_ALPHAI_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #8 ]
|
||||||
|
#define B [fp, #12 ]
|
||||||
|
#define C [fp, #16 ]
|
||||||
|
#define OLD_LDC [fp, #20 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -816,6 +825,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
|
||||||
|
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -80,9 +80,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ALPHA_I [fp, #-272]
|
#define ALPHA_I [fp, #-272]
|
||||||
#define ALPHA_R [fp, #-280]
|
#define ALPHA_R [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHAR_SOFTFP r3
|
||||||
|
#define OLD_ALPHAI_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #8 ]
|
||||||
|
#define B [fp, #12 ]
|
||||||
|
#define C [fp, #16 ]
|
||||||
|
#define OLD_LDC [fp, #20 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -873,6 +882,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
|
||||||
|
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
|
||||||
#define OLD_LDA [fp, #0 ]
|
#if !defined(__ARM_PCS_VFP)
|
||||||
#define X [fp, #4 ]
|
#define OLD_ALPHAR r3
|
||||||
#define OLD_INC_X [fp, #8 ]
|
#define OLD_ALPHAI [fp, #0 ]
|
||||||
#define Y [fp, #12 ]
|
#define OLD_A_SOFTFP [fp, #4 ]
|
||||||
#define OLD_INC_Y [fp, #16 ]
|
#define OLD_LDA [fp, #8 ]
|
||||||
|
#define X [fp, #12 ]
|
||||||
|
#define OLD_INC_X [fp, #16 ]
|
||||||
|
#define Y [fp, #20 ]
|
||||||
|
#define OLD_INC_Y [fp, #24 ]
|
||||||
|
#else
|
||||||
|
#define OLD_LDA [fp, #0 ]
|
||||||
|
#define X [fp, #4 ]
|
||||||
|
#define OLD_INC_X [fp, #8 ]
|
||||||
|
#define Y [fp, #12 ]
|
||||||
|
#define OLD_INC_Y [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define OLD_A r3
|
#define OLD_A r3
|
||||||
#define OLD_M r0
|
#define OLD_M r0
|
||||||
|
|
||||||
|
@ -462,6 +474,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
cmp N, #0
|
cmp N, #0
|
||||||
ble cgemvn_kernel_L999
|
ble cgemvn_kernel_L999
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov s0, OLD_ALPHAR
|
||||||
|
vldr s1, OLD_ALPHAI
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
|
|
||||||
str OLD_A, A
|
str OLD_A, A
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
vstr s0 , ALPHA_R
|
vstr s0 , ALPHA_R
|
||||||
|
|
|
@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
|
||||||
#define OLD_LDA [fp, #0 ]
|
#if !defined(__ARM_PCS_VFP)
|
||||||
#define X [fp, #4 ]
|
#define OLD_ALPHAR r3
|
||||||
#define OLD_INC_X [fp, #8 ]
|
#define OLD_ALPHAI [fp, #0 ]
|
||||||
#define Y [fp, #12 ]
|
#define OLD_A_SOFTFP [fp, #4 ]
|
||||||
#define OLD_INC_Y [fp, #16 ]
|
#define OLD_LDA [fp, #8 ]
|
||||||
|
#define X [fp, #12 ]
|
||||||
|
#define OLD_INC_X [fp, #16 ]
|
||||||
|
#define Y [fp, #20 ]
|
||||||
|
#define OLD_INC_Y [fp, #24 ]
|
||||||
|
#else
|
||||||
|
#define OLD_LDA [fp, #0 ]
|
||||||
|
#define X [fp, #4 ]
|
||||||
|
#define OLD_INC_X [fp, #8 ]
|
||||||
|
#define Y [fp, #12 ]
|
||||||
|
#define OLD_INC_Y [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define OLD_A r3
|
#define OLD_A r3
|
||||||
#define OLD_N r1
|
#define OLD_N r1
|
||||||
|
|
||||||
|
@ -359,6 +371,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
cmp OLD_N, #0
|
cmp OLD_N, #0
|
||||||
ble cgemvt_kernel_L999
|
ble cgemvt_kernel_L999
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov s0, OLD_ALPHAR
|
||||||
|
vldr s1, OLD_ALPHAI
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
|
|
||||||
str OLD_A, A
|
str OLD_A, A
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
|
|
||||||
|
|
|
@ -67,10 +67,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ALPHA_I [fp, #-272]
|
#define ALPHA_I [fp, #-272]
|
||||||
#define ALPHA_R [fp, #-280]
|
#define ALPHA_R [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHAR_SOFTFP r3
|
||||||
|
#define OLD_ALPHAI_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #8 ]
|
||||||
|
#define B [fp, #12 ]
|
||||||
|
#define C [fp, #16 ]
|
||||||
|
#define OLD_LDC [fp, #20 ]
|
||||||
|
#define OFFSET [fp, #24 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#define OFFSET [fp, #16 ]
|
#define OFFSET [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -826,6 +836,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
|
||||||
|
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ALPHA_I [fp, #-272]
|
#define ALPHA_I [fp, #-272]
|
||||||
#define ALPHA_R [fp, #-280]
|
#define ALPHA_R [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHAR_SOFTFP r3
|
||||||
|
#define OLD_ALPHAI_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #8 ]
|
||||||
|
#define B [fp, #12 ]
|
||||||
|
#define C [fp, #16 ]
|
||||||
|
#define OLD_LDC [fp, #20 ]
|
||||||
|
#define OFFSET [fp, #24 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#define OFFSET [fp, #16 ]
|
#define OFFSET [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -846,6 +856,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
|
||||||
|
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -246,6 +246,9 @@ ddot_kernel_L999:
|
||||||
vldm r3, { d8 - d15} // restore floating point registers
|
vldm r3, { d8 - d15} // restore floating point registers
|
||||||
|
|
||||||
vadd.f64 d0 , d0, d1 // set return value
|
vadd.f64 d0 , d0, d1 // set return value
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov r0, r1, d0
|
||||||
|
#endif
|
||||||
sub sp, fp, #24
|
sub sp, fp, #24
|
||||||
pop {r4 - r9, fp}
|
pop {r4 - r9, fp}
|
||||||
bx lr
|
bx lr
|
||||||
|
|
|
@ -62,10 +62,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-280]
|
#define ALPHA [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #12 ]
|
||||||
|
#define B [fp, #16 ]
|
||||||
|
#define C [fp, #20 ]
|
||||||
|
#define OLD_LDC [fp, #24 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -429,6 +436,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -79,9 +79,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-280]
|
#define ALPHA [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #12 ]
|
||||||
|
#define B [fp, #16 ]
|
||||||
|
#define C [fp, #20 ]
|
||||||
|
#define OLD_LDC [fp, #24 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -878,6 +886,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -65,10 +65,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-276 ]
|
#define ALPHA [fp, #-276 ]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #12 ]
|
||||||
|
#define B [fp, #16 ]
|
||||||
|
#define OLD_C [fp, #20 ]
|
||||||
|
#define OLD_LDC [fp, #24 ]
|
||||||
|
#define OFFSET [fp, #28 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define OLD_C [fp, #8 ]
|
#define OLD_C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#define OFFSET [fp, #16 ]
|
#define OFFSET [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -404,6 +413,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -66,10 +66,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-276 ]
|
#define ALPHA [fp, #-276 ]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP [fp, #4]
|
||||||
|
#define OLD_A_SOFTFP [fp, #12 ]
|
||||||
|
#define B [fp, #16 ]
|
||||||
|
#define OLD_C [fp, #20 ]
|
||||||
|
#define OLD_LDC [fp, #24 ]
|
||||||
|
#define OFFSET [fp, #28 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define OLD_C [fp, #8 ]
|
#define OLD_C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#define OFFSET [fp, #16 ]
|
#define OFFSET [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -846,6 +855,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -38,11 +38,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
|
||||||
#define OLD_LDA [fp, #0 ]
|
#if !defined(__ARM_PCS_VFP)
|
||||||
#define X [fp, #4 ]
|
|
||||||
#define OLD_INC_X [fp, #8 ]
|
#if !defined(DOUBLE)
|
||||||
#define Y [fp, #12 ]
|
#define OLD_ALPHA r3
|
||||||
#define OLD_INC_Y [fp, #16 ]
|
#define OLD_A_SOFTFP [fp, #0 ]
|
||||||
|
#define OLD_LDA [fp, #4 ]
|
||||||
|
#define X [fp, #8 ]
|
||||||
|
#define OLD_INC_X [fp, #12 ]
|
||||||
|
#define Y [fp, #16 ]
|
||||||
|
#define OLD_INC_Y [fp, #20 ]
|
||||||
|
#else
|
||||||
|
#define OLD_ALPHA [fp, #0 ]
|
||||||
|
#define OLD_A_SOFTFP [fp, #8 ]
|
||||||
|
#define OLD_LDA [fp, #12]
|
||||||
|
#define X [fp, #16]
|
||||||
|
#define OLD_INC_X [fp, #20]
|
||||||
|
#define Y [fp, #24]
|
||||||
|
#define OLD_INC_Y [fp, #28]
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define OLD_LDA [fp, #0 ]
|
||||||
|
#define X [fp, #4 ]
|
||||||
|
#define OLD_INC_X [fp, #8 ]
|
||||||
|
#define Y [fp, #12 ]
|
||||||
|
#define OLD_INC_Y [fp, #16 ]
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#define OLD_A r3
|
#define OLD_A r3
|
||||||
#define OLD_M r0
|
#define OLD_M r0
|
||||||
|
|
||||||
|
@ -508,6 +533,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
cmp N, #0
|
cmp N, #0
|
||||||
ble gemvn_kernel_L999
|
ble gemvn_kernel_L999
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
vmov s0, OLD_ALPHA
|
||||||
|
#else
|
||||||
|
vldr d0, OLD_ALPHA
|
||||||
|
#endif
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
|
|
||||||
str OLD_A, A
|
str OLD_A, A
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
|
|
||||||
|
|
|
@ -38,25 +38,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
|
||||||
#ifndef ARM_SOFTFP_ABI
|
#if !defined(__ARM_PCS_VFP)
|
||||||
//hard
|
|
||||||
#define OLD_LDA [fp, #0 ]
|
#if !defined(DOUBLE)
|
||||||
#define X [fp, #4 ]
|
|
||||||
#define OLD_INC_X [fp, #8 ]
|
|
||||||
#define Y [fp, #12 ]
|
|
||||||
#define OLD_INC_Y [fp, #16 ]
|
|
||||||
#define OLD_A r3
|
|
||||||
#else
|
|
||||||
#define OLD_A_SOFTFP [fp, #0 ]
|
|
||||||
#define OLD_LDA [fp, #4 ]
|
|
||||||
#define X [fp, #8 ]
|
|
||||||
#define OLD_INC_X [fp, #12 ]
|
|
||||||
#define Y [fp, #16 ]
|
|
||||||
#define OLD_INC_Y [fp, #20 ]
|
|
||||||
#define OLD_ALPHA r3
|
#define OLD_ALPHA r3
|
||||||
#define OLD_A r3
|
#define OLD_A_SOFTFP [fp, #0 ]
|
||||||
|
#define OLD_LDA [fp, #4 ]
|
||||||
|
#define X [fp, #8 ]
|
||||||
|
#define OLD_INC_X [fp, #12 ]
|
||||||
|
#define Y [fp, #16 ]
|
||||||
|
#define OLD_INC_Y [fp, #20 ]
|
||||||
|
#else
|
||||||
|
#define OLD_ALPHA [fp, #0 ]
|
||||||
|
#define OLD_A_SOFTFP [fp, #8 ]
|
||||||
|
#define OLD_LDA [fp, #12]
|
||||||
|
#define X [fp, #16]
|
||||||
|
#define OLD_INC_X [fp, #20]
|
||||||
|
#define Y [fp, #24]
|
||||||
|
#define OLD_INC_Y [fp, #28]
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define OLD_LDA [fp, #0 ]
|
||||||
|
#define X [fp, #4 ]
|
||||||
|
#define OLD_INC_X [fp, #8 ]
|
||||||
|
#define Y [fp, #12 ]
|
||||||
|
#define OLD_INC_Y [fp, #16 ]
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define OLD_A r3
|
||||||
#define OLD_M r0
|
#define OLD_M r0
|
||||||
|
|
||||||
#define AO1 r0
|
#define AO1 r0
|
||||||
|
@ -565,18 +577,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
cmp N, #0
|
cmp N, #0
|
||||||
ble gemvn_kernel_L999
|
ble gemvn_kernel_L999
|
||||||
|
|
||||||
#ifndef DOUBLE
|
#if !defined(__ARM_PCS_VFP)
|
||||||
#ifdef ARM_SOFTFP_ABI
|
#if !defined(DOUBLE)
|
||||||
|
vmov s0, OLD_ALPHA
|
||||||
vmov s0, OLD_ALPHA
|
#else
|
||||||
ldr OLD_A, OLD_A_SOFTFP
|
vldr d0, OLD_ALPHA
|
||||||
#endif
|
#endif
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
str OLD_A, A
|
str OLD_A, A
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
|
|
||||||
|
|
||||||
ldr INC_X , OLD_INC_X
|
ldr INC_X , OLD_INC_X
|
||||||
ldr INC_Y , OLD_INC_Y
|
ldr INC_Y , OLD_INC_Y
|
||||||
|
|
||||||
|
|
|
@ -38,25 +38,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
|
||||||
#ifndef ARM_SOFTFP_ABI
|
#if !defined(__ARM_PCS_VFP)
|
||||||
//hard abi
|
|
||||||
#define OLD_LDA [fp, #0 ]
|
#if !defined(DOUBLE)
|
||||||
#define X [fp, #4 ]
|
|
||||||
#define OLD_INC_X [fp, #8 ]
|
|
||||||
#define Y [fp, #12 ]
|
|
||||||
#define OLD_INC_Y [fp, #16 ]
|
|
||||||
#define OLD_A r3
|
|
||||||
#else
|
|
||||||
#define OLD_A_SOFTFP [fp, #0 ]
|
|
||||||
#define OLD_LDA [fp, #4 ]
|
|
||||||
#define X [fp, #8 ]
|
|
||||||
#define OLD_INC_X [fp, #12 ]
|
|
||||||
#define Y [fp, #16 ]
|
|
||||||
#define OLD_INC_Y [fp, #20 ]
|
|
||||||
#define OLD_ALPHA r3
|
#define OLD_ALPHA r3
|
||||||
#define OLD_A r3
|
#define OLD_A_SOFTFP [fp, #0 ]
|
||||||
|
#define OLD_LDA [fp, #4 ]
|
||||||
|
#define X [fp, #8 ]
|
||||||
|
#define OLD_INC_X [fp, #12 ]
|
||||||
|
#define Y [fp, #16 ]
|
||||||
|
#define OLD_INC_Y [fp, #20 ]
|
||||||
|
#else
|
||||||
|
#define OLD_ALPHA [fp, #0 ]
|
||||||
|
#define OLD_A_SOFTFP [fp, #8 ]
|
||||||
|
#define OLD_LDA [fp, #12]
|
||||||
|
#define X [fp, #16]
|
||||||
|
#define OLD_INC_X [fp, #20]
|
||||||
|
#define Y [fp, #24]
|
||||||
|
#define OLD_INC_Y [fp, #28]
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define OLD_LDA [fp, #0 ]
|
||||||
|
#define X [fp, #4 ]
|
||||||
|
#define OLD_INC_X [fp, #8 ]
|
||||||
|
#define Y [fp, #12 ]
|
||||||
|
#define OLD_INC_Y [fp, #16 ]
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define OLD_A r3
|
||||||
#define OLD_N r1
|
#define OLD_N r1
|
||||||
|
|
||||||
#define M r0
|
#define M r0
|
||||||
|
@ -518,11 +530,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
cmp OLD_N, #0
|
cmp OLD_N, #0
|
||||||
ble gemvt_kernel_L999
|
ble gemvt_kernel_L999
|
||||||
|
|
||||||
#ifndef DOUBLE
|
#if !defined(__ARM_PCS_VFP)
|
||||||
#ifdef ARM_SOFTFP_ABI
|
#if !defined(DOUBLE)
|
||||||
vmov s0, OLD_ALPHA
|
vmov s0, OLD_ALPHA
|
||||||
ldr OLD_A, OLD_A_SOFTFP
|
#else
|
||||||
|
vldr d0, OLD_ALPHA
|
||||||
#endif
|
#endif
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
str OLD_A, A
|
str OLD_A, A
|
||||||
|
|
|
@ -38,11 +38,36 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
|
||||||
#define OLD_LDA [fp, #0 ]
|
#if !defined(__ARM_PCS_VFP)
|
||||||
#define X [fp, #4 ]
|
|
||||||
#define OLD_INC_X [fp, #8 ]
|
#if !defined(DOUBLE)
|
||||||
#define Y [fp, #12 ]
|
#define OLD_ALPHA r3
|
||||||
#define OLD_INC_Y [fp, #16 ]
|
#define OLD_A_SOFTFP [fp, #0 ]
|
||||||
|
#define OLD_LDA [fp, #4 ]
|
||||||
|
#define X [fp, #8 ]
|
||||||
|
#define OLD_INC_X [fp, #12 ]
|
||||||
|
#define Y [fp, #16 ]
|
||||||
|
#define OLD_INC_Y [fp, #20 ]
|
||||||
|
#else
|
||||||
|
#define OLD_ALPHA [fp, #0 ]
|
||||||
|
#define OLD_A_SOFTFP [fp, #8 ]
|
||||||
|
#define OLD_LDA [fp, #12]
|
||||||
|
#define X [fp, #16]
|
||||||
|
#define OLD_INC_X [fp, #20]
|
||||||
|
#define Y [fp, #24]
|
||||||
|
#define OLD_INC_Y [fp, #28]
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
#define OLD_LDA [fp, #0 ]
|
||||||
|
#define X [fp, #4 ]
|
||||||
|
#define OLD_INC_X [fp, #8 ]
|
||||||
|
#define Y [fp, #12 ]
|
||||||
|
#define OLD_INC_Y [fp, #16 ]
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
#define OLD_A r3
|
#define OLD_A r3
|
||||||
#define OLD_N r1
|
#define OLD_N r1
|
||||||
|
|
||||||
|
@ -476,6 +501,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
cmp OLD_N, #0
|
cmp OLD_N, #0
|
||||||
ble gemvt_kernel_L999
|
ble gemvt_kernel_L999
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
vmov s0, OLD_ALPHA
|
||||||
|
#else
|
||||||
|
vldr d0, OLD_ALPHA
|
||||||
|
#endif
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
|
|
||||||
str OLD_A, A
|
str OLD_A, A
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
|
|
||||||
|
|
|
@ -573,6 +573,13 @@ nrm2_kernel_L999:
|
||||||
#else
|
#else
|
||||||
vsqrt.f32 s1, s1
|
vsqrt.f32 s1, s1
|
||||||
vmul.f32 s0, s0, s1
|
vmul.f32 s0, s0, s1
|
||||||
|
#endif
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
vmov r0, s0
|
||||||
|
#else
|
||||||
|
vmov r0, r1, d0
|
||||||
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bx lr
|
bx lr
|
||||||
|
|
|
@ -503,8 +503,13 @@ nrm2_kernel_L999:
|
||||||
#else
|
#else
|
||||||
vsqrt.f32 s1, s1
|
vsqrt.f32 s1, s1
|
||||||
vmul.f32 s0, s0, s1
|
vmul.f32 s0, s0, s1
|
||||||
#ifdef ARM_SOFTFP_ABI
|
#endif
|
||||||
vmov r0, s0
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#if defined(DOUBLE)
|
||||||
|
vmov r0, r1, d0
|
||||||
|
#else
|
||||||
|
vmov r0, s0
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -40,6 +40,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define OLD_INC_Y [fp, #0 ]
|
#define OLD_INC_Y [fp, #0 ]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
#define OLD_C [fp, #4]
|
||||||
|
#define OLD_S [fp, #8]
|
||||||
|
#else
|
||||||
|
#define OLD_C [fp, #8]
|
||||||
|
#define OLD_S [fp, #16]
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#define N r0
|
#define N r0
|
||||||
#define X r1
|
#define X r1
|
||||||
|
@ -462,7 +471,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #8
|
add fp, sp, #8
|
||||||
|
|
||||||
ldr INC_Y , OLD_INC_Y
|
ldr INC_Y , OLD_INC_Y
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
vldr s0, OLD_C
|
||||||
|
vldr s1, OLD_S
|
||||||
|
#else
|
||||||
|
vldr d0, OLD_C
|
||||||
|
vldr d1, OLD_S
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
cmp N, #0
|
cmp N, #0
|
||||||
ble rot_kernel_L999
|
ble rot_kernel_L999
|
||||||
|
|
|
@ -329,20 +329,19 @@ sdot_kernel_L999:
|
||||||
vldm r3, { s8 - s15} // restore floating point registers
|
vldm r3, { s8 - s15} // restore floating point registers
|
||||||
|
|
||||||
#if defined(DSDOT)
|
#if defined(DSDOT)
|
||||||
|
|
||||||
vadd.f64 d0 , d0, d1 // set return value
|
vadd.f64 d0 , d0, d1 // set return value
|
||||||
|
#else
|
||||||
#ifdef ARM_SOFTFP_ABI
|
vadd.f32 s0 , s0, s1 // set return value
|
||||||
vmov r0, r1, d0
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#if defined(DSDOT)
|
||||||
|
vmov r0, r1, d0
|
||||||
#else
|
#else
|
||||||
|
|
||||||
vadd.f32 s0 , s0, s1 // set return value
|
|
||||||
#ifdef ARM_SOFTFP_ABI
|
|
||||||
vmov r0, s0
|
vmov r0, s0
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
sub sp, fp, #24
|
sub sp, fp, #24
|
||||||
pop {r4 - r9, fp}
|
pop {r4 - r9, fp}
|
||||||
bx lr
|
bx lr
|
||||||
|
|
|
@ -62,9 +62,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-280]
|
#define ALPHA [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP r3
|
||||||
|
#define OLD_A_SOFTFP [fp, #4 ]
|
||||||
|
#define B [fp, #8 ]
|
||||||
|
#define C [fp, #12 ]
|
||||||
|
#define OLD_LDC [fp, #16 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -416,6 +424,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -58,14 +58,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define OLD_M r0
|
#define OLD_M r0
|
||||||
#define OLD_N r1
|
#define OLD_N r1
|
||||||
#define OLD_K r2
|
#define OLD_K r2
|
||||||
|
|
||||||
#ifdef ARM_SOFTFP_ABI
|
|
||||||
#define OLD_ALPHA r3
|
|
||||||
//#define OLD_A
|
|
||||||
#else //hard
|
|
||||||
#define OLD_A r3
|
#define OLD_A r3
|
||||||
#define OLD_ALPHA s0
|
#define OLD_ALPHA s0
|
||||||
#endif
|
|
||||||
|
|
||||||
/******************************************************
|
/******************************************************
|
||||||
* [fp, #-128] - [fp, #-64] is reserved
|
* [fp, #-128] - [fp, #-64] is reserved
|
||||||
|
@ -77,10 +71,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define M [fp, #-256 ]
|
#define M [fp, #-256 ]
|
||||||
#define N [fp, #-260 ]
|
#define N [fp, #-260 ]
|
||||||
#define K [fp, #-264 ]
|
#define K [fp, #-264 ]
|
||||||
|
|
||||||
#ifndef ARM_SOFTFP_ABI
|
|
||||||
#define A [fp, #-268 ]
|
#define A [fp, #-268 ]
|
||||||
#endif
|
|
||||||
|
|
||||||
#define FP_ZERO [fp, #-240]
|
#define FP_ZERO [fp, #-240]
|
||||||
#define FP_ZERO_0 [fp, #-240]
|
#define FP_ZERO_0 [fp, #-240]
|
||||||
|
@ -88,17 +79,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-280]
|
#define ALPHA [fp, #-280]
|
||||||
|
|
||||||
#ifdef ARM_SOFTFP_ABI
|
#if !defined(__ARM_PCS_VFP)
|
||||||
#define A [fp, #4 ]
|
#define OLD_ALPHA_SOFTFP r3
|
||||||
|
#define OLD_A_SOFTFP [fp, #4 ]
|
||||||
#define B [fp, #8 ]
|
#define B [fp, #8 ]
|
||||||
#define C [fp, #12 ]
|
#define C [fp, #12 ]
|
||||||
#define OLD_LDC [fp, #16 ]
|
#define OLD_LDC [fp, #16 ]
|
||||||
#else //hard
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
#define L r2
|
#define L r2
|
||||||
|
@ -867,16 +859,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
|
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
||||||
#ifdef ARM_SOFTFP_ABI
|
|
||||||
str OLD_ALPHA, ALPHA
|
|
||||||
#else //hard
|
|
||||||
str OLD_A, A
|
str OLD_A, A
|
||||||
vstr OLD_ALPHA, ALPHA
|
vstr OLD_ALPHA, ALPHA
|
||||||
#endif
|
|
||||||
sub r3, fp, #128
|
sub r3, fp, #128
|
||||||
vstm r3, { s8 - s31} // store floating point registers
|
vstm r3, { s8 - s31} // store floating point registers
|
||||||
|
|
||||||
|
|
|
@ -65,10 +65,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-276 ]
|
#define ALPHA [fp, #-276 ]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP r3
|
||||||
|
#define OLD_A_SOFTFP [fp, #4 ]
|
||||||
|
#define B [fp, #8 ]
|
||||||
|
#define OLD_C [fp, #12 ]
|
||||||
|
#define OLD_LDC [fp, #16 ]
|
||||||
|
#define OFFSET [fp, #20 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define OLD_C [fp, #8 ]
|
#define OLD_C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#define OFFSET [fp, #16 ]
|
#define OFFSET [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -395,6 +404,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -64,10 +64,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define ALPHA [fp, #-280]
|
#define ALPHA [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHA_SOFTFP r3
|
||||||
|
#define OLD_A_SOFTFP [fp, #4 ]
|
||||||
|
#define B [fp, #8 ]
|
||||||
|
#define C [fp, #12 ]
|
||||||
|
#define OLD_LDC [fp, #16 ]
|
||||||
|
#define OFFSET [fp, #20 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#define OFFSET [fp, #16 ]
|
#define OFFSET [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -782,6 +791,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vmov OLD_ALPHA, OLD_ALPHA_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -38,9 +38,43 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
|
||||||
|
#if !defined(COMPLEX)
|
||||||
|
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
#define OLD_X [fp, #0 ]
|
||||||
|
#define OLD_INC_X [fp, #4 ]
|
||||||
|
#define OLD_Y [fp, #8 ]
|
||||||
|
#define OLD_INC_Y [fp, #12 ]
|
||||||
|
#else
|
||||||
|
#define OLD_X [fp, #8 ]
|
||||||
|
#define OLD_INC_X [fp, #12]
|
||||||
|
#define OLD_Y [fp, #16]
|
||||||
|
#define OLD_INC_Y [fp, #20]
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#else //COMPLEX
|
||||||
|
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
#define OLD_X [fp, #4 ]
|
||||||
|
#define OLD_INC_X [fp, #8 ]
|
||||||
|
#define OLD_Y [fp, #12 ]
|
||||||
|
#define OLD_INC_Y [fp, #16 ]
|
||||||
|
#else
|
||||||
|
#define OLD_X [fp, #16]
|
||||||
|
#define OLD_INC_X [fp, #20]
|
||||||
|
#define OLD_Y [fp, #24]
|
||||||
|
#define OLD_INC_Y [fp, #28]
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif // !defined(__ARM_PCS_VFP)
|
||||||
|
|
||||||
|
#else
|
||||||
#define OLD_INC_X [fp, #0 ]
|
#define OLD_INC_X [fp, #0 ]
|
||||||
#define OLD_Y [fp, #4 ]
|
#define OLD_Y [fp, #4 ]
|
||||||
#define OLD_INC_Y [fp, #8 ]
|
#define OLD_INC_Y [fp, #8 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#define N r0
|
#define N r0
|
||||||
|
@ -229,6 +263,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
push {r4 , fp}
|
push {r4 , fp}
|
||||||
add fp, sp, #8
|
add fp, sp, #8
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
ldr X, OLD_X
|
||||||
|
#endif
|
||||||
ldr INC_X , OLD_INC_X
|
ldr INC_X , OLD_INC_X
|
||||||
ldr Y, OLD_Y
|
ldr Y, OLD_Y
|
||||||
ldr INC_Y , OLD_INC_Y
|
ldr INC_Y , OLD_INC_Y
|
||||||
|
|
|
@ -41,8 +41,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define N r0
|
#define N r0
|
||||||
#define X r1
|
#define X r1
|
||||||
#define INC_X r2
|
#define INC_X r2
|
||||||
#define OLD_Y r3
|
|
||||||
|
|
||||||
|
|
||||||
/******************************************************
|
/******************************************************
|
||||||
* [fp, #-128] - [fp, #-64] is reserved
|
* [fp, #-128] - [fp, #-64] is reserved
|
||||||
|
@ -50,7 +48,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
* registers
|
* registers
|
||||||
*******************************************************/
|
*******************************************************/
|
||||||
|
|
||||||
#define OLD_INC_Y [fp, #4 ]
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_RETURN_ADDR r0
|
||||||
|
#define OLD_N r1
|
||||||
|
#define OLD_X r2
|
||||||
|
#define OLD_INC_X r3
|
||||||
|
#define OLD_Y [fp, #0 ]
|
||||||
|
#define OLD_INC_Y [fp, #4 ]
|
||||||
|
#define RETURN_ADDR r8
|
||||||
|
#else
|
||||||
|
#define OLD_Y r3
|
||||||
|
#define OLD_INC_Y [fp, #0 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r5
|
#define I r5
|
||||||
#define Y r6
|
#define Y r6
|
||||||
|
@ -181,7 +190,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.align 5
|
.align 5
|
||||||
|
|
||||||
push {r4 - r9, fp}
|
push {r4 - r9, fp}
|
||||||
add fp, sp, #24
|
add fp, sp, #28
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
sub r4, fp, #128
|
sub r4, fp, #128
|
||||||
|
@ -194,9 +203,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
vcvt.f64.f32 d2, s0
|
vcvt.f64.f32 d2, s0
|
||||||
vcvt.f64.f32 d3, s0
|
vcvt.f64.f32 d3, s0
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
mov RETURN_ADDR, OLD_RETURN_ADDR
|
||||||
|
mov N, OLD_N
|
||||||
|
mov X, OLD_X
|
||||||
|
mov INC_X, OLD_INC_X
|
||||||
|
ldr Y, OLD_Y
|
||||||
|
ldr INC_Y, OLD_INC_Y
|
||||||
|
#else
|
||||||
mov Y, OLD_Y
|
mov Y, OLD_Y
|
||||||
ldr INC_Y, OLD_INC_Y
|
ldr INC_Y, OLD_INC_Y
|
||||||
|
#endif
|
||||||
|
|
||||||
cmp N, #0
|
cmp N, #0
|
||||||
ble zdot_kernel_L999
|
ble zdot_kernel_L999
|
||||||
|
@ -280,8 +297,11 @@ zdot_kernel_L999:
|
||||||
vadd.f64 d0 , d0, d2
|
vadd.f64 d0 , d0, d2
|
||||||
vsub.f64 d1 , d1, d3
|
vsub.f64 d1 , d1, d3
|
||||||
#endif
|
#endif
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vstm RETURN_ADDR, {d0 - d1}
|
||||||
|
#endif
|
||||||
|
|
||||||
sub sp, fp, #24
|
sub sp, fp, #28
|
||||||
pop {r4 - r9, fp}
|
pop {r4 - r9, fp}
|
||||||
bx lr
|
bx lr
|
||||||
|
|
||||||
|
|
|
@ -64,9 +64,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ALPHA_I [fp, #-272]
|
#define ALPHA_I [fp, #-272]
|
||||||
#define ALPHA_R [fp, #-280]
|
#define ALPHA_R [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHAR_SOFTFP [fp, #4]
|
||||||
|
#define OLD_ALPHAI_SOFTFP [fp, #12]
|
||||||
|
#define OLD_A_SOFTFP [fp, #20 ]
|
||||||
|
#define B [fp, #24 ]
|
||||||
|
#define C [fp, #28 ]
|
||||||
|
#define OLD_LDC [fp, #32 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -863,6 +872,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
|
||||||
|
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -80,9 +80,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ALPHA_I [fp, #-272]
|
#define ALPHA_I [fp, #-272]
|
||||||
#define ALPHA_R [fp, #-280]
|
#define ALPHA_R [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHAR_SOFTFP [fp, #4]
|
||||||
|
#define OLD_ALPHAI_SOFTFP [fp, #12]
|
||||||
|
#define OLD_A_SOFTFP [fp, #20 ]
|
||||||
|
#define B [fp, #24 ]
|
||||||
|
#define C [fp, #28 ]
|
||||||
|
#define OLD_LDC [fp, #32 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -909,6 +918,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
|
||||||
|
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
|
||||||
#define OLD_LDA [fp, #0 ]
|
#if !defined(__ARM_PCS_VFP)
|
||||||
#define X [fp, #4 ]
|
#define OLD_ALPHAR [fp, #0 ]
|
||||||
#define OLD_INC_X [fp, #8 ]
|
#define OLD_ALPHAI [fp, #8 ]
|
||||||
#define Y [fp, #12 ]
|
#define OLD_A_SOFTFP [fp, #16]
|
||||||
#define OLD_INC_Y [fp, #16 ]
|
#define OLD_LDA [fp, #20]
|
||||||
|
#define X [fp, #24]
|
||||||
|
#define OLD_INC_X [fp, #28]
|
||||||
|
#define Y [fp, #32]
|
||||||
|
#define OLD_INC_Y [fp, #36]
|
||||||
|
#else
|
||||||
|
#define OLD_LDA [fp, #0 ]
|
||||||
|
#define X [fp, #4 ]
|
||||||
|
#define OLD_INC_X [fp, #8 ]
|
||||||
|
#define Y [fp, #12 ]
|
||||||
|
#define OLD_INC_Y [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define OLD_A r3
|
#define OLD_A r3
|
||||||
#define OLD_M r0
|
#define OLD_M r0
|
||||||
|
|
||||||
|
@ -465,6 +477,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
cmp N, #0
|
cmp N, #0
|
||||||
ble zgemvn_kernel_L999
|
ble zgemvn_kernel_L999
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr d0, OLD_ALPHAR
|
||||||
|
vldr d1, OLD_ALPHAI
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
|
|
||||||
str OLD_A, A
|
str OLD_A, A
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
vstr d0 , ALPHA_R
|
vstr d0 , ALPHA_R
|
||||||
|
|
|
@ -38,11 +38,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define STACKSIZE 256
|
#define STACKSIZE 256
|
||||||
|
|
||||||
#define OLD_LDA [fp, #0 ]
|
#if !defined(__ARM_PCS_VFP)
|
||||||
#define X [fp, #4 ]
|
#define OLD_ALPHAR [fp, #0 ]
|
||||||
#define OLD_INC_X [fp, #8 ]
|
#define OLD_ALPHAI [fp, #8 ]
|
||||||
#define Y [fp, #12 ]
|
#define OLD_A_SOFTFP [fp, #16]
|
||||||
#define OLD_INC_Y [fp, #16 ]
|
#define OLD_LDA [fp, #20]
|
||||||
|
#define X [fp, #24]
|
||||||
|
#define OLD_INC_X [fp, #28]
|
||||||
|
#define Y [fp, #32]
|
||||||
|
#define OLD_INC_Y [fp, #36]
|
||||||
|
#else
|
||||||
|
#define OLD_LDA [fp, #0 ]
|
||||||
|
#define X [fp, #4 ]
|
||||||
|
#define OLD_INC_X [fp, #8 ]
|
||||||
|
#define Y [fp, #12 ]
|
||||||
|
#define OLD_INC_Y [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define OLD_A r3
|
#define OLD_A r3
|
||||||
#define OLD_N r1
|
#define OLD_N r1
|
||||||
|
|
||||||
|
@ -360,6 +372,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
cmp OLD_N, #0
|
cmp OLD_N, #0
|
||||||
ble zgemvt_kernel_L999
|
ble zgemvt_kernel_L999
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr d0, OLD_ALPHAR
|
||||||
|
vldr d1, OLD_ALPHAI
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
|
|
||||||
str OLD_A, A
|
str OLD_A, A
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
|
|
||||||
|
|
|
@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ALPHA_I [fp, #-272]
|
#define ALPHA_I [fp, #-272]
|
||||||
#define ALPHA_R [fp, #-280]
|
#define ALPHA_R [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHAR_SOFTFP [fp, #4]
|
||||||
|
#define OLD_ALPHAI_SOFTFP [fp, #12]
|
||||||
|
#define OLD_A_SOFTFP [fp, #20 ]
|
||||||
|
#define B [fp, #24 ]
|
||||||
|
#define C [fp, #28 ]
|
||||||
|
#define OLD_LDC [fp, #32 ]
|
||||||
|
#define OFFSET [fp, #36 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#define OFFSET [fp, #16 ]
|
#define OFFSET [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -882,6 +892,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
|
||||||
|
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
|
@ -66,10 +66,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ALPHA_I [fp, #-272]
|
#define ALPHA_I [fp, #-272]
|
||||||
#define ALPHA_R [fp, #-280]
|
#define ALPHA_R [fp, #-280]
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
#define OLD_ALPHAR_SOFTFP [fp, #4]
|
||||||
|
#define OLD_ALPHAI_SOFTFP [fp, #12]
|
||||||
|
#define OLD_A_SOFTFP [fp, #20 ]
|
||||||
|
#define B [fp, #24 ]
|
||||||
|
#define C [fp, #28 ]
|
||||||
|
#define OLD_LDC [fp, #32 ]
|
||||||
|
#define OFFSET [fp, #36 ]
|
||||||
|
#else
|
||||||
#define B [fp, #4 ]
|
#define B [fp, #4 ]
|
||||||
#define C [fp, #8 ]
|
#define C [fp, #8 ]
|
||||||
#define OLD_LDC [fp, #12 ]
|
#define OLD_LDC [fp, #12 ]
|
||||||
#define OFFSET [fp, #16 ]
|
#define OFFSET [fp, #16 ]
|
||||||
|
#endif
|
||||||
|
|
||||||
#define I r0
|
#define I r0
|
||||||
#define J r1
|
#define J r1
|
||||||
|
@ -883,6 +893,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
add fp, sp, #24
|
add fp, sp, #24
|
||||||
sub sp, sp, #STACKSIZE // reserve stack
|
sub sp, sp, #STACKSIZE // reserve stack
|
||||||
|
|
||||||
|
#if !defined(__ARM_PCS_VFP)
|
||||||
|
vldr OLD_ALPHA_R, OLD_ALPHAR_SOFTFP
|
||||||
|
vldr OLD_ALPHA_I, OLD_ALPHAI_SOFTFP
|
||||||
|
ldr OLD_A, OLD_A_SOFTFP
|
||||||
|
#endif
|
||||||
str OLD_M, M
|
str OLD_M, M
|
||||||
str OLD_N, N
|
str OLD_N, N
|
||||||
str OLD_K, K
|
str OLD_K, K
|
||||||
|
|
Loading…
Reference in New Issue