From 52f6699b5b8cedb018867947de4fb790555da904 Mon Sep 17 00:00:00 2001
From: Valter Akira Miasato Filho
Date: Thu, 29 Jun 2017 14:24:00 -0300
Subject: [PATCH 1/2] Tempfix for mixed precision when compiling to ARMv7 with ARM_SOFTFP_ABI=1

Switch the double-precision and complex (D/C/Z) kernel entries in
KERNEL.ARMV7 from the VFP assembly sources to C implementations, using
the ../generic 2x2 sources for GEMM and TRMM. The single-precision
entries keep their assembly kernels.

---
 kernel/arm/KERNEL.ARMV7 | 112 ++++++++++++++++++++--------------------
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7
index d5cd94fbd..a1944b049 100644
--- a/kernel/arm/KERNEL.ARMV7
+++ b/kernel/arm/KERNEL.ARMV7
@@ -1,51 +1,51 @@
 #################################################################################
 
 SAMAXKERNEL = iamax_vfp.S
-DAMAXKERNEL = iamax_vfp.S
-CAMAXKERNEL = iamax_vfp.S
-ZAMAXKERNEL = iamax_vfp.S
+DAMAXKERNEL = amax.c
+CAMAXKERNEL = zamax.c
+ZAMAXKERNEL = zamax.c
 
 SAMINKERNEL = iamax_vfp.S
-DAMINKERNEL = iamax_vfp.S
-CAMINKERNEL = iamax_vfp.S
-ZAMINKERNEL = iamax_vfp.S
+DAMINKERNEL = amin.c
+CAMINKERNEL = zamin.c
+ZAMINKERNEL = zamin.c
 
 SMAXKERNEL = iamax_vfp.S
-DMAXKERNEL = iamax_vfp.S
+DMAXKERNEL = max.c
 
 SMINKERNEL = iamax_vfp.S
-DMINKERNEL = iamax_vfp.S
+DMINKERNEL = min.c
 
 ISAMAXKERNEL = iamax_vfp.S
-IDAMAXKERNEL = iamax_vfp.S
-ICAMAXKERNEL = iamax_vfp.S
-IZAMAXKERNEL = iamax_vfp.S
+IDAMAXKERNEL = iamax.c
+ICAMAXKERNEL = izamax.c
+IZAMAXKERNEL = izamax.c
 
 ISAMINKERNEL = iamax_vfp.S
-IDAMINKERNEL = iamax_vfp.S
-ICAMINKERNEL = iamax_vfp.S
-IZAMINKERNEL = iamax_vfp.S
+IDAMINKERNEL = iamin.c
+ICAMINKERNEL = izamin.c
+IZAMINKERNEL = izamin.c
 
 ISMAXKERNEL = iamax_vfp.S
-IDMAXKERNEL = iamax_vfp.S
+IDMAXKERNEL = imax.c
 
 ISMINKERNEL = iamax_vfp.S
-IDMINKERNEL = iamax_vfp.S
+IDMINKERNEL = imin.c
 
 SSWAPKERNEL = swap_vfp.S
-DSWAPKERNEL = swap_vfp.S
-CSWAPKERNEL = swap_vfp.S
-ZSWAPKERNEL = swap_vfp.S
+DSWAPKERNEL = swap.c
+CSWAPKERNEL = zswap.c
+ZSWAPKERNEL = zswap.c
 
 SASUMKERNEL = asum_vfp.S
-DASUMKERNEL = asum_vfp.S
-CASUMKERNEL = asum_vfp.S
-ZASUMKERNEL = asum_vfp.S
+DASUMKERNEL = asum.c
+CASUMKERNEL = zasum.c
+ZASUMKERNEL = zasum.c
 
 SAXPYKERNEL = axpy_vfp.S
-DAXPYKERNEL = axpy_vfp.S
-CAXPYKERNEL = axpy_vfp.S
-ZAXPYKERNEL = axpy_vfp.S
+DAXPYKERNEL = axpy.c
+CAXPYKERNEL = zaxpy.c
+ZAXPYKERNEL = zaxpy.c
 
 SCOPYKERNEL = copy.c
 DCOPYKERNEL = copy.c
@@ -53,19 +53,19 @@ CCOPYKERNEL = zcopy.c
 ZCOPYKERNEL = zcopy.c
 
 SDOTKERNEL = sdot_vfp.S
-DDOTKERNEL = ddot_vfp.S
-CDOTKERNEL = cdot_vfp.S
-ZDOTKERNEL = zdot_vfp.S
+DDOTKERNEL = dot.c
+CDOTKERNEL = zdot.c
+ZDOTKERNEL = zdot.c
 
 SNRM2KERNEL = nrm2_vfpv3.S
-DNRM2KERNEL = nrm2_vfpv3.S
-CNRM2KERNEL = nrm2_vfpv3.S
-ZNRM2KERNEL = nrm2_vfpv3.S
+DNRM2KERNEL = nrm2.c
+CNRM2KERNEL = znrm2.c
+ZNRM2KERNEL = znrm2.c
 
 SROTKERNEL = rot_vfp.S
-DROTKERNEL = rot_vfp.S
-CROTKERNEL = rot_vfp.S
-ZROTKERNEL = rot_vfp.S
+DROTKERNEL = rot.c
+CROTKERNEL = zrot.c
+ZROTKERNEL = zrot.c
 
 SSCALKERNEL = scal.c
 DSCALKERNEL = scal.c
@@ -73,19 +73,19 @@ CSCALKERNEL = zscal.c
 ZSCALKERNEL = zscal.c
 
 SGEMVNKERNEL = gemv_n_vfpv3.S
-DGEMVNKERNEL = gemv_n_vfpv3.S
-CGEMVNKERNEL = cgemv_n_vfp.S
-ZGEMVNKERNEL = zgemv_n_vfp.S
+DGEMVNKERNEL = gemv_n.c
+CGEMVNKERNEL = zgemv_n.c
+ZGEMVNKERNEL = zgemv_n.c
 
 SGEMVTKERNEL = gemv_t_vfp.S
-DGEMVTKERNEL = gemv_t_vfp.S
-CGEMVTKERNEL = cgemv_t_vfp.S
-ZGEMVTKERNEL = zgemv_t_vfp.S
+DGEMVTKERNEL = gemv_t.c
+CGEMVTKERNEL = zgemv_t.c
+ZGEMVTKERNEL = zgemv_t.c
 
 STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
-DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
-CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
-ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
+DTRMMKERNEL = ../generic/trmmkernel_2x2.c
+CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
 
 SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
 SGEMMONCOPY = sgemm_ncopy_4_vfp.S
@@ -93,23 +93,23 @@ SGEMMOTCOPY = sgemm_tcopy_4_vfp.S
 SGEMMONCOPYOBJ = sgemm_oncopy.o
 SGEMMOTCOPYOBJ = sgemm_otcopy.o
 
-DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
-DGEMMONCOPY = dgemm_ncopy_4_vfp.S
-DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
+DGEMMKERNEL = ../generic/gemmkernel_2x2.c
+DGEMMONCOPY = ../generic/gemm_ncopy_2.c
+DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
 DGEMMONCOPYOBJ = dgemm_oncopy.o
 DGEMMOTCOPYOBJ = dgemm_otcopy.o
 
-CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
-CGEMMONCOPY = cgemm_ncopy_2_vfp.S
-CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
-CGEMMONCOPYOBJ = cgemm_oncopy.o
-CGEMMOTCOPYOBJ = cgemm_otcopy.o
+CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+CGEMMONCOPYOBJ = cgemm_oncopy.o
+CGEMMOTCOPYOBJ = cgemm_otcopy.o
 
-ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
-ZGEMMONCOPY = zgemm_ncopy_2_vfp.S
-ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
-ZGEMMONCOPYOBJ = zgemm_oncopy.o
-ZGEMMOTCOPYOBJ = zgemm_otcopy.o
+ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
+ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
+ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
+ZGEMMONCOPYOBJ = zgemm_oncopy.o
+ZGEMMOTCOPYOBJ = zgemm_otcopy.o
 
 STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
 STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
From 682d9be199af6af430dcdb4f1fcd25947194d002 Mon Sep 17 00:00:00 2001
From: Valter Akira Miasato Filho
Date: Thu, 29 Jun 2017 15:34:34 -0300
Subject: [PATCH 2/2] Extra

Select the C implementations of the D/C/Z kernels only when
ARM_SOFTFP_ABI is 1, and restore the VFP assembly kernels otherwise.
Regroup the file under three section headers: single-precision assembly
kernels, ABI-dependent D/C/Z kernels, and routines that are always
built from C sources.

The assembly branch also sets DGEMMONCOPYOBJ and DGEMMOTCOPYOBJ, as the
ARM_SOFTFP_ABI=1 branch and the CGEMM/ZGEMM groups already do, so the
double-precision GEMM copy objects stay defined in both configurations.

---
 kernel/arm/KERNEL.ARMV7 | 246 ++++++++++++++++++++++++++++------
 1 file changed, 177 insertions(+), 69 deletions(-)

diff --git a/kernel/arm/KERNEL.ARMV7 b/kernel/arm/KERNEL.ARMV7
index a1944b049..47554efac 100644
--- a/kernel/arm/KERNEL.ARMV7
+++ b/kernel/arm/KERNEL.ARMV7
@@ -1,91 +1,42 @@
 #################################################################################
+
+############################################################
+# Files already implemented in ARM Assembly for softfp ABI #
+############################################################
 
 SAMAXKERNEL = iamax_vfp.S
-DAMAXKERNEL = amax.c
-CAMAXKERNEL = zamax.c
-ZAMAXKERNEL = zamax.c
 
 SAMINKERNEL = iamax_vfp.S
-DAMINKERNEL = amin.c
-CAMINKERNEL = zamin.c
-ZAMINKERNEL = zamin.c
 
 SMAXKERNEL = iamax_vfp.S
-DMAXKERNEL = max.c
 
 SMINKERNEL = iamax_vfp.S
-DMINKERNEL = min.c
 
 ISAMAXKERNEL = iamax_vfp.S
-IDAMAXKERNEL = iamax.c
-ICAMAXKERNEL = izamax.c
-IZAMAXKERNEL = izamax.c
 
 ISAMINKERNEL = iamax_vfp.S
-IDAMINKERNEL = iamin.c
-ICAMINKERNEL = izamin.c
-IZAMINKERNEL = izamin.c
 
 ISMAXKERNEL = iamax_vfp.S
-IDMAXKERNEL = imax.c
 
 ISMINKERNEL = iamax_vfp.S
-IDMINKERNEL = imin.c
 
 SSWAPKERNEL = swap_vfp.S
-DSWAPKERNEL = swap.c
-CSWAPKERNEL = zswap.c
-ZSWAPKERNEL = zswap.c
 
 SASUMKERNEL = asum_vfp.S
-DASUMKERNEL = asum.c
-CASUMKERNEL = zasum.c
-ZASUMKERNEL = zasum.c
 
 SAXPYKERNEL = axpy_vfp.S
-DAXPYKERNEL = axpy.c
-CAXPYKERNEL = zaxpy.c
-ZAXPYKERNEL = zaxpy.c
-
-SCOPYKERNEL = copy.c
-DCOPYKERNEL = copy.c
-CCOPYKERNEL = zcopy.c
-ZCOPYKERNEL = zcopy.c
 
 SDOTKERNEL = sdot_vfp.S
-DDOTKERNEL = dot.c
-CDOTKERNEL = zdot.c
-ZDOTKERNEL = zdot.c
 
 SNRM2KERNEL = nrm2_vfpv3.S
-DNRM2KERNEL = nrm2.c
-CNRM2KERNEL = znrm2.c
-ZNRM2KERNEL = znrm2.c
 
 SROTKERNEL = rot_vfp.S
-DROTKERNEL = rot.c
-CROTKERNEL = zrot.c
-ZROTKERNEL = zrot.c
-
-SSCALKERNEL = scal.c
-DSCALKERNEL = scal.c
-CSCALKERNEL = zscal.c
-ZSCALKERNEL = zscal.c
 
 SGEMVNKERNEL = gemv_n_vfpv3.S
-DGEMVNKERNEL = gemv_n.c
-CGEMVNKERNEL = zgemv_n.c
-ZGEMVNKERNEL = zgemv_n.c
 
 SGEMVTKERNEL = gemv_t_vfp.S
-DGEMVTKERNEL = gemv_t.c
-CGEMVTKERNEL = zgemv_t.c
-ZGEMVTKERNEL = zgemv_t.c
 
 STRMMKERNEL = strmm_kernel_4x4_vfpv3.S
-DTRMMKERNEL = ../generic/trmmkernel_2x2.c
-CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
-ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
 
 SGEMMKERNEL = sgemm_kernel_4x4_vfpv3.S
 SGEMMONCOPY = sgemm_ncopy_4_vfp.S
@@ -93,6 +44,70 @@ SGEMMOTCOPY = sgemm_tcopy_4_vfp.S
 SGEMMONCOPYOBJ = sgemm_oncopy.o
 SGEMMOTCOPYOBJ = sgemm_otcopy.o
 
+###############################################################
+# Files yet to be implemented in ARM Assembly for softfp ABI #
+###############################################################
+ifeq ($(ARM_SOFTFP_ABI), 1)
+DAMAXKERNEL = amax.c
+CAMAXKERNEL = zamax.c
+ZAMAXKERNEL = zamax.c
+
+DAMINKERNEL = amin.c
+CAMINKERNEL = zamin.c
+ZAMINKERNEL = zamin.c
+
+DMAXKERNEL = max.c
+
+DMINKERNEL = min.c
+
+IDAMAXKERNEL = iamax.c
+ICAMAXKERNEL = izamax.c
+IZAMAXKERNEL = izamax.c
+
+IDAMINKERNEL = iamin.c
+ICAMINKERNEL = izamin.c
+IZAMINKERNEL = izamin.c
+
+IDMAXKERNEL = imax.c
+
+IDMINKERNEL = imin.c
+
+DSWAPKERNEL = swap.c
+CSWAPKERNEL = zswap.c
+ZSWAPKERNEL = zswap.c
+
+DASUMKERNEL = asum.c
+CASUMKERNEL = zasum.c
+ZASUMKERNEL = zasum.c
+
+DAXPYKERNEL = axpy.c
+CAXPYKERNEL = zaxpy.c
+ZAXPYKERNEL = zaxpy.c
+
+DDOTKERNEL = dot.c
+CDOTKERNEL = zdot.c
+ZDOTKERNEL = zdot.c
+
+DNRM2KERNEL = nrm2.c
+CNRM2KERNEL = znrm2.c
+ZNRM2KERNEL = znrm2.c
+
+DROTKERNEL = rot.c
+CROTKERNEL = zrot.c
+ZROTKERNEL = zrot.c
+
+DGEMVNKERNEL = gemv_n.c
+CGEMVNKERNEL = zgemv_n.c
+ZGEMVNKERNEL = zgemv_n.c
+
+DGEMVTKERNEL = gemv_t.c
+CGEMVTKERNEL = zgemv_t.c
+ZGEMVTKERNEL = zgemv_t.c
+
+DTRMMKERNEL = ../generic/trmmkernel_2x2.c
+CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
+
 DGEMMKERNEL = ../generic/gemmkernel_2x2.c
 DGEMMONCOPY = ../generic/gemm_ncopy_2.c
 DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
@@ -111,24 +126,117 @@ ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
 ZGEMMONCOPYOBJ = zgemm_oncopy.o
 ZGEMMOTCOPYOBJ = zgemm_otcopy.o
 
-STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+else # Use optimized hard-float implementations
 
-DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+DAMAXKERNEL = iamax_vfp.S
+CAMAXKERNEL = iamax_vfp.S
+ZAMAXKERNEL = iamax_vfp.S
 
-CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+DAMINKERNEL = iamax_vfp.S
+CAMINKERNEL = iamax_vfp.S
+ZAMINKERNEL = iamax_vfp.S
 
-ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
-ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
-ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
-ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+DMAXKERNEL = iamax_vfp.S
 
+DMINKERNEL = iamax_vfp.S
 
+IDAMAXKERNEL = iamax_vfp.S
+ICAMAXKERNEL = iamax_vfp.S
+IZAMAXKERNEL = iamax_vfp.S
+
+IDAMINKERNEL = iamax_vfp.S
+ICAMINKERNEL = iamax_vfp.S
+IZAMINKERNEL = iamax_vfp.S
+
+IDMAXKERNEL = iamax_vfp.S
+
+IDMINKERNEL = iamax_vfp.S
+
+DSWAPKERNEL = swap_vfp.S
+CSWAPKERNEL = swap_vfp.S
+ZSWAPKERNEL = swap_vfp.S
+
+DASUMKERNEL = asum_vfp.S
+CASUMKERNEL = asum_vfp.S
+ZASUMKERNEL = asum_vfp.S
+
+DAXPYKERNEL = axpy_vfp.S
+CAXPYKERNEL = axpy_vfp.S
+ZAXPYKERNEL = axpy_vfp.S
+
+DDOTKERNEL = ddot_vfp.S
+CDOTKERNEL = cdot_vfp.S
+ZDOTKERNEL = zdot_vfp.S
+
+DNRM2KERNEL = nrm2_vfpv3.S
+CNRM2KERNEL = nrm2_vfpv3.S
+ZNRM2KERNEL = nrm2_vfpv3.S
+
+DROTKERNEL = rot_vfp.S
+CROTKERNEL = rot_vfp.S
+ZROTKERNEL = rot_vfp.S
+
+DGEMVNKERNEL = gemv_n_vfpv3.S
+CGEMVNKERNEL = cgemv_n_vfp.S
+ZGEMVNKERNEL = zgemv_n_vfp.S
+
+DGEMVTKERNEL = gemv_t_vfp.S
+CGEMVTKERNEL = cgemv_t_vfp.S
+ZGEMVTKERNEL = zgemv_t_vfp.S
+
+DTRMMKERNEL = dtrmm_kernel_4x4_vfpv3.S
+CTRMMKERNEL = ctrmm_kernel_2x2_vfpv3.S
+ZTRMMKERNEL = ztrmm_kernel_2x2_vfpv3.S
+
+DGEMMKERNEL = dgemm_kernel_4x4_vfpv3.S
+DGEMMONCOPY = dgemm_ncopy_4_vfp.S
+DGEMMOTCOPY = dgemm_tcopy_4_vfp.S
+DGEMMONCOPYOBJ = dgemm_oncopy.o
+DGEMMOTCOPYOBJ = dgemm_otcopy.o
+
+CGEMMKERNEL = cgemm_kernel_2x2_vfpv3.S
+CGEMMONCOPY = cgemm_ncopy_2_vfp.S
+CGEMMOTCOPY = cgemm_tcopy_2_vfp.S
+CGEMMONCOPYOBJ = cgemm_oncopy.o
+CGEMMOTCOPYOBJ = cgemm_otcopy.o
+
+ZGEMMKERNEL = zgemm_kernel_2x2_vfpv3.S
+ZGEMMONCOPY = zgemm_ncopy_2_vfp.S
+ZGEMMOTCOPY = zgemm_tcopy_2_vfp.S
+ZGEMMONCOPYOBJ = zgemm_oncopy.o
+ZGEMMOTCOPYOBJ = zgemm_otcopy.o
+
+endif
+
+############################
+# Files in pure C routines #
+############################
+SCOPYKERNEL = copy.c
+DCOPYKERNEL = copy.c
+CCOPYKERNEL = zcopy.c
+ZCOPYKERNEL = zcopy.c
+
+SSCALKERNEL = scal.c
+DSCALKERNEL = scal.c
+CSCALKERNEL = zscal.c
+ZSCALKERNEL = zscal.c
+
+STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
+
+ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
+ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
+ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
+ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
\ No newline at end of file