Functional Assembly Kernels for CortexA57
Adding functional (non-optimized) kernels for Cortex-A57 with the following layouts. SGEMM - 16x4, 8x8 CGEMM - 8x4 DGEMM - 8x4, 4x8
This commit is contained in:
parent
587455868e
commit
7aa1ad4923
|
@ -60,32 +60,55 @@ DGEMVTKERNEL = gemv_t.S
|
||||||
CGEMVTKERNEL = zgemv_t.S
|
CGEMVTKERNEL = zgemv_t.S
|
||||||
ZGEMVTKERNEL = zgemv_t.S
|
ZGEMVTKERNEL = zgemv_t.S
|
||||||
|
|
||||||
STRMMKERNEL = strmm_kernel_4x4.S
|
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||||
DTRMMKERNEL = dtrmm_kernel_4x4.S
|
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||||
CTRMMKERNEL = ctrmm_kernel_4x4.S
|
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||||
ZTRMMKERNEL = ztrmm_kernel_4x4.S
|
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||||
|
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||||
SGEMMKERNEL = sgemm_kernel_4x4.S
|
SGEMMINCOPYOBJ = sgemm_incopy.o
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
endif
|
||||||
|
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||||
|
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
DGEMMKERNEL = dgemm_kernel_4x4.S
|
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
|
||||||
|
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
|
||||||
|
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
|
||||||
|
DGEMMINCOPYOBJ = dgemm_incopy.o
|
||||||
|
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
||||||
|
endif
|
||||||
|
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
|
||||||
|
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
CGEMMKERNEL = cgemm_kernel_4x4.S
|
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
|
||||||
|
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
|
||||||
|
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
|
||||||
|
CGEMMINCOPYOBJ = cgemm_incopy.o
|
||||||
|
CGEMMITCOPYOBJ = cgemm_itcopy.o
|
||||||
|
endif
|
||||||
|
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
|
||||||
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
||||||
|
|
||||||
ZGEMMKERNEL = zgemm_kernel_4x4.S
|
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
|
||||||
|
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
|
||||||
|
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
|
||||||
|
ZGEMMINCOPYOBJ = zgemm_incopy.o
|
||||||
|
ZGEMMITCOPYOBJ = zgemm_itcopy.o
|
||||||
|
endif
|
||||||
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
|
||||||
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue