Functional Assembly Kernels for CortexA57

Adding functional (non-optimized) kernels for Cortex-A57
with the following layouts.
SGEMM - 16x4, 8x8
CGEMM - 8x4
DGEMM - 8x4, 4x8
This commit is contained in:
Ashwin Sekhar T K 2016-03-14 19:33:21 +05:30
parent 587455868e
commit 7aa1ad4923
11 changed files with 21161 additions and 17 deletions

View File

@ -60,32 +60,55 @@ DGEMVTKERNEL = gemv_t.S
CGEMVTKERNEL = zgemv_t.S
ZGEMVTKERNEL = zgemv_t.S
STRMMKERNEL = strmm_kernel_4x4.S
DTRMMKERNEL = dtrmm_kernel_4x4.S
CTRMMKERNEL = ctrmm_kernel_4x4.S
ZTRMMKERNEL = ztrmm_kernel_4x4.S
SGEMMKERNEL = sgemm_kernel_4x4.S
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
SGEMMINCOPYOBJ = sgemm_incopy.o
SGEMMITCOPYOBJ = sgemm_itcopy.o
endif
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
DGEMMKERNEL = dgemm_kernel_4x4.S
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
DGEMMINCOPYOBJ = dgemm_incopy.o
DGEMMITCOPYOBJ = dgemm_itcopy.o
endif
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
CGEMMKERNEL = cgemm_kernel_4x4.S
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
CGEMMINCOPYOBJ = cgemm_incopy.o
CGEMMITCOPYOBJ = cgemm_itcopy.o
endif
CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
ZGEMMKERNEL = zgemm_kernel_4x4.S
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
ZGEMMINCOPYOBJ = zgemm_incopy.o
ZGEMMITCOPYOBJ = zgemm_itcopy.o
endif
ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o

2044
kernel/arm64/cgemm_kernel_8x4.S Executable file

File diff suppressed because it is too large Load Diff

2425
kernel/arm64/ctrmm_kernel_8x4.S Executable file

File diff suppressed because it is too large Load Diff

1689
kernel/arm64/dgemm_kernel_4x8.S Executable file

File diff suppressed because it is too large Load Diff

1570
kernel/arm64/dgemm_kernel_8x4.S Executable file

File diff suppressed because it is too large Load Diff

2026
kernel/arm64/dtrmm_kernel_4x8.S Executable file

File diff suppressed because it is too large Load Diff

1849
kernel/arm64/dtrmm_kernel_8x4.S Executable file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

2431
kernel/arm64/strmm_kernel_16x4.S Executable file

File diff suppressed because it is too large Load Diff

2795
kernel/arm64/strmm_kernel_8x8.S Executable file

File diff suppressed because it is too large Load Diff