Optimized dgemm kernel for CORTEXA57
This commit is contained in:
parent
19fdbee291
commit
402443bf9c
|
@ -61,6 +61,7 @@ CGEMVTKERNEL = zgemv_t.S
|
||||||
ZGEMVTKERNEL = zgemv_t.S
|
ZGEMVTKERNEL = zgemv_t.S
|
||||||
|
|
||||||
STRMMKERNEL = ../generic/trmmkernel_4x4.c
|
STRMMKERNEL = ../generic/trmmkernel_4x4.c
|
||||||
|
DTRMMKERNEL = ../generic/trmmkernel_4x4.c
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_4x4.S
|
SGEMMKERNEL = sgemm_kernel_4x4.S
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
|
@ -68,3 +69,9 @@ SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
||||||
|
|
||||||
|
DGEMMKERNEL = dgemm_kernel_4x4.S
|
||||||
|
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
|
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
|
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
||||||
|
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
10
param.h
10
param.h
|
@ -2229,8 +2229,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define SGEMM_DEFAULT_UNROLL_N 4
|
#define SGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
||||||
#define DGEMM_DEFAULT_UNROLL_M 2
|
#define DGEMM_DEFAULT_UNROLL_M 4
|
||||||
#define DGEMM_DEFAULT_UNROLL_N 2
|
#define DGEMM_DEFAULT_UNROLL_N 4
|
||||||
|
|
||||||
#define CGEMM_DEFAULT_UNROLL_M 2
|
#define CGEMM_DEFAULT_UNROLL_M 2
|
||||||
#define CGEMM_DEFAULT_UNROLL_N 2
|
#define CGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
@ -2239,17 +2239,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ZGEMM_DEFAULT_UNROLL_N 2
|
#define ZGEMM_DEFAULT_UNROLL_N 2
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 128
|
#define SGEMM_DEFAULT_P 128
|
||||||
#define DGEMM_DEFAULT_P 512
|
#define DGEMM_DEFAULT_P 256
|
||||||
#define CGEMM_DEFAULT_P 96
|
#define CGEMM_DEFAULT_P 96
|
||||||
#define ZGEMM_DEFAULT_P 64
|
#define ZGEMM_DEFAULT_P 64
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 240
|
#define SGEMM_DEFAULT_Q 240
|
||||||
#define DGEMM_DEFAULT_Q 480
|
#define DGEMM_DEFAULT_Q 1024
|
||||||
#define CGEMM_DEFAULT_Q 120
|
#define CGEMM_DEFAULT_Q 120
|
||||||
#define ZGEMM_DEFAULT_Q 120
|
#define ZGEMM_DEFAULT_Q 120
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_R 12288
|
#define SGEMM_DEFAULT_R 12288
|
||||||
#define DGEMM_DEFAULT_R 8192
|
#define DGEMM_DEFAULT_R 4096
|
||||||
#define CGEMM_DEFAULT_R 4096
|
#define CGEMM_DEFAULT_R 4096
|
||||||
#define ZGEMM_DEFAULT_R 4096
|
#define ZGEMM_DEFAULT_R 4096
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue