From 5f916689048127df9f6060bb4edc30ab6c006413 Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Thu, 20 May 2021 11:24:31 +0000 Subject: [PATCH] Small Matrix: skylakex: sgemm nn: fix n6 conflicts with n4 --- .../x86_64/sgemm_small_kernel_nn_skylakex.c | 62 ------------------- 1 file changed, 62 deletions(-) diff --git a/kernel/x86_64/sgemm_small_kernel_nn_skylakex.c b/kernel/x86_64/sgemm_small_kernel_nn_skylakex.c index 99856d0af..9bc7a7c58 100644 --- a/kernel/x86_64/sgemm_small_kernel_nn_skylakex.c +++ b/kernel/x86_64/sgemm_small_kernel_nn_skylakex.c @@ -191,26 +191,6 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp STORE_512(0, 4); STORE_512(1, 4); STORE_512(0, 5); STORE_512(1, 5); } - for (;j < n4; j += 4) { - DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(1, 0); - DECLARE_RESULT_512(0, 1); DECLARE_RESULT_512(1, 1); - DECLARE_RESULT_512(0, 2); DECLARE_RESULT_512(1, 2); - DECLARE_RESULT_512(0, 3); DECLARE_RESULT_512(1, 3); - for (k = 0; k < K; k++) { - LOAD_A_512(0, x); LOAD_A_512(1, x); - BROADCAST_LOAD_B_512(x, 0); BROADCAST_LOAD_B_512(x, 1); - BROADCAST_LOAD_B_512(x, 2); BROADCAST_LOAD_B_512(x, 3); - - MATMUL_512(0, 0); MATMUL_512(1, 0); - MATMUL_512(0, 1); MATMUL_512(1, 1); - MATMUL_512(0, 2); MATMUL_512(1, 2); - MATMUL_512(0, 3); MATMUL_512(1, 3); - } - STORE_512(0, 0); STORE_512(1, 0); - STORE_512(0, 1); STORE_512(1, 1); - STORE_512(0, 2); STORE_512(1, 2); - STORE_512(0, 3); STORE_512(1, 3); - } for (; j < n2; j += 2) { DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(1, 0); DECLARE_RESULT_512(0, 1); DECLARE_RESULT_512(1, 1); @@ -261,27 +241,6 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp STORE_512(0, 4); STORE_512(0, 5); } - for (; j < n4; j += 4) { - DECLARE_RESULT_512(0, 0); - DECLARE_RESULT_512(0, 1); - DECLARE_RESULT_512(0, 2); - DECLARE_RESULT_512(0, 3); - for (k = 0; k < K; k++) { - LOAD_A_512(0, x); - BROADCAST_LOAD_B_512(x, 0); BROADCAST_LOAD_B_512(x, 1); - BROADCAST_LOAD_B_512(x, 2); BROADCAST_LOAD_B_512(x, 3); - - MATMUL_512(0, 0); - MATMUL_512(0, 1); - MATMUL_512(0, 2); - MATMUL_512(0, 3); - } - STORE_512(0, 0); - STORE_512(0, 1); - STORE_512(0, 2); - STORE_512(0, 3); - } - for (; j < n2; j += 2) { DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(0, 1); @@ -335,27 +294,6 @@ int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT * A, BLASLONG lda, FLOAT alp MASK_STORE_512(0, 4); MASK_STORE_512(0, 5); } - for (; j < n4; j += 4) { - DECLARE_RESULT_512(0, 0); - DECLARE_RESULT_512(0, 1); - DECLARE_RESULT_512(0, 2); - DECLARE_RESULT_512(0, 3); - for (k = 0; k < K; k++) { - MASK_LOAD_A_512(0, x); - BROADCAST_LOAD_B_512(x, 0); BROADCAST_LOAD_B_512(x, 1); - BROADCAST_LOAD_B_512(x, 2); BROADCAST_LOAD_B_512(x, 3); - - MATMUL_512(0, 0); - MATMUL_512(0, 1); - MATMUL_512(0, 2); - MATMUL_512(0, 3); - } - MASK_STORE_512(0, 0); - MASK_STORE_512(0, 1); - MASK_STORE_512(0, 2); - MASK_STORE_512(0, 3); - } - for (; j < n2; j += 2) { DECLARE_RESULT_512(0, 0); DECLARE_RESULT_512(0, 1);