From 6bd67ddbab5ef752e1cafca4e4b7b66ecbb57452 Mon Sep 17 00:00:00 2001 From: wjc404 <52632443+wjc404@users.noreply.github.com> Date: Wed, 16 Oct 2019 03:20:08 +0800 Subject: [PATCH] Update dgemm_kernel_8x8_skylakex.c --- kernel/x86_64/dgemm_kernel_8x8_skylakex.c | 38 +---------------------- 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/kernel/x86_64/dgemm_kernel_8x8_skylakex.c b/kernel/x86_64/dgemm_kernel_8x8_skylakex.c index b4a87cbce..69437e665 100644 --- a/kernel/x86_64/dgemm_kernel_8x8_skylakex.c +++ b/kernel/x86_64/dgemm_kernel_8x8_skylakex.c @@ -1,4 +1,5 @@ #include "common.h" +#include <stdint.h> #include <immintrin.h> /* row-major c_block */ /* 64-bit pointer registers: a_block_pointer,b_block_pointer,c_pointer;*/ @@ -289,43 +290,6 @@ INNER_TRANS_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)\ INNER_STORE_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31) -#define COMPUTE_m1n8 {\ - __asm__ __volatile__(\ - INNER_INIT_m1n8\ - INNER_KERNELm1(8)\ - INNER_SAVE_m1n8\ - :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\ - :"zmm4","zmm5","zmm6","zmm7","zmm8","cc","memory","k1");\ - c_pointer += 1;\ -} -#define COMPUTE_m2n8 {\ - __asm__ __volatile__(\ - INNER_INIT_m2n8\ - INNER_KERNELm2(8)\ - INNER_SAVE_m2n8\ - :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\ - :"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","cc","memory","k1");\ - c_pointer += 2;\ -} -#define COMPUTE_m4n8 {\ - __asm__ __volatile__(\ - INNER_INIT_m4n8\ - INNER_KERNELm4(8)\ - INNER_SAVE_m4n8\ - :"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03),"Yk"(k01)\ - :"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","cc","memory");\ - c_pointer += 4;\ -} -#define COMPUTE_m8n8 {\ - __asm__ __volatile__(\ - INNER_INIT_m8n8\ - INNER_KERNELm8(8)\ - INNER_SAVE_m8n8\ - 
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03)\ - :"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","zmm12","zmm13","zmm14","zmm15","cc","memory");\ - c_pointer += 8;\ -} - #define COMPUTE_n8 {\ __asm__ __volatile__(\ "movq %8,%%r14;movq %2,%%r13;"\