Update dgemm_kernel_8x8_skylakex.c
This commit is contained in:
parent
5da9484d93
commit
6bd67ddbab
|
@ -1,4 +1,5 @@
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
#include <stdint.h>
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
/* row-major c_block */
|
/* row-major c_block */
|
||||||
/* 64-bit pointer registers: a_block_pointer,b_block_pointer,c_pointer;*/
|
/* 64-bit pointer registers: a_block_pointer,b_block_pointer,c_pointer;*/
|
||||||
|
@ -289,43 +290,6 @@
|
||||||
INNER_TRANS_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)\
|
INNER_TRANS_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)\
|
||||||
INNER_STORE_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)
|
INNER_STORE_8x8(%%zmm10,%%zmm13,%%zmm16,%%zmm19,%%zmm22,%%zmm25,%%zmm28,%%zmm31)
|
||||||
|
|
||||||
#define COMPUTE_m1n8 {\
|
|
||||||
__asm__ __volatile__(\
|
|
||||||
INNER_INIT_m1n8\
|
|
||||||
INNER_KERNELm1(8)\
|
|
||||||
INNER_SAVE_m1n8\
|
|
||||||
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\
|
|
||||||
:"zmm4","zmm5","zmm6","zmm7","zmm8","cc","memory","k1");\
|
|
||||||
c_pointer += 1;\
|
|
||||||
}
|
|
||||||
#define COMPUTE_m2n8 {\
|
|
||||||
__asm__ __volatile__(\
|
|
||||||
INNER_INIT_m2n8\
|
|
||||||
INNER_KERNELm2(8)\
|
|
||||||
INNER_SAVE_m2n8\
|
|
||||||
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes)\
|
|
||||||
:"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","cc","memory","k1");\
|
|
||||||
c_pointer += 2;\
|
|
||||||
}
|
|
||||||
#define COMPUTE_m4n8 {\
|
|
||||||
__asm__ __volatile__(\
|
|
||||||
INNER_INIT_m4n8\
|
|
||||||
INNER_KERNELm4(8)\
|
|
||||||
INNER_SAVE_m4n8\
|
|
||||||
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03),"Yk"(k01)\
|
|
||||||
:"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","cc","memory");\
|
|
||||||
c_pointer += 4;\
|
|
||||||
}
|
|
||||||
#define COMPUTE_m8n8 {\
|
|
||||||
__asm__ __volatile__(\
|
|
||||||
INNER_INIT_m8n8\
|
|
||||||
INNER_KERNELm8(8)\
|
|
||||||
INNER_SAVE_m8n8\
|
|
||||||
:"+r"(a_block_pointer):"r"(packed_b_pointer),"r"((int64_t)k),"r"(c_pointer),"r"(ldc_in_bytes),"Yk"(k02),"Yk"(k03)\
|
|
||||||
:"zmm4","zmm5","zmm6","zmm7","zmm8","zmm9","zmm10","zmm11","zmm12","zmm13","zmm14","zmm15","cc","memory");\
|
|
||||||
c_pointer += 8;\
|
|
||||||
}
|
|
||||||
|
|
||||||
#define COMPUTE_n8 {\
|
#define COMPUTE_n8 {\
|
||||||
__asm__ __volatile__(\
|
__asm__ __volatile__(\
|
||||||
"movq %8,%%r14;movq %2,%%r13;"\
|
"movq %8,%%r14;movq %2,%%r13;"\
|
||||||
|
|
Loading…
Reference in New Issue