Update dgemm_kernel_8x8_skylakex.c

This commit is contained in:
wjc404 2019-10-16 10:14:51 +08:00 committed by GitHub
parent 6bd67ddbab
commit 9b19e9e1b0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 6 additions and 4 deletions

View File

@@ -429,7 +429,8 @@ static void KERNEL_MAIN(double *packed_a, double *packed_b, BLASLONG m, BLASLONG
double *c_pointer = c;
__mmask16 k01 = 0x00f0,k02 = 0x000f,k03 = 0x0033;
BLASLONG ndiv8_count;
double *b_scratch = (double *)aligned_alloc(64,192*k);
double *b_scratch;
posix_memalign(&b_scratch,64,192*k);
double *packed_b_pointer = packed_b;
a_block_pointer = packed_a;
for(ndiv8_count=ndiv8;ndiv8_count>2;ndiv8_count-=3){
@@ -637,9 +638,10 @@ static void KERNEL_MAIN(double *packed_a, double *packed_b, BLASLONG m, BLASLONG
c_pointer ++;\
}
#define SAVE_m1n4 {\
*c_pointer += _mm256_cvtsd_f64(yc1);\
ya1 = _mm256_unpackhi_pd(yc1,yc1);\
c_pointer[LDC] += _mm256_cvtsd_f64(ya1);\
xb1 = _mm256_extractf128_pd(yc1,0);\
*c_pointer += _mm_cvtsd_f64(xb1);\
xb2 = _mm_unpackhi_pd(xb1,xb1);\
c_pointer[LDC] += _mm_cvtsd_f64(xb2);\
xb1 = _mm256_extractf128_pd(yc1,1);\
c_pointer[LDC*2] += _mm_cvtsd_f64(xb1);\
xb2 = _mm_unpackhi_pd(xb1,xb1);\