Merge pull request #3547 from martin-frbg/issue3540-2
More build fixes for CooperLake with BFLOAT16 and DYNAMIC_ARCH
This commit is contained in:
commit
ab304cca69
|
@ -678,7 +678,7 @@ endif ()
|
||||||
set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
|
||||||
endif ()
|
endif ()
|
||||||
if (NOT DEFINED SBGEMM_SMALL_K_B0_TT)
|
if (NOT DEFINED SBGEMM_SMALL_K_B0_TT)
|
||||||
set($SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
set(SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
|
||||||
endif ()
|
endif ()
|
||||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16")
|
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16")
|
||||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16")
|
GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16")
|
||||||
|
|
|
@ -1824,6 +1824,13 @@ static void init_parameter(void) {
|
||||||
fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
|
fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if BUILD_BFLOAT16==1
|
||||||
|
TABLE_NAME.sbgemm_r = (((BUFFER_SIZE -
|
||||||
|
((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA
|
||||||
|
+ TABLE_NAME.align) & ~TABLE_NAME.align)
|
||||||
|
) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15);
|
||||||
|
#endif
|
||||||
|
|
||||||
#if BUILD_SINGLE==1
|
#if BUILD_SINGLE==1
|
||||||
TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
|
TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
|
||||||
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
|
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
|
||||||
|
|
|
@ -13,6 +13,8 @@
|
||||||
#define ONE 1.e0f
|
#define ONE 1.e0f
|
||||||
#define ZERO 0.e0f
|
#define ZERO 0.e0f
|
||||||
|
|
||||||
|
#define SHUFFLE_MAGIC_NO (const int) 0x39
|
||||||
|
|
||||||
#undef STORE16_COMPLETE_RESULT
|
#undef STORE16_COMPLETE_RESULT
|
||||||
#undef STORE16_MASK_COMPLETE_RESULT
|
#undef STORE16_MASK_COMPLETE_RESULT
|
||||||
#undef SBGEMM_BLOCK_KERNEL_NN_32x8xK
|
#undef SBGEMM_BLOCK_KERNEL_NN_32x8xK
|
||||||
|
@ -356,7 +358,6 @@ void sbgemm_block_kernel_nn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
||||||
bfloat16 * B_addr = B;
|
bfloat16 * B_addr = B;
|
||||||
float * C_addr = C;
|
float * C_addr = C;
|
||||||
|
|
||||||
int SHUFFLE_MAGIC_NO = 0x39;
|
|
||||||
BLASLONG tag_k_32x = k & (~31);
|
BLASLONG tag_k_32x = k & (~31);
|
||||||
|
|
||||||
#ifndef ONE_ALPHA
|
#ifndef ONE_ALPHA
|
||||||
|
@ -465,7 +466,6 @@ void sbgemm_block_kernel_nn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
||||||
bfloat16 * B_addr = B;
|
bfloat16 * B_addr = B;
|
||||||
float * C_addr = C;
|
float * C_addr = C;
|
||||||
|
|
||||||
int SHUFFLE_MAGIC_NO = 0x39;
|
|
||||||
BLASLONG tag_k_32x = k & (~31);
|
BLASLONG tag_k_32x = k & (~31);
|
||||||
|
|
||||||
#ifndef ONE_ALPHA
|
#ifndef ONE_ALPHA
|
||||||
|
@ -1192,7 +1192,6 @@ void sbgemm_block_kernel_tn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
||||||
bfloat16 * B_addr = B;
|
bfloat16 * B_addr = B;
|
||||||
float * C_addr = C;
|
float * C_addr = C;
|
||||||
|
|
||||||
int SHUFFLE_MAGIC_NO = 0x39;
|
|
||||||
BLASLONG tag_k_32x = k & (~31);
|
BLASLONG tag_k_32x = k & (~31);
|
||||||
|
|
||||||
#ifndef ONE_ALPHA
|
#ifndef ONE_ALPHA
|
||||||
|
@ -1291,7 +1290,6 @@ void sbgemm_block_kernel_tn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa
|
||||||
bfloat16 * B_addr = B;
|
bfloat16 * B_addr = B;
|
||||||
float * C_addr = C;
|
float * C_addr = C;
|
||||||
|
|
||||||
int SHUFFLE_MAGIC_NO = 0x39;
|
|
||||||
BLASLONG tag_k_32x = k & (~31);
|
BLASLONG tag_k_32x = k & (~31);
|
||||||
|
|
||||||
#ifndef ONE_ALPHA
|
#ifndef ONE_ALPHA
|
||||||
|
|
Loading…
Reference in New Issue