diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 8aa6728d5..98c803e71 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -678,7 +678,7 @@ endif () set(SBGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c) endif () if (NOT DEFINED SBGEMM_SMALL_K_B0_TT) - set($SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) + set(SBGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c) endif () GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "BFLOAT16") GenerateNamedObjects("${KERNELDIR}/${SBGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "BFLOAT16") diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index fe796be64..a81b32ddc 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -1824,6 +1824,13 @@ static void init_parameter(void) { fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p); #endif +#if BUILD_BFLOAT16==1 + TABLE_NAME.sbgemm_r = (((BUFFER_SIZE - + ((TABLE_NAME.sbgemm_p * TABLE_NAME.sbgemm_q * 4 + TABLE_NAME.offsetA + + TABLE_NAME.align) & ~TABLE_NAME.align) + ) / (TABLE_NAME.sbgemm_q * 4) - 15) & ~15); +#endif + #if BUILD_SINGLE==1 TABLE_NAME.sgemm_r = (((BUFFER_SIZE - ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA diff --git a/kernel/x86_64/sbgemm_microk_cooperlake_template.c b/kernel/x86_64/sbgemm_microk_cooperlake_template.c index b8ed9838e..bd5cbb744 100644 --- a/kernel/x86_64/sbgemm_microk_cooperlake_template.c +++ b/kernel/x86_64/sbgemm_microk_cooperlake_template.c @@ -13,6 +13,8 @@ #define ONE 1.e0f #define ZERO 0.e0f +#define SHUFFLE_MAGIC_NO (const int) 0x39 + #undef STORE16_COMPLETE_RESULT #undef STORE16_MASK_COMPLETE_RESULT #undef SBGEMM_BLOCK_KERNEL_NN_32x8xK @@ -356,7 +358,6 @@ void sbgemm_block_kernel_nn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa bfloat16 * B_addr = B; float * C_addr = C; - int SHUFFLE_MAGIC_NO = 0x39; BLASLONG tag_k_32x = k & (~31); #ifndef ONE_ALPHA @@ -465,7 +466,6 @@ void sbgemm_block_kernel_nn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa bfloat16 * B_addr = B; float * C_addr = C; - int SHUFFLE_MAGIC_NO = 0x39; BLASLONG tag_k_32x = k & (~31); #ifndef ONE_ALPHA @@ -1192,7 +1192,6 @@ void sbgemm_block_kernel_tn_32xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa bfloat16 * B_addr = B; float * C_addr = C; - int SHUFFLE_MAGIC_NO = 0x39; BLASLONG tag_k_32x = k & (~31); #ifndef ONE_ALPHA @@ -1291,7 +1290,6 @@ void sbgemm_block_kernel_tn_16xNx32_one(BLASLONG m, BLASLONG n, BLASLONG k, floa bfloat16 * B_addr = B; float * C_addr = C; - int SHUFFLE_MAGIC_NO = 0x39; BLASLONG tag_k_32x = k & (~31); #ifndef ONE_ALPHA