Define SBGEMM_ALIGN_K for DYNAMIC_ARCH build

This commit is contained in:
Honglin Zhu 2022-10-27 14:10:26 +08:00
parent 843e9fd0b9
commit 4989e039a5
5 changed files with 18 additions and 18 deletions

View File

@ -50,6 +50,7 @@ typedef struct {
#ifdef BUILD_BFLOAT16
int sbgemm_p, sbgemm_q, sbgemm_r;
int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
int sbgemm_align_k;
void (*sbstobf16_k) (BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
void (*sbdtobf16_k) (BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);
@ -1193,7 +1194,6 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
#ifdef BUILD_COMPLEX16
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
#endif
int align_k; // must be 2^n
} gotoblas_t;
extern gotoblas_t *gotoblas;

View File

@ -305,13 +305,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
}
BLASLONG pad_min_l = min_l;
#if defined(HALF) && defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->align_k - 1) & ~(gotoblas->align_k-1);
#if defined(HALF)
#if defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
#else
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
#endif
#if defined(HALF) && !defined(DYNAMIC_ARCH) && defined(NEOVERSEN2)
pad_min_l = (min_l + 3) & ~3;
#endif
/* First, we have to move data A to L2 cache */

View File

@ -327,12 +327,12 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
BLASLONG pad_min_l = min_l;
#if defined(HALF) && defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->align_k - 1) & ~(gotoblas->align_k-1);
#if defined(HALF)
#if defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
#else
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
#endif
#if defined(HALF) && !defined(DYNAMIC_ARCH) && defined(NEOVERSEN2)
pad_min_l = (min_l + 3) & ~3;
#endif
/* Determine step size in m

View File

@ -62,6 +62,8 @@ gotoblas_t TABLE_NAME = {
MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
#endif
SBGEMM_ALIGN_K,
sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
samax_kTS, samin_kTS, smax_kTS, smin_kTS,
@ -973,12 +975,6 @@ static void init_parameter(void) {
TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
#endif
#endif
#if defined(NEOVERSEN2) && BUILD_BFLOAT16 == 1
TABLE_NAME.align_k = 4;
#else
TABLE_NAME.align_k = 1;
#endif
}
#else // (ARCH_ARM64)

View File

@ -79,6 +79,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SBGEMM_DEFAULT_P 256
#define SBGEMM_DEFAULT_R 256
#define SBGEMM_DEFAULT_Q 256
/* Default K-dimension alignment for SBGEMM. The GEMM drivers pad the K block
 * length with (k + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1); that mask only
 * rounds up correctly when the value is a power of two. The default of 1
 * leaves the length unchanged (no padding). */
#define SBGEMM_ALIGN_K 1 // must be 2^x
#ifdef OPTERON
#define SNUMOPT 4
@ -3394,6 +3396,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
#elif defined(NEOVERSEN2)
#undef SBGEMM_ALIGN_K
#define SBGEMM_ALIGN_K 4
#undef SBGEMM_DEFAULT_UNROLL_M
#undef SBGEMM_DEFAULT_UNROLL_N
#define SBGEMM_DEFAULT_UNROLL_M 8