Define SBGEMM_ALIGN_K for DYNAMIC_ARCH build
This commit is contained in:
parent
843e9fd0b9
commit
4989e039a5
|
@ -50,6 +50,7 @@ typedef struct {
|
|||
#ifdef BUILD_BFLOAT16
|
||||
int sbgemm_p, sbgemm_q, sbgemm_r;
|
||||
int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
|
||||
int sbgemm_align_k;
|
||||
|
||||
void (*sbstobf16_k) (BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
void (*sbdtobf16_k) (BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
|
@ -1193,7 +1194,6 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
#ifdef BUILD_COMPLEX16
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
#endif
|
||||
int align_k; // must be 2^n
|
||||
} gotoblas_t;
|
||||
|
||||
extern gotoblas_t *gotoblas;
|
||||
|
|
|
@ -305,13 +305,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
}
|
||||
|
||||
BLASLONG pad_min_l = min_l;
|
||||
|
||||
#if defined(HALF) && defined(DYNAMIC_ARCH)
|
||||
pad_min_l = (min_l + gotoblas->align_k - 1) & ~(gotoblas->align_k-1);
|
||||
#if defined(HALF)
|
||||
#if defined(DYNAMIC_ARCH)
|
||||
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
|
||||
#else
|
||||
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
|
||||
#endif
|
||||
|
||||
#if defined(HALF) && !defined(DYNAMIC_ARCH) && defined(NEOVERSEN2)
|
||||
pad_min_l = (min_l + 3) & ~3;
|
||||
#endif
|
||||
|
||||
/* First, we have to move data A to L2 cache */
|
||||
|
|
|
@ -327,12 +327,12 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
|||
|
||||
BLASLONG pad_min_l = min_l;
|
||||
|
||||
#if defined(HALF) && defined(DYNAMIC_ARCH)
|
||||
pad_min_l = (min_l + gotoblas->align_k - 1) & ~(gotoblas->align_k-1);
|
||||
#if defined(HALF)
|
||||
#if defined(DYNAMIC_ARCH)
|
||||
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
|
||||
#else
|
||||
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
|
||||
#endif
|
||||
|
||||
#if defined(HALF) && !defined(DYNAMIC_ARCH) && defined(NEOVERSEN2)
|
||||
pad_min_l = (min_l + 3) & ~3;
|
||||
#endif
|
||||
|
||||
/* Determine step size in m
|
||||
|
|
|
@ -62,6 +62,8 @@ gotoblas_t TABLE_NAME = {
|
|||
MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
|
||||
#endif
|
||||
|
||||
SBGEMM_ALIGN_K,
|
||||
|
||||
sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,
|
||||
|
||||
samax_kTS, samin_kTS, smax_kTS, smin_kTS,
|
||||
|
@ -973,12 +975,6 @@ static void init_parameter(void) {
|
|||
TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(NEOVERSEN2) && BUILD_BFLOAT16 == 1
|
||||
TABLE_NAME.align_k = 4;
|
||||
#else
|
||||
TABLE_NAME.align_k = 1;
|
||||
#endif
|
||||
|
||||
}
|
||||
#else // (ARCH_ARM64)
|
||||
|
|
5
param.h
5
param.h
|
@ -79,6 +79,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SBGEMM_DEFAULT_P 256
|
||||
#define SBGEMM_DEFAULT_R 256
|
||||
#define SBGEMM_DEFAULT_Q 256
|
||||
/* Default alignment used when padding min_l in the HALF (SBGEMM) drivers:
   min_l is rounded up with (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1).
   That mask arithmetic is only correct for powers of two; 1 means no padding.
   DYNAMIC_ARCH builds read the same value through gotoblas->sbgemm_align_k. */
#define SBGEMM_ALIGN_K 1 // must be a power of two (2^x)
|
||||
|
||||
#ifdef OPTERON
|
||||
|
||||
#define SNUMOPT 4
|
||||
|
@ -3394,6 +3396,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
|
|||
|
||||
#elif defined(NEOVERSEN2)
|
||||
|
||||
#undef SBGEMM_ALIGN_K
|
||||
#define SBGEMM_ALIGN_K 4 // NEOVERSEN2 override of the default alignment; must remain a power of two
|
||||
|
||||
#undef SBGEMM_DEFAULT_UNROLL_M
|
||||
#undef SBGEMM_DEFAULT_UNROLL_N
|
||||
#define SBGEMM_DEFAULT_UNROLL_M 8
|
||||
|
|
Loading…
Reference in New Issue