sbgemm: spr: tuning for blocking params
This commit is contained in:
parent
a70bfb52d5
commit
0abbcd19c1
14
param.h
14
param.h
|
@ -1771,6 +1771,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#endif
|
#endif
|
||||||
#define USE_SGEMM_KERNEL_DIRECT 1
|
#define USE_SGEMM_KERNEL_DIRECT 1
|
||||||
|
|
||||||
|
#undef SBGEMM_DEFAULT_UNROLL_N
|
||||||
|
#undef SBGEMM_DEFAULT_UNROLL_M
|
||||||
|
#undef SBGEMM_DEFAULT_P
|
||||||
|
#undef SBGEMM_DEFAULT_R
|
||||||
|
#undef SBGEMM_DEFAULT_Q
|
||||||
|
// FIXME: the actual values are UNROLL_M = UNROLL_N = 16.
|
||||||
|
// If UNROLL_M and UNROLL_N are equal, OpenBLAS will reuse OCOPY as ICOPY.
|
||||||
|
// But for AMX the two copies are not the same, so set UNROLL_M = 32 as a workaround.
|
||||||
|
#define SBGEMM_DEFAULT_UNROLL_N 16
|
||||||
|
#define SBGEMM_DEFAULT_UNROLL_M 32
|
||||||
|
#define SBGEMM_DEFAULT_P 192
|
||||||
|
#define SBGEMM_DEFAULT_Q 1024
|
||||||
|
#define SBGEMM_DEFAULT_R sbgemm_r
|
||||||
|
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_UNROLL_M 4
|
#define SGEMM_DEFAULT_UNROLL_M 4
|
||||||
|
|
Loading…
Reference in New Issue