Move "direct SGEMM" functionality out of the SkylakeX SGEMM kernel and make it available (on x86_64 targets only for now) in DYNAMIC_ARCH builds * Add sgemm_direct targets in the kernel Makefile.L3 and CMakeLists.txt * Add direct_sgemm functions to the gotoblas struct in common_param.h * Move sgemm_direct_performant helper to a separate file * Update gemm.c to use macros for sgemm_direct to support dynamic_arch naming via common_s.h * (Conditionally) add sgemm_direct functions in setparam-ref.c
31 lines
651 B
C
31 lines
651 B
C
#include "common.h"
|
|
/* helper for the direct sgemm code written by Arjan van der Ven */
|
|
|
|
|
|
|
|
|
|
/*
 * Heuristic used by the direct SGEMM code path: decide from the problem
 * dimensions whether the direct kernel is worth using.
 *
 * M, N, K - the three GEMM dimensions; only their product and the low two
 *           bits of N are examined.
 *
 * Returns 1 when none of the "not performant" conditions below applies,
 * and 0 otherwise.
 */
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K)
{
	/*
	 * Convert each operand before multiplying.  Evaluating M * N * K in
	 * BLASLONG (presumably a signed type - confirm in common.h) and only
	 * then converting the result would make an overflowing product
	 * undefined behavior.  Unsigned arithmetic is always well defined
	 * (it wraps modulo 2^64) and yields the same value as before for
	 * every product that fit in BLASLONG.
	 */
	unsigned long long mnk = (unsigned long long)M
	                       * (unsigned long long)N
	                       * (unsigned long long)K;

	/* large matrices -> not performant */
	if (mnk >= 28 * 512 * 512) {
		return 0;
	}

	/*
	 * if N is not a nice multiple of 4 we get many unaligned accesses,
	 * and the regular sgemm copy/realignment of data pays off much quicker
	 */
	if ((N & 3) != 0 && (mnk >= 8 * 512 * 512)) {
		return 0;
	}

#ifdef SMP
	/* if we can run multithreaded, the threading lowers the threshold */
	if (mnk > 2 * 350 * 512 && num_cpu_avail(3) > 1) {
		return 0;
	}
#endif

	return 1;
}
|
|
|
|
|