Fix the integer overflow issue for large matrix size
For large matrix, e.g. M=N=K, and M>1290, int mnk=M*N*K will overflow. This will lead to wrong branching to single-threading. The performance is downgraded significantly. Signed-off-by: Wang, Long <long1.wang@intel.com>
This commit is contained in:
parent
73128f3883
commit
0caf1434c9
|
@ -1215,7 +1215,7 @@ CNAME(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float * __restrict A, flo
|
|||
|
||||
int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K)
|
||||
{
|
||||
int mnk = M * N * K;
|
||||
unsigned long mnk = M * N * K;
|
||||
/* large matrixes -> not performant */
|
||||
if (mnk >= 28 * 512 * 512)
|
||||
return 0;
|
||||
|
|
|
@ -452,7 +452,7 @@ CNAME(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float * __restrict__ A, f
|
|||
|
||||
int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K)
|
||||
{
|
||||
int mnk = M * N * K;
|
||||
unsigned long mnk = M * N * K;
|
||||
/* large matrixes -> not performant */
|
||||
if (mnk >= 28 * 512 * 512)
|
||||
return 0;
|
||||
|
|
Loading…
Reference in New Issue