Fix integer overflow for large matrix sizes
For large matrices, e.g. M = N = K with M > 1290, the product stored in `int mnk = M * N * K` overflows. The overflowed value causes the code to take the wrong branch and fall back to the single-threaded path, which degrades performance significantly. Signed-off-by: Wang, Long <long1.wang@intel.com>
This commit is contained in:
parent
73128f3883
commit
0caf1434c9
|
@ -1215,7 +1215,7 @@ CNAME(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float * __restrict A, flo
|
||||||
|
|
||||||
int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K)
|
int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K)
|
||||||
{
|
{
|
||||||
int mnk = M * N * K;
|
unsigned long mnk = M * N * K;
|
||||||
/* large matrixes -> not performant */
|
/* large matrixes -> not performant */
|
||||||
if (mnk >= 28 * 512 * 512)
|
if (mnk >= 28 * 512 * 512)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -452,7 +452,7 @@ CNAME(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float * __restrict__ A, f
|
||||||
|
|
||||||
int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K)
|
int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K)
|
||||||
{
|
{
|
||||||
int mnk = M * N * K;
|
unsigned long mnk = M * N * K;
|
||||||
/* large matrixes -> not performant */
|
/* large matrixes -> not performant */
|
||||||
if (mnk >= 28 * 512 * 512)
|
if (mnk >= 28 * 512 * 512)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
Loading…
Reference in New Issue