Move sgemm_direct_performant helper to separate file
This commit is contained in:
parent
2586b26e29
commit
56d4d4f84b
|
@ -0,0 +1,30 @@
|
|||
#include "common.h"
|
||||
/* helper for the direct sgemm code written by Arjan van der Ven */
|
||||
|
||||
|
||||
|
||||
|
||||
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K)
|
||||
{
|
||||
unsigned long long mnk = M * N * K;
|
||||
/* large matrixes -> not performant */
|
||||
if (mnk >= 28 * 512 * 512)
|
||||
return 0;
|
||||
|
||||
/*
|
||||
* if the B matrix is not a nice multiple if 4 we get many unaligned accesses,
|
||||
* and the regular sgemm copy/realignment of data pays off much quicker
|
||||
*/
|
||||
if ((N & 3) != 0 && (mnk >= 8 * 512 * 512))
|
||||
return 0;
|
||||
|
||||
#ifdef SMP
|
||||
/* if we can run multithreaded, the threading changes the based threshold */
|
||||
if (mnk > 2 * 350 * 512 && num_cpu_avail(3)> 1)
|
||||
return 0;
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
|
||||
#if defined(SKYLAKEX) || defined (COOPERLAKE)
|
||||
/* the direct sgemm code written by Arjan van der Ven */
|
||||
//#include <immintrin.h>
|
||||
|
||||
#include <immintrin.h>
|
||||
#include "common.h"
|
||||
/*
|
||||
* "Direct sgemm" code. This code operates directly on the inputs and outputs
|
||||
* of the sgemm call, avoiding the copies, memory realignments and threading,
|
||||
|
@ -38,6 +38,7 @@
|
|||
#define MATMUL_SCALAR(N,M) result##N##M += Aval##M * Bval##N;
|
||||
#define STORE_SCALAR(N,M) R[(i+M) * strideR + j + N] = result##N##M;
|
||||
|
||||
#if 0
|
||||
int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K)
|
||||
{
|
||||
unsigned long long mnk = M * N * K;
|
||||
|
@ -61,9 +62,10 @@ int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K)
|
|||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
void sgemm_kernel_direct (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
|
||||
//void sgemm_kernel_direct (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
|
||||
void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
|
||||
{
|
||||
int i, j, k;
|
||||
|
||||
|
@ -465,3 +467,8 @@ void sgemm_kernel_direct (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict
|
|||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
#include "common.h"
|
||||
void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
|
||||
{}
|
||||
#endif
|
||||
|
|
Loading…
Reference in New Issue