Move sgemm_direct_performant helper to separate file

This commit is contained in:
Martin Kroeker 2020-08-16 09:19:34 +02:00 committed by GitHub
parent 2586b26e29
commit 56d4d4f84b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 42 additions and 5 deletions

View File

@ -0,0 +1,30 @@
#include "common.h"
/* helper for the direct sgemm code written by Arjan van der Ven */
int CNAME(BLASLONG M, BLASLONG N, BLASLONG K)
{
unsigned long long mnk = M * N * K;
/* large matrixes -> not performant */
if (mnk >= 28 * 512 * 512)
return 0;
/*
* if the B matrix is not a nice multiple if 4 we get many unaligned accesses,
* and the regular sgemm copy/realignment of data pays off much quicker
*/
if ((N & 3) != 0 && (mnk >= 8 * 512 * 512))
return 0;
#ifdef SMP
/* if we can run multithreaded, the threading changes the based threshold */
if (mnk > 2 * 350 * 512 && num_cpu_avail(3)> 1)
return 0;
#endif
return 1;
}

View File

@ -1,7 +1,7 @@
#if defined(SKYLAKEX) || defined (COOPERLAKE)
/* the direct sgemm code written by Arjan van der Ven */
//#include <immintrin.h>
#include <immintrin.h>
#include "common.h"
/*
* "Direct sgemm" code. This code operates directly on the inputs and outputs
* of the sgemm call, avoiding the copies, memory realignments and threading,
@ -38,6 +38,7 @@
#define MATMUL_SCALAR(N,M) result##N##M += Aval##M * Bval##N;
#define STORE_SCALAR(N,M) R[(i+M) * strideR + j + N] = result##N##M;
#if 0
int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K)
{
unsigned long long mnk = M * N * K;
@ -61,9 +62,10 @@ int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K)
return 1;
}
#endif
void sgemm_kernel_direct (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
//void sgemm_kernel_direct (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
{
int i, j, k;
@ -465,3 +467,8 @@ void sgemm_kernel_direct (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict
}
}
}
#else
#include "common.h"
void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
{}
#endif