commit
42f0201e21
|
@ -4,7 +4,15 @@
|
||||||
#else
|
#else
|
||||||
#include "config_kernel.h"
|
#include "config_kernel.h"
|
||||||
#endif
|
#endif
|
||||||
#include "common.h"
|
#if (defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64)) && defined(__64BIT__)
|
||||||
|
typedef long long BLASLONG;
|
||||||
|
typedef unsigned long long BLASULONG;
|
||||||
|
#else
|
||||||
|
typedef long BLASLONG;
|
||||||
|
typedef unsigned long BLASULONG;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "param.h"
|
||||||
|
|
||||||
int main(int argc, char **argv) {
|
int main(int argc, char **argv) {
|
||||||
|
|
||||||
|
|
|
@ -169,8 +169,13 @@ ZROTKERNEL = zrot.c
|
||||||
#
|
#
|
||||||
SSCALKERNEL = sscal.c
|
SSCALKERNEL = sscal.c
|
||||||
DSCALKERNEL = dscal.c
|
DSCALKERNEL = dscal.c
|
||||||
|
ifeq ($(C_COMPILER), PGI)
|
||||||
|
CSCALKERNEL = ../arm/zscal.c
|
||||||
|
ZSCALKERNEL = ../arm/zscal.c
|
||||||
|
else
|
||||||
CSCALKERNEL = zscal.c
|
CSCALKERNEL = zscal.c
|
||||||
ZSCALKERNEL = zscal.c
|
ZSCALKERNEL = zscal.c
|
||||||
|
endif
|
||||||
#
|
#
|
||||||
SSWAPKERNEL = sswap.c
|
SSWAPKERNEL = sswap.c
|
||||||
DSWAPKERNEL = dswap.c
|
DSWAPKERNEL = dswap.c
|
||||||
|
|
|
@ -242,8 +242,13 @@ ZROTKERNEL = zrot.c
|
||||||
#
|
#
|
||||||
SSCALKERNEL = sscal.c
|
SSCALKERNEL = sscal.c
|
||||||
DSCALKERNEL = dscal.c
|
DSCALKERNEL = dscal.c
|
||||||
|
ifeq ($(C_COMPILER), PGI)
|
||||||
|
CSCALKERNEL = ../arm/zscal.c
|
||||||
|
ZSCALKERNEL = ../arm/zscal.c
|
||||||
|
else
|
||||||
CSCALKERNEL = zscal.c
|
CSCALKERNEL = zscal.c
|
||||||
ZSCALKERNEL = zscal.c
|
ZSCALKERNEL = zscal.c
|
||||||
|
endif
|
||||||
#
|
#
|
||||||
SSWAPKERNEL = sswap.c
|
SSWAPKERNEL = sswap.c
|
||||||
DSWAPKERNEL = dswap.c
|
DSWAPKERNEL = dswap.c
|
||||||
|
|
|
@ -166,8 +166,13 @@ ZROTKERNEL = zrot.c
|
||||||
#
|
#
|
||||||
SSCALKERNEL = sscal.c
|
SSCALKERNEL = sscal.c
|
||||||
DSCALKERNEL = dscal.c
|
DSCALKERNEL = dscal.c
|
||||||
|
ifeq ($(C_COMPILER), PGI)
|
||||||
|
CSCALKERNEL = ../arm/zscal.c
|
||||||
|
ZSCALKERNEL = ../arm/zscal.c
|
||||||
|
else
|
||||||
CSCALKERNEL = zscal.c
|
CSCALKERNEL = zscal.c
|
||||||
ZSCALKERNEL = zscal.c
|
ZSCALKERNEL = zscal.c
|
||||||
|
endif
|
||||||
#
|
#
|
||||||
SSWAPKERNEL = sswap.c
|
SSWAPKERNEL = sswap.c
|
||||||
DSWAPKERNEL = dswap.c
|
DSWAPKERNEL = dswap.c
|
||||||
|
|
|
@ -491,4 +491,3 @@ SSUMKERNEL = ../arm/sum.c
|
||||||
DSUMKERNEL = ../arm/sum.c
|
DSUMKERNEL = ../arm/sum.c
|
||||||
|
|
||||||
SOMATCOPY_RT = omatcopy_rt.c
|
SOMATCOPY_RT = omatcopy_rt.c
|
||||||
DOMATCOPY_RT = omatcopy_rt.c
|
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
/* the direct sgemm code written by Arjan van der Ven */
|
/* the direct sgemm code written by Arjan van der Ven */
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
#if defined(SKYLAKEX) || defined (COOPERLAKE)
|
#if defined(SKYLAKEX) || defined (COOPERLAKE)
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* "Direct sgemm" code. This code operates directly on the inputs and outputs
|
* "Direct sgemm" code. This code operates directly on the inputs and outputs
|
||||||
|
@ -472,7 +472,7 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG s
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
#include "common.h"
|
|
||||||
void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
|
void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
|
||||||
{}
|
{}
|
||||||
#endif
|
#endif
|
||||||
|
|
6
param.h
6
param.h
|
@ -2466,13 +2466,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_P 512
|
#define SGEMM_DEFAULT_P 512
|
||||||
#define DGEMM_DEFAULT_P 384
|
#define DGEMM_DEFAULT_P 384
|
||||||
#define CGEMM_DEFAULT_P 512
|
#define CGEMM_DEFAULT_P 512
|
||||||
#define ZGEMM_DEFAULT_P 256
|
#define ZGEMM_DEFAULT_P 256
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_Q 512
|
#define SGEMM_DEFAULT_Q 512
|
||||||
#define DGEMM_DEFAULT_Q 512
|
#define DGEMM_DEFAULT_Q 512
|
||||||
#define CGEMM_DEFAULT_Q 1026
|
#define CGEMM_DEFAULT_Q 384
|
||||||
#define ZGEMM_DEFAULT_Q 1026
|
#define ZGEMM_DEFAULT_Q 384
|
||||||
|
|
||||||
#define SGEMM_DEFAULT_R 4096
|
#define SGEMM_DEFAULT_R 4096
|
||||||
#define DGEMM_DEFAULT_R 4096
|
#define DGEMM_DEFAULT_R 4096
|
||||||
|
|
Loading…
Reference in New Issue