Rename "HALF" and "sh" to "BFLOAT16" and "sb"
This commit is contained in:
parent
5800758b43
commit
ca31c32693
2
cblas.h
2
cblas.h
|
@ -392,7 +392,7 @@ void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPE
|
|||
/* convert BFLOAT16 array to double array */
|
||||
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
|
||||
/* dot production of BFLOAT16 input arrays, and output as float */
|
||||
float cblas_shdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
|
||||
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
4
common.h
4
common.h
|
@ -260,7 +260,7 @@ typedef unsigned long BLASULONG;
|
|||
#ifndef BFLOAT16
|
||||
#include <stdint.h>
|
||||
typedef uint16_t bfloat16;
|
||||
#define HALFCONVERSION 1
|
||||
#define BFLOAT16CONVERSION 1
|
||||
#endif
|
||||
|
||||
#ifdef USE64BITINT
|
||||
|
@ -303,7 +303,7 @@ typedef int blasint;
|
|||
#define SIZE 8
|
||||
#define BASE_SHIFT 3
|
||||
#define ZBASE_SHIFT 4
|
||||
#elif defined(HALF)
|
||||
#elif defined(BFLOAT16)
|
||||
#define IFLOAT bfloat16
|
||||
#define XFLOAT IFLOAT
|
||||
#define FLOAT float
|
||||
|
|
|
@ -54,7 +54,7 @@ double BLASFUNC(dsdot) (blasint *, float *, blasint *, float *, blasint *);
|
|||
double BLASFUNC(ddot) (blasint *, double *, blasint *, double *, blasint *);
|
||||
xdouble BLASFUNC(qdot) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
float BLASFUNC(shdot) (blasint *, bfloat16 *, blasint *, bfloat16 *, blasint *);
|
||||
float BLASFUNC(sbdot) (blasint *, bfloat16 *, blasint *, bfloat16 *, blasint *);
|
||||
void BLASFUNC(shstobf16) (blasint *, float *, blasint *, bfloat16 *, blasint *);
|
||||
void BLASFUNC(shdtobf16) (blasint *, double *, blasint *, bfloat16 *, blasint *);
|
||||
void BLASFUNC(sbf16tos) (blasint *, bfloat16 *, blasint *, float *, blasint *);
|
||||
|
@ -474,7 +474,7 @@ void BLASFUNC(xhbmv)(char *, blasint *, blasint *, xdouble *, xdouble *, blasint
|
|||
|
||||
/* Level 3 routines */
|
||||
|
||||
void BLASFUNC(shgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
|
||||
void BLASFUNC(sbgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
|
||||
bfloat16 *, blasint *, bfloat16 *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(sgemm)(char *, char *, blasint *, blasint *, blasint *, float *,
|
||||
float *, blasint *, float *, blasint *, float *, float *, blasint *);
|
||||
|
|
|
@ -46,7 +46,7 @@ float sdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
|||
double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
float shdot_k(BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
float sbdot_k(BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
|
||||
void shstobf16_k(BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
void shdtobf16_k(BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
|
|
|
@ -55,7 +55,7 @@ void sgemm_direct(BLASLONG M, BLASLONG N, BLASLONG K,
|
|||
int sgemm_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
|
||||
|
||||
|
||||
int shgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
||||
int sbgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
||||
bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
|
||||
int sgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
@ -78,10 +78,10 @@ int xgemm_beta(BLASLONG, BLASLONG, BLASLONG, xdouble *,
|
|||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
|
||||
#endif
|
||||
|
||||
int shgemm_incopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||
int shgemm_itcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||
int shgemm_oncopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||
int shgemm_otcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||
int sbgemm_incopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||
int sbgemm_itcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||
int sbgemm_oncopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||
int sbgemm_otcopy(BLASLONG m, BLASLONG n, bfloat16 *a, BLASLONG lda, bfloat16 *b);
|
||||
int sgemm_incopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
|
||||
int sgemm_itcopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
|
||||
int sgemm_oncopy(BLASLONG m, BLASLONG n, float *a, BLASLONG lda, float *b);
|
||||
|
@ -505,7 +505,7 @@ int xher2k_kernel_UC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdoubl
|
|||
int xher2k_kernel_LN(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
|
||||
int xher2k_kernel_LC(BLASLONG m, BLASLONG n, BLASLONG k, xdouble alpha_r, xdouble alpha_i, xdouble *a, xdouble *b, xdouble *c, BLASLONG ldc, BLASLONG offset, int flag);
|
||||
|
||||
int shgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
|
||||
int sbgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
|
||||
int sgemm_kernel(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);
|
||||
int dgemm_kernel(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
|
||||
|
||||
|
@ -534,10 +534,10 @@ int cgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float
|
|||
int zgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, double, double, double *, double *, double *, BLASLONG);
|
||||
int xgemm3m_kernel(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, xdouble *, xdouble *, BLASLONG);
|
||||
|
||||
int shgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int shgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int shgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int shgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int sbgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int sbgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int sbgemm_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int sbgemm_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
|
||||
int sgemm_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
int sgemm_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
|
@ -631,10 +631,10 @@ int xgemm_cr(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLON
|
|||
int xgemm_cc(blas_arg_t *, BLASLONG *, BLASLONG *, xdouble *, xdouble *, BLASLONG);
|
||||
#endif
|
||||
|
||||
int shgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int shgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int shgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int shgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int sbgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int sbgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int sbgemm_thread_tn(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
int sbgemm_thread_tt(blas_arg_t *, BLASLONG *, BLASLONG *, bfloat16 *, bfloat16 *, BLASLONG);
|
||||
|
||||
int sgemm_thread_nn(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
int sgemm_thread_nt(blas_arg_t *, BLASLONG *, BLASLONG *, float *, float *, BLASLONG);
|
||||
|
|
|
@ -644,7 +644,7 @@
|
|||
|
||||
#define GEADD_K DGEADD_K
|
||||
|
||||
#elif defined(HALF)
|
||||
#elif defined(BFLOAT16)
|
||||
|
||||
#define D_TO_BF16_K SHDTOBF16_K
|
||||
#define D_BF16_TO_K DBF16TOD_K
|
||||
|
@ -662,7 +662,7 @@
|
|||
#define ASUM_K SASUM_K
|
||||
#define DOTU_K SDOTU_K
|
||||
#define DOTC_K SDOTC_K
|
||||
#define BF16_DOT_K SHDOT_K
|
||||
#define BF16_DOT_K SBDOT_K
|
||||
#define AXPYU_K SAXPYU_K
|
||||
#define AXPYC_K SAXPYC_K
|
||||
#define AXPBY_K SAXPBY_K
|
||||
|
@ -682,32 +682,32 @@
|
|||
#define NRM2_K SNRM2_K
|
||||
#define SYMV_THREAD_U SSYMV_THREAD_U
|
||||
#define SYMV_THREAD_L SSYMV_THREAD_L
|
||||
#define GEMM_BETA SHGEMM_BETA
|
||||
#define GEMM_KERNEL_N SHGEMM_KERNEL
|
||||
#define GEMM_KERNEL_L SHGEMM_KERNEL
|
||||
#define GEMM_KERNEL_R SHGEMM_KERNEL
|
||||
#define GEMM_KERNEL_B SHGEMM_KERNEL
|
||||
#define GEMM_BETA SBGEMM_BETA
|
||||
#define GEMM_KERNEL_N SBGEMM_KERNEL
|
||||
#define GEMM_KERNEL_L SBGEMM_KERNEL
|
||||
#define GEMM_KERNEL_R SBGEMM_KERNEL
|
||||
#define GEMM_KERNEL_B SBGEMM_KERNEL
|
||||
|
||||
#define GEMM_NN SHGEMM_NN
|
||||
#define GEMM_CN SHGEMM_TN
|
||||
#define GEMM_TN SHGEMM_TN
|
||||
#define GEMM_NC SHGEMM_NT
|
||||
#define GEMM_NT SHGEMM_NT
|
||||
#define GEMM_CC SHGEMM_TT
|
||||
#define GEMM_CT SHGEMM_TT
|
||||
#define GEMM_TC SHGEMM_TT
|
||||
#define GEMM_TT SHGEMM_TT
|
||||
#define GEMM_NR SHGEMM_NN
|
||||
#define GEMM_TR SHGEMM_TN
|
||||
#define GEMM_CR SHGEMM_TN
|
||||
#define GEMM_RN SHGEMM_NN
|
||||
#define GEMM_RT SHGEMM_NT
|
||||
#define GEMM_RC SHGEMM_NT
|
||||
#define GEMM_RR SHGEMM_NN
|
||||
#define GEMM_ONCOPY SHGEMM_ONCOPY
|
||||
#define GEMM_OTCOPY SHGEMM_OTCOPY
|
||||
#define GEMM_INCOPY SHGEMM_INCOPY
|
||||
#define GEMM_ITCOPY SHGEMM_ITCOPY
|
||||
#define GEMM_NN SBGEMM_NN
|
||||
#define GEMM_CN SBGEMM_TN
|
||||
#define GEMM_TN SBGEMM_TN
|
||||
#define GEMM_NC SBGEMM_NT
|
||||
#define GEMM_NT SBGEMM_NT
|
||||
#define GEMM_CC SBGEMM_TT
|
||||
#define GEMM_CT SBGEMM_TT
|
||||
#define GEMM_TC SBGEMM_TT
|
||||
#define GEMM_TT SBGEMM_TT
|
||||
#define GEMM_NR SBGEMM_NN
|
||||
#define GEMM_TR SBGEMM_TN
|
||||
#define GEMM_CR SBGEMM_TN
|
||||
#define GEMM_RN SBGEMM_NN
|
||||
#define GEMM_RT SBGEMM_NT
|
||||
#define GEMM_RC SBGEMM_NT
|
||||
#define GEMM_RR SBGEMM_NN
|
||||
#define GEMM_ONCOPY SBGEMM_ONCOPY
|
||||
#define GEMM_OTCOPY SBGEMM_OTCOPY
|
||||
#define GEMM_INCOPY SBGEMM_INCOPY
|
||||
#define GEMM_ITCOPY SBGEMM_ITCOPY
|
||||
#define SYMM_THREAD_LU SSYMM_THREAD_LU
|
||||
#define SYMM_THREAD_LL SSYMM_THREAD_LL
|
||||
#define SYMM_THREAD_RU SSYMM_THREAD_RU
|
||||
|
@ -723,22 +723,22 @@
|
|||
#define HEMM_THREAD_RU SHEMM_THREAD_RU
|
||||
#define HEMM_THREAD_RL SHEMM_THREAD_RL
|
||||
|
||||
#define GEMM_THREAD_NN SHGEMM_THREAD_NN
|
||||
#define GEMM_THREAD_CN SHGEMM_THREAD_TN
|
||||
#define GEMM_THREAD_TN SHGEMM_THREAD_TN
|
||||
#define GEMM_THREAD_NC SHGEMM_THREAD_NT
|
||||
#define GEMM_THREAD_NT SHGEMM_THREAD_NT
|
||||
#define GEMM_THREAD_CC SHGEMM_THREAD_TT
|
||||
#define GEMM_THREAD_CT SHGEMM_THREAD_TT
|
||||
#define GEMM_THREAD_TC SHGEMM_THREAD_TT
|
||||
#define GEMM_THREAD_TT SHGEMM_THREAD_TT
|
||||
#define GEMM_THREAD_NR SHGEMM_THREAD_NN
|
||||
#define GEMM_THREAD_TR SHGEMM_THREAD_TN
|
||||
#define GEMM_THREAD_CR SHGEMM_THREAD_TN
|
||||
#define GEMM_THREAD_RN SHGEMM_THREAD_NN
|
||||
#define GEMM_THREAD_RT SHGEMM_THREAD_NT
|
||||
#define GEMM_THREAD_RC SHGEMM_THREAD_NT
|
||||
#define GEMM_THREAD_RR SHGEMM_THREAD_NN
|
||||
#define GEMM_THREAD_NN SBGEMM_THREAD_NN
|
||||
#define GEMM_THREAD_CN SBGEMM_THREAD_TN
|
||||
#define GEMM_THREAD_TN SBGEMM_THREAD_TN
|
||||
#define GEMM_THREAD_NC SBGEMM_THREAD_NT
|
||||
#define GEMM_THREAD_NT SBGEMM_THREAD_NT
|
||||
#define GEMM_THREAD_CC SBGEMM_THREAD_TT
|
||||
#define GEMM_THREAD_CT SBGEMM_THREAD_TT
|
||||
#define GEMM_THREAD_TC SBGEMM_THREAD_TT
|
||||
#define GEMM_THREAD_TT SBGEMM_THREAD_TT
|
||||
#define GEMM_THREAD_NR SBGEMM_THREAD_NN
|
||||
#define GEMM_THREAD_TR SBGEMM_THREAD_TN
|
||||
#define GEMM_THREAD_CR SBGEMM_THREAD_TN
|
||||
#define GEMM_THREAD_RN SBGEMM_THREAD_NN
|
||||
#define GEMM_THREAD_RT SBGEMM_THREAD_NT
|
||||
#define GEMM_THREAD_RC SBGEMM_THREAD_NT
|
||||
#define GEMM_THREAD_RR SBGEMM_THREAD_NN
|
||||
|
||||
#ifdef UNIT
|
||||
|
||||
|
@ -2491,9 +2491,9 @@
|
|||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64)
|
||||
extern BLASLONG gemm_offset_a;
|
||||
extern BLASLONG gemm_offset_b;
|
||||
extern BLASLONG shgemm_p;
|
||||
extern BLASLONG shgemm_q;
|
||||
extern BLASLONG shgemm_r;
|
||||
extern BLASLONG sbgemm_p;
|
||||
extern BLASLONG sbgemm_q;
|
||||
extern BLASLONG sbgemm_r;
|
||||
extern BLASLONG sgemm_p;
|
||||
extern BLASLONG sgemm_q;
|
||||
extern BLASLONG sgemm_r;
|
||||
|
|
230
common_param.h
230
common_param.h
|
@ -47,9 +47,9 @@ typedef struct {
|
|||
int dtb_entries;
|
||||
int offsetA, offsetB, align;
|
||||
|
||||
#ifdef BUILD_HALF
|
||||
int shgemm_p, shgemm_q, shgemm_r;
|
||||
int shgemm_unroll_m, shgemm_unroll_n, shgemm_unroll_mn;
|
||||
#ifdef BUILD_BFLOAT16
|
||||
int sbgemm_p, sbgemm_q, sbgemm_r;
|
||||
int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
|
||||
|
||||
void (*shstobf16_k) (BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
void (*shdtobf16_k) (BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
|
@ -69,8 +69,8 @@ BLASLONG (*ishmin_k) (BLASLONG, float *, BLASLONG);
|
|||
float (*shasum_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*shsum_k) (BLASLONG, float *, BLASLONG);
|
||||
int (*shcopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float (*shdot_k) (BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
double (*dshdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float (*sbdot_k) (BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
double (*dsbdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*shrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
|
||||
|
@ -78,20 +78,20 @@ BLASLONG (*ishmin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*shscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*shswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*shgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*shgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sbgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sbgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*shger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
int (*shsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*shsymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
int (*shgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
|
||||
int (*shgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
|
||||
int (*sbgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG);
|
||||
int (*sbgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*shgemm_incopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||
int (*shgemm_itcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||
int (*shgemm_oncopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||
int (*shgemm_otcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||
int (*sbgemm_incopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||
int (*sbgemm_itcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||
int (*sbgemm_oncopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||
int (*sbgemm_otcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *);
|
||||
|
||||
int (*shtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*shtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
@ -147,14 +147,14 @@ BLASLONG (*ishmin_k) (BLASLONG, float *, BLASLONG);
|
|||
|
||||
#endif
|
||||
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX) || (BUILD_COMPLEX16)
|
||||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
|
||||
int sgemm_p, sgemm_q, sgemm_r;
|
||||
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn;
|
||||
#endif
|
||||
|
||||
int exclusive_cache;
|
||||
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
|
||||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
|
||||
float (*samax_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*samin_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*smax_k) (BLASLONG, float *, BLASLONG);
|
||||
|
@ -167,11 +167,10 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
float (*snrm2_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*sasum_k) (BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_SINGLE
|
||||
#ifdef BUILD_SINGLE
|
||||
float (*ssum_k) (BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
|
||||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
|
||||
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
@ -179,26 +178,20 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
|
||||
int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX) || (BUILD_COMPLEX16)
|
||||
int (*sscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
|
||||
int (*sswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
|
||||
#if BUILD_SINGLE
|
||||
#ifdef BUILD_SINGLE
|
||||
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
|
||||
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
|
||||
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
|
||||
#ifdef ARCH_X86_64
|
||||
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
|
||||
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
|
||||
|
@ -213,8 +206,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE)
|
||||
#ifdef BUILD_SINGLE
|
||||
int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
@ -236,8 +228,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*strsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*strsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*strsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
#endif
|
||||
#if BUILD_SINGLE
|
||||
|
||||
int (*strmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
@ -264,18 +255,17 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
|
|||
int (*ssymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*ssymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
int (*ssymm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *);
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE)
|
||||
|
||||
int (*sneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
|
||||
#endif
|
||||
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
int dgemm_p, dgemm_q, dgemm_r;
|
||||
int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn;
|
||||
#endif
|
||||
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
double (*damax_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*damin_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*dmax_k) (BLASLONG, double *, BLASLONG);
|
||||
|
@ -286,21 +276,21 @@ BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG);
|
|||
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
|
||||
double (*dasum_k) (BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_DOUBLE
|
||||
#ifdef BUILD_DOUBLE
|
||||
double (*dsum_k) (BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_SINGLE) || (BUILD_DOUBLE)
|
||||
#if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE)
|
||||
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
|
||||
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
|
@ -308,15 +298,13 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
|||
int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
#endif
|
||||
|
||||
#if BUILD_DOUBLE
|
||||
#ifdef BUILD_DOUBLE
|
||||
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
|
||||
int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
|
||||
#endif
|
||||
|
||||
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
|
||||
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
|
||||
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
|
||||
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
|
||||
|
||||
|
@ -325,8 +313,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
|
|||
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
|
||||
#endif
|
||||
|
||||
#if BUILD_DOUBLE
|
||||
#ifdef BUILD_DOUBLE
|
||||
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
|
||||
|
@ -473,30 +460,23 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
|
|||
|
||||
#endif
|
||||
|
||||
|
||||
#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
|
||||
#ifdef BUILD_COMPLEX
|
||||
int cgemm_p, cgemm_q, cgemm_r;
|
||||
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;
|
||||
|
||||
float (*camax_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*camin_k) (BLASLONG, float *, BLASLONG);
|
||||
BLASLONG (*icamax_k)(BLASLONG, float *, BLASLONG);
|
||||
BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX
|
||||
|
||||
float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*casum_k) (BLASLONG, float *, BLASLONG);
|
||||
float (*csum_k) (BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if (BUILD_COMPLEX)|| (BUILD_COMPLEX16)
|
||||
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX
|
||||
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
#endif
|
||||
#if (BUILD_COMPLEX)|| (BUILD_COMPLEX16)
|
||||
|
||||
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*caxpyc_k)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*cscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
@ -510,8 +490,6 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
|||
int (*cgemv_u) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemv_s) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemv_d) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
#if (BUILD_COMPLEX)
|
||||
int (*cgeru_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgerc_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgerv_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
|
@ -523,14 +501,13 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
|||
int (*chemv_U) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*chemv_M) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*chemv_V) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
#endif
|
||||
#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
|
||||
|
||||
int (*cgemm_kernel_n )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
|
||||
int (*cgemm_kernel_l )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
|
||||
int (*cgemm_kernel_r )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
|
||||
int (*cgemm_kernel_b )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG);
|
||||
int (*cgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*cgemm_incopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*cgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
|
@ -561,8 +538,6 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
|||
int (*ctrsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*ctrsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
int (*ctrsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
|
||||
#endif
|
||||
#if (BUILD_COMPLEX)
|
||||
|
||||
int (*ctrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
int (*ctrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, float *, float *, BLASLONG, BLASLONG);
|
||||
|
@ -646,14 +621,12 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
|
|||
int (*chemm3m_olcopyr)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*chemm3m_oucopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
int (*chemm3m_olcopyi)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float, float, float *);
|
||||
#endif
|
||||
#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
|
||||
|
||||
int (*cneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
|
||||
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
|
||||
#endif
|
||||
|
||||
|
||||
#if BUILD_COMPLEX16
|
||||
#ifdef BUILD_COMPLEX16
|
||||
int zgemm_p, zgemm_q, zgemm_r;
|
||||
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;
|
||||
|
||||
|
@ -991,35 +964,34 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
void (*init)(void);
|
||||
|
||||
int snum_opt, dnum_opt, qnum_opt;
|
||||
|
||||
#if BUILD_SINGLE
|
||||
#ifdef BUILD_SINGLE
|
||||
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_DOUBLE
|
||||
#ifdef BUILD_DOUBLE
|
||||
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX
|
||||
#ifdef BUILD_COMPLEX
|
||||
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX16
|
||||
#ifdef BUILD_COMPLEX16
|
||||
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_SINGLE
|
||||
#ifdef BUILD_SINGLE
|
||||
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_DOUBLE
|
||||
#ifdef BUILD_DOUBLE
|
||||
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX
|
||||
#ifdef BUILD_COMPLEX
|
||||
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
|
@ -1031,7 +1003,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX16
|
||||
#ifdef BUILD_COMPLEX16
|
||||
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
|
@ -1043,21 +1015,21 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_SINGLE
|
||||
#ifdef BUILD_SINGLE
|
||||
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_DOUBLE
|
||||
#ifdef BUILD_DOUBLE
|
||||
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX
|
||||
#ifdef BUILD_COMPLEX
|
||||
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
|
@ -1069,7 +1041,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX16
|
||||
#ifdef BUILD_COMPLEX16
|
||||
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
|
@ -1081,16 +1053,16 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
|
|||
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
|
||||
#endif
|
||||
|
||||
#if BUILD_SINGLE
|
||||
#ifdef BUILD_SINGLE
|
||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_DOUBLE
|
||||
#ifdef BUILD_DOUBLE
|
||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX
|
||||
#ifdef BUILD_COMPLEX
|
||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
|
||||
#endif
|
||||
#if BUILD_COMPLEX16
|
||||
#ifdef BUILD_COMPLEX16
|
||||
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
|
||||
#endif
|
||||
} gotoblas_t;
|
||||
|
@ -1104,16 +1076,16 @@ extern gotoblas_t *gotoblas;
|
|||
|
||||
#define HAVE_EX_L2 gotoblas -> exclusive_cache
|
||||
|
||||
#ifdef BUILD_HALF
|
||||
#define SHGEMM_P gotoblas -> shgemm_p
|
||||
#define SHGEMM_Q gotoblas -> shgemm_q
|
||||
#define SHGEMM_R gotoblas -> shgemm_r
|
||||
#define SHGEMM_UNROLL_M gotoblas -> shgemm_unroll_m
|
||||
#define SHGEMM_UNROLL_N gotoblas -> shgemm_unroll_n
|
||||
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn
|
||||
#ifdef BUILD_BFLOAT16
|
||||
#define SBGEMM_P gotoblas -> sbgemm_p
|
||||
#define SBGEMM_Q gotoblas -> sbgemm_q
|
||||
#define SBGEMM_R gotoblas -> sbgemm_r
|
||||
#define SBGEMM_UNROLL_M gotoblas -> sbgemm_unroll_m
|
||||
#define SBGEMM_UNROLL_N gotoblas -> sbgemm_unroll_n
|
||||
#define SBGEMM_UNROLL_MN gotoblas -> sbgemm_unroll_mn
|
||||
#endif
|
||||
|
||||
#if (BUILD_SINGLE)
|
||||
#if defined (BUILD_SINGLE)
|
||||
#define SGEMM_P gotoblas -> sgemm_p
|
||||
#define SGEMM_Q gotoblas -> sgemm_q
|
||||
#define SGEMM_R gotoblas -> sgemm_r
|
||||
|
@ -1122,21 +1094,13 @@ extern gotoblas_t *gotoblas;
|
|||
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
|
||||
#endif
|
||||
|
||||
#if (BUILD_DOUBLE)
|
||||
#if defined (BUILD_DOUBLE)
|
||||
#define DGEMM_P gotoblas -> dgemm_p
|
||||
#define DGEMM_Q gotoblas -> dgemm_q
|
||||
#define DGEMM_R gotoblas -> dgemm_r
|
||||
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
|
||||
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
|
||||
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
|
||||
#if ! (BUILD_SINGLE)
|
||||
#define SGEMM_P gotoblas -> sgemm_p
|
||||
#define SGEMM_Q gotoblas -> sgemm_q
|
||||
#define SGEMM_R gotoblas -> sgemm_r
|
||||
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
|
||||
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
|
||||
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define QGEMM_P gotoblas -> qgemm_p
|
||||
|
@ -1146,7 +1110,7 @@ extern gotoblas_t *gotoblas;
|
|||
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
|
||||
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
|
||||
|
||||
#if BUILD_COMPLEX
|
||||
#ifdef BUILD_COMPLEX
|
||||
#define CGEMM_P gotoblas -> cgemm_p
|
||||
#define CGEMM_Q gotoblas -> cgemm_q
|
||||
#define CGEMM_R gotoblas -> cgemm_r
|
||||
|
@ -1163,7 +1127,7 @@ extern gotoblas_t *gotoblas;
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if BUILD_COMPLEX16
|
||||
#ifdef BUILD_COMPLEX16
|
||||
#define ZGEMM_P gotoblas -> zgemm_p
|
||||
#define ZGEMM_Q gotoblas -> zgemm_q
|
||||
#define ZGEMM_R gotoblas -> zgemm_r
|
||||
|
@ -1178,14 +1142,6 @@ extern gotoblas_t *gotoblas;
|
|||
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
|
||||
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
|
||||
#endif
|
||||
#ifndef BUILD_COMPLEX
|
||||
#define CGEMM_P gotoblas -> cgemm_p
|
||||
#define CGEMM_Q gotoblas -> cgemm_q
|
||||
#define CGEMM_R gotoblas -> cgemm_r
|
||||
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
|
||||
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
|
||||
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define XGEMM_P gotoblas -> xgemm_p
|
||||
|
@ -1230,16 +1186,16 @@ extern gotoblas_t *gotoblas;
|
|||
#define HAVE_EX_L2 0
|
||||
#endif
|
||||
|
||||
#ifdef BUILD_HALF
|
||||
#define SHGEMM_P SHGEMM_DEFAULT_P
|
||||
#define SHGEMM_Q SHGEMM_DEFAULT_Q
|
||||
#define SHGEMM_R SHGEMM_DEFAULT_R
|
||||
#define SHGEMM_UNROLL_M SHGEMM_DEFAULT_UNROLL_M
|
||||
#define SHGEMM_UNROLL_N SHGEMM_DEFAULT_UNROLL_N
|
||||
#ifdef SHGEMM_DEFAULT_UNROLL_MN
|
||||
#define SHGEMM_UNROLL_MN SHGEMM_DEFAULT_UNROLL_MN
|
||||
#ifdef BUILD_BFLOAT16
|
||||
#define SBGEMM_P SBGEMM_DEFAULT_P
|
||||
#define SBGEMM_Q SBGEMM_DEFAULT_Q
|
||||
#define SBGEMM_R SBGEMM_DEFAULT_R
|
||||
#define SBGEMM_UNROLL_M SBGEMM_DEFAULT_UNROLL_M
|
||||
#define SBGEMM_UNROLL_N SBGEMM_DEFAULT_UNROLL_N
|
||||
#ifdef SBGEMM_DEFAULT_UNROLL_MN
|
||||
#define SBGEMM_UNROLL_MN SBGEMM_DEFAULT_UNROLL_MN
|
||||
#else
|
||||
#define SHGEMM_UNROLL_MN MAX((SHGEMM_UNROLL_M), (SHGEMM_UNROLL_N))
|
||||
#define SBGEMM_UNROLL_MN MAX((SBGEMM_UNROLL_M), (SBGEMM_UNROLL_N))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -1354,7 +1310,7 @@ extern gotoblas_t *gotoblas;
|
|||
#endif
|
||||
|
||||
#ifndef COMPLEX
|
||||
#if (XDOUBLE)
|
||||
#if defined(XDOUBLE)
|
||||
#define GEMM_P QGEMM_P
|
||||
#define GEMM_Q QGEMM_Q
|
||||
#define GEMM_R QGEMM_R
|
||||
|
@ -1378,18 +1334,18 @@ extern gotoblas_t *gotoblas;
|
|||
#define GEMM_DEFAULT_R DGEMM_DEFAULT_R
|
||||
#define GEMM_DEFAULT_UNROLL_M DGEMM_DEFAULT_UNROLL_M
|
||||
#define GEMM_DEFAULT_UNROLL_N DGEMM_DEFAULT_UNROLL_N
|
||||
#elif (HALF)
|
||||
#define GEMM_P SHGEMM_P
|
||||
#define GEMM_Q SHGEMM_Q
|
||||
#define GEMM_R SHGEMM_R
|
||||
#define GEMM_UNROLL_M SHGEMM_UNROLL_M
|
||||
#define GEMM_UNROLL_N SHGEMM_UNROLL_N
|
||||
#define GEMM_UNROLL_MN SHGEMM_UNROLL_MN
|
||||
#define GEMM_DEFAULT_P SHGEMM_DEFAULT_P
|
||||
#define GEMM_DEFAULT_Q SHGEMM_DEFAULT_Q
|
||||
#define GEMM_DEFAULT_R SHGEMM_DEFAULT_R
|
||||
#define GEMM_DEFAULT_UNROLL_M SHGEMM_DEFAULT_UNROLL_M
|
||||
#define GEMM_DEFAULT_UNROLL_N SHGEMM_DEFAULT_UNROLL_N
|
||||
#elif defined(BFLOAT16)
|
||||
#define GEMM_P SBGEMM_P
|
||||
#define GEMM_Q SBGEMM_Q
|
||||
#define GEMM_R SBGEMM_R
|
||||
#define GEMM_UNROLL_M SBGEMM_UNROLL_M
|
||||
#define GEMM_UNROLL_N SBGEMM_UNROLL_N
|
||||
#define GEMM_UNROLL_MN SBGEMM_UNROLL_MN
|
||||
#define GEMM_DEFAULT_P SBGEMM_DEFAULT_P
|
||||
#define GEMM_DEFAULT_Q SBGEMM_DEFAULT_Q
|
||||
#define GEMM_DEFAULT_R SBGEMM_DEFAULT_R
|
||||
#define GEMM_DEFAULT_UNROLL_M SBGEMM_DEFAULT_UNROLL_M
|
||||
#define GEMM_DEFAULT_UNROLL_N SBGEMM_DEFAULT_UNROLL_N
|
||||
#else
|
||||
#define GEMM_P SGEMM_P
|
||||
#define GEMM_Q SGEMM_Q
|
||||
|
@ -1404,7 +1360,7 @@ extern gotoblas_t *gotoblas;
|
|||
#define GEMM_DEFAULT_UNROLL_N SGEMM_DEFAULT_UNROLL_N
|
||||
#endif
|
||||
#else
|
||||
#if (XDOUBLE)
|
||||
#if defined(XDOUBLE)
|
||||
#define GEMM_P XGEMM_P
|
||||
#define GEMM_Q XGEMM_Q
|
||||
#define GEMM_R XGEMM_R
|
||||
|
@ -1475,8 +1431,8 @@ extern gotoblas_t *gotoblas;
|
|||
#define GEMM_THREAD gemm_thread_n
|
||||
#endif
|
||||
|
||||
#ifndef SHGEMM_DEFAULT_R
|
||||
#define SHGEMM_DEFAULT_R (((BUFFER_SIZE - ((SHGEMM_DEFAULT_P * SHGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SHGEMM_DEFAULT_Q * 4) - 15) & ~15UL)
|
||||
#ifndef SBGEMM_DEFAULT_R
|
||||
#define SBGEMM_DEFAULT_R (((BUFFER_SIZE - ((SBGEMM_DEFAULT_P * SBGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SBGEMM_DEFAULT_Q * 4) - 15) & ~15UL)
|
||||
#endif
|
||||
|
||||
#ifndef SGEMM_DEFAULT_R
|
||||
|
@ -1518,7 +1474,7 @@ extern gotoblas_t *gotoblas;
|
|||
#ifndef GEMM3M_P
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_P XGEMM3M_P
|
||||
#elif defined (DOUBLE)
|
||||
#elif defined(DOUBLE)
|
||||
#define GEMM3M_P ZGEMM3M_P
|
||||
#else
|
||||
#define GEMM3M_P CGEMM3M_P
|
||||
|
@ -1528,7 +1484,7 @@ extern gotoblas_t *gotoblas;
|
|||
#ifndef GEMM3M_Q
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_Q XGEMM3M_Q
|
||||
#elif defined (DOUBLE)
|
||||
#elif defined(DOUBLE)
|
||||
#define GEMM3M_Q ZGEMM3M_Q
|
||||
#else
|
||||
#define GEMM3M_Q CGEMM3M_Q
|
||||
|
@ -1538,7 +1494,7 @@ extern gotoblas_t *gotoblas;
|
|||
#ifndef GEMM3M_R
|
||||
#ifdef XDOUBLE
|
||||
#define GEMM3M_R XGEMM3M_R
|
||||
#elif defined (DOUBLE)
|
||||
#elif defined(DOUBLE)
|
||||
#define GEMM3M_R ZGEMM3M_R
|
||||
#else
|
||||
#define GEMM3M_R CGEMM3M_R
|
||||
|
|
|
@ -9,8 +9,8 @@
|
|||
int main(int argc, char **argv) {
|
||||
|
||||
if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) {
|
||||
printf("SHGEMM_UNROLL_M=%d\n", SHGEMM_DEFAULT_UNROLL_M);
|
||||
printf("SHGEMM_UNROLL_N=%d\n", SHGEMM_DEFAULT_UNROLL_N);
|
||||
printf("SBGEMM_UNROLL_M=%d\n", SBGEMM_DEFAULT_UNROLL_M);
|
||||
printf("SBGEMM_UNROLL_N=%d\n", SBGEMM_DEFAULT_UNROLL_N);
|
||||
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M);
|
||||
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N);
|
||||
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M);
|
||||
|
|
32
param.h
32
param.h
|
@ -72,12 +72,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#ifndef PARAM_H
|
||||
#define PARAM_H
|
||||
|
||||
#define SHGEMM_DEFAULT_UNROLL_N 4
|
||||
#define SHGEMM_DEFAULT_UNROLL_M 8
|
||||
#define SHGEMM_DEFAULT_UNROLL_MN 32
|
||||
#define SHGEMM_DEFAULT_P 256
|
||||
#define SHGEMM_DEFAULT_R 256
|
||||
#define SHGEMM_DEFAULT_Q 256
|
||||
#define SBGEMM_DEFAULT_UNROLL_N 4
|
||||
#define SBGEMM_DEFAULT_UNROLL_M 8
|
||||
#define SBGEMM_DEFAULT_UNROLL_MN 32
|
||||
#define SBGEMM_DEFAULT_P 256
|
||||
#define SBGEMM_DEFAULT_R 256
|
||||
#define SBGEMM_DEFAULT_Q 256
|
||||
#ifdef OPTERON
|
||||
|
||||
#define SNUMOPT 4
|
||||
|
@ -2426,16 +2426,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#if defined(POWER10)
|
||||
#undef SHGEMM_DEFAULT_UNROLL_N
|
||||
#undef SHGEMM_DEFAULT_UNROLL_M
|
||||
#undef SHGEMM_DEFAULT_P
|
||||
#undef SHGEMM_DEFAULT_R
|
||||
#undef SHGEMM_DEFAULT_Q
|
||||
#define SHGEMM_DEFAULT_UNROLL_M 16
|
||||
#define SHGEMM_DEFAULT_UNROLL_N 8
|
||||
#define SHGEMM_DEFAULT_P 832
|
||||
#define SHGEMM_DEFAULT_Q 1026
|
||||
#define SHGEMM_DEFAULT_R 4096
|
||||
#undef SBGEMM_DEFAULT_UNROLL_N
|
||||
#undef SBGEMM_DEFAULT_UNROLL_M
|
||||
#undef SBGEMM_DEFAULT_P
|
||||
#undef SBGEMM_DEFAULT_R
|
||||
#undef SBGEMM_DEFAULT_Q
|
||||
#define SBGEMM_DEFAULT_UNROLL_M 16
|
||||
#define SBGEMM_DEFAULT_UNROLL_N 8
|
||||
#define SBGEMM_DEFAULT_P 832
|
||||
#define SBGEMM_DEFAULT_Q 1026
|
||||
#define SBGEMM_DEFAULT_R 4096
|
||||
#endif
|
||||
|
||||
#if defined(SPARC) && defined(V7)
|
||||
|
|
Loading…
Reference in New Issue