Merge pull request #3915 from martin-frbg/issue3910

Fix DYNAMIC_ARCH builds that select only a subset of precisions
This commit is contained in:
Martin Kroeker 2023-02-24 07:41:33 +01:00 committed by GitHub
commit 039e27545f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 5542 additions and 137 deletions

5089
Makefile.L3 Normal file

File diff suppressed because it is too large. Load Diff

View File

@ -22,7 +22,7 @@ set(SCLAUX
slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
slasr.f slasrt.f slassq.f90 slasv2.f spttrf.f sstebz.f sstedc.f
ssteqr.f ssterf.f slaisnan.f sisnan.f
ssteqr.f ssterf.f slaisnan.f sisnan.f slarmm.f
slartgp.f slartgs.f ../INSTALL/sroundup_lwork.f
../INSTALL/second_${TIMER}.f)
@ -42,7 +42,7 @@ set(DZLAUX
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasr.f dlasrt.f dlassq.f90 dlasv2.f dpttrf.f dstebz.f dstedc.f
dsteqr.f dsterf.f dlaisnan.f disnan.f
dsteqr.f dsterf.f dlaisnan.f disnan.f dlarmm.f
dlartgp.f dlartgs.f ../INSTALL/droundup_lwork.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f)
@ -124,7 +124,7 @@ set(SLASRC
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
sgesvdq.f slaorhr_col_getrfnp.f
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f
slarmm.f slatrs3.f strsyl3.f sgelst.f)
slatrs3.f strsyl3.f sgelst.f)
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@ -316,7 +316,7 @@ set(DLASRC
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f
dlarmm.f dlatrs3.f dtrsyl3.f dgelst.f)
dlatrs3.f dtrsyl3.f dgelst.f)
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@ -523,7 +523,7 @@ set(SCLAUX
slaset.c slasq1.c slasq2.c slasq3.c slasq4.c slasq5.c slasq6.c
slasr.c slasrt.c slassq.c slasv2.c spttrf.c sstebz.c sstedc.c
ssteqr.c ssterf.c slaisnan.c sisnan.c
slartgp.c slartgs.c
slartgp.c slartgs.c slarmm.c
../INSTALL/second_${TIMER}.c)
set(DZLAUX
@ -542,7 +542,7 @@ set(DZLAUX
dlasq1.c dlasq2.c dlasq3.c dlasq4.c dlasq5.c dlasq6.c
dlasr.c dlasrt.c dlassq.c dlasv2.c dpttrf.c dstebz.c dstedc.c
dsteqr.c dsterf.c dlaisnan.c disnan.c
dlartgp.c dlartgs.c
dlartgp.c dlartgs.c dlarmm.c
../INSTALL/dlamch.c ../INSTALL/dsecnd_${TIMER}.c)
set(SLASRC
@ -622,7 +622,7 @@ set(SLASRC
ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c
sgesvdq.c slaorhr_col_getrfnp.c
slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c
slarmm.c slatrs3.c strsyl3.c sgelst.c)
slatrs3.c strsyl3.c sgelst.c)
set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c
sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c
@ -812,7 +812,7 @@ set(DLASRC
dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c
dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c
dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c
dlarmm.c dlatrs3.c dtrsyl3.c dgelst.c)
dlatrs3.c dtrsyl3.c dgelst.c)
set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c
dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c

View File

@ -47,7 +47,7 @@ typedef struct {
int dtb_entries;
int offsetA, offsetB, align;
#ifdef BUILD_BFLOAT16
#if BUILD_BFLOAT16 == 1
int sbgemm_p, sbgemm_q, sbgemm_r;
int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
int sbgemm_align_k;
@ -161,51 +161,59 @@ BLASLONG (*isbmin_k) (BLASLONG, float *, BLASLONG);
#endif
#endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
#if (BUILD_SINGLE == 1) || (BUILD_DOUBLE == 1) || (BUILD_COMPLEX == 1) || (BUILD_COMPLEX16 == 1)
int sgemm_p, sgemm_q, sgemm_r;
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn;
#endif
int exclusive_cache;
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
#if (BUILD_SINGLE == 1) || (BUILD_COMPLEX == 1)
float (*samax_k) (BLASLONG, float *, BLASLONG);
float (*samin_k) (BLASLONG, float *, BLASLONG);
float (*smax_k) (BLASLONG, float *, BLASLONG);
float (*smin_k) (BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE ==1) || (BUILD_COMPLEX==1)
BLASLONG (*isamax_k)(BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
BLASLONG (*isamin_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*ismax_k) (BLASLONG, float *, BLASLONG);
BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
float (*snrm2_k) (BLASLONG, float *, BLASLONG);
float (*sasum_k) (BLASLONG, float *, BLASLONG);
#endif
#ifdef BUILD_SINGLE
#if (BUILD_SINGLE==1)
float (*ssum_k) (BLASLONG, float *, BLASLONG);
#endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
int (*sscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
int (*sswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif
#ifdef BUILD_SINGLE
#if (BUILD_SINGLE==1)
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
#ifdef ARCH_X86_64
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
@ -220,7 +228,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
#endif
#ifdef BUILD_SINGLE
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
#ifdef SMALL_MATRIX_OPT
int (*sgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float beta);
@ -256,7 +264,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*strsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*strsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
int (*strsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *);
#endif
#if (BUILD_SINGLE==1)
int (*strmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
@ -288,12 +297,12 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
int dgemm_p, dgemm_q, dgemm_r;
int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn;
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
double (*damax_k) (BLASLONG, double *, BLASLONG);
double (*damin_k) (BLASLONG, double *, BLASLONG);
double (*dmax_k) (BLASLONG, double *, BLASLONG);
@ -302,23 +311,21 @@ BLASLONG (*idamax_k)(BLASLONG, double *, BLASLONG);
BLASLONG (*idamin_k)(BLASLONG, double *, BLASLONG);
BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG);
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
double (*dasum_k) (BLASLONG, double *, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
#if (BUILD_DOUBLE==1)
double (*dsum_k) (BLASLONG, double *, BLASLONG);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
#endif
#if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE)
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@ -326,13 +333,13 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif
#ifdef BUILD_DOUBLE
#if (BUILD_DOUBLE==1)
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@ -341,7 +348,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
#endif
#ifdef BUILD_DOUBLE
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
#ifdef SMALL_MATRIX_OPT
int (*dgemm_small_matrix_permit)(int transa, int transb, BLASLONG m, BLASLONG n, BLASLONG k, double alpha, double beta);
@ -355,6 +362,8 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dgemm_small_kernel_b0_tn )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
int (*dgemm_small_kernel_b0_tt )(BLASLONG m, BLASLONG n, BLASLONG k, double * A, BLASLONG lda, double alpha, double * B, BLASLONG ldb, double * C, BLASLONG ldc);
#endif
#endif
#if (BUILD_DOUBLE==1)
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
@ -501,23 +510,25 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
#if (BUILD_COMPLEX==1)
int cgemm_p, cgemm_q, cgemm_r;
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;
float (*camax_k) (BLASLONG, float *, BLASLONG);
float (*camin_k) (BLASLONG, float *, BLASLONG);
BLASLONG (*icamax_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
float (*cnrm2_k) (BLASLONG, float *, BLASLONG);
float (*casum_k) (BLASLONG, float *, BLASLONG);
float (*csum_k) (BLASLONG, float *, BLASLONG);
int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*caxpyc_k)(BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*cscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
@ -711,7 +722,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif
#ifdef BUILD_COMPLEX16
#if (BUILD_COMPLEX16 == 1)
int zgemm_p, zgemm_q, zgemm_r;
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;
@ -1093,34 +1104,34 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
void (*init)(void);
int snum_opt, dnum_opt, qnum_opt;
#ifdef BUILD_SINGLE
#if (BUILD_SINGLE==1)
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
#if (BUILD_DOUBLE==1)
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
#if (BUILD_COMPLEX==1)
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX16
#if (BUILD_COMPLEX16==1)
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
#endif
#ifdef BUILD_SINGLE
#if (BUILD_SINGLE==1)
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
#if (BUILD_DOUBLE==1)
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
#if (BUILD_COMPLEX==1)
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
@ -1132,7 +1143,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX16
#if (BUILD_COMPLEX16==1)
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
@ -1144,21 +1155,21 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
#endif
#ifdef BUILD_SINGLE
#if (BUILD_SINGLE==1)
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
#if (BUILD_DOUBLE==1)
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
#if (BUILD_COMPLEX==1)
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
@ -1170,7 +1181,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX16
#if (BUILD_COMPLEX16==1)
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
@ -1182,16 +1193,16 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
#endif
#ifdef BUILD_SINGLE
#if (BUILD_SINGLE==1)
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
#if (BUILD_DOUBLE==1)
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
#if (BUILD_COMPLEX==1)
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
#endif
#ifdef BUILD_COMPLEX16
#if (BUILD_COMPLEX16==1)
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
#endif
} gotoblas_t;
@ -1207,7 +1218,7 @@ extern gotoblas_t *gotoblas;
#define HAVE_EX_L2 gotoblas -> exclusive_cache
#ifdef BUILD_BFLOAT16
#if (BUILD_BFLOAT16==1)
#define SBGEMM_P gotoblas -> sbgemm_p
#define SBGEMM_Q gotoblas -> sbgemm_q
#define SBGEMM_R gotoblas -> sbgemm_r
@ -1216,7 +1227,7 @@ extern gotoblas_t *gotoblas;
#define SBGEMM_UNROLL_MN gotoblas -> sbgemm_unroll_mn
#endif
#if defined (BUILD_SINGLE)
#if (BUILD_SINGLE==1)
#define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R gotoblas -> sgemm_r
@ -1225,30 +1236,14 @@ extern gotoblas_t *gotoblas;
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif
#if defined (BUILD_DOUBLE)
#if (BUILD_DOUBLE==1)
#define DGEMM_P gotoblas -> dgemm_p
#define DGEMM_Q gotoblas -> dgemm_q
#define DGEMM_R gotoblas -> dgemm_r
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
#endif
#define QGEMM_P gotoblas -> qgemm_p
#define QGEMM_Q gotoblas -> qgemm_q
#define QGEMM_R gotoblas -> qgemm_r
#define QGEMM_UNROLL_M gotoblas -> qgemm_unroll_m
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
#ifdef BUILD_COMPLEX
#define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
#ifndef BUILD_SINGLE
#if (BUILD_SINGLE != 1)
#define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R 1024
@ -1258,14 +1253,38 @@ extern gotoblas_t *gotoblas;
#endif
#endif
#ifdef BUILD_COMPLEX16
#define QGEMM_P gotoblas -> qgemm_p
#define QGEMM_Q gotoblas -> qgemm_q
#define QGEMM_R gotoblas -> qgemm_r
#define QGEMM_UNROLL_M gotoblas -> qgemm_unroll_m
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
#if (BUILD_COMPLEX==1)
#define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
#if (BUILD_SINGLE != 1)
#define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R 1024
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif
#endif
#if (BUILD_COMPLEX16==1)
#define ZGEMM_P gotoblas -> zgemm_p
#define ZGEMM_Q gotoblas -> zgemm_q
#define ZGEMM_R gotoblas -> zgemm_r
#define ZGEMM_UNROLL_M gotoblas -> zgemm_unroll_m
#define ZGEMM_UNROLL_N gotoblas -> zgemm_unroll_n
#define ZGEMM_UNROLL_MN gotoblas -> zgemm_unroll_mn
#ifndef BUILD_DOUBLE
#if (BUILD_DOUBLE != 1)
#define DGEMM_P gotoblas -> dgemm_p
#define DGEMM_Q gotoblas -> dgemm_q
#define DGEMM_R 1024
@ -1273,6 +1292,14 @@ extern gotoblas_t *gotoblas;
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
#endif
#if (BUILD_COMPLEX != 1)
#define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
#endif
#endif
#define XGEMM_P gotoblas -> xgemm_p
@ -1319,7 +1346,7 @@ extern gotoblas_t *gotoblas;
#define HAVE_EX_L2 0
#endif
#ifdef BUILD_BFLOAT16
#if (BUILD_BFLOAT16 == 1)
#define SBGEMM_P SBGEMM_DEFAULT_P
#define SBGEMM_Q SBGEMM_DEFAULT_Q
#define SBGEMM_R SBGEMM_DEFAULT_R

View File

@ -237,8 +237,74 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
if (DGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE")
endif ()
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE")
GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE")
GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE")
if (SMALL_MATRIX_OPT)
if (NOT DEFINED DGEMM_SMALL_M_PERMIT)
set(DGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_NN)
set(DGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_NT)
set(DGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_TN)
set(DGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_TT)
set(DGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_NN)
set(DGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_NT)
set(DGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_TN)
set(DGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_TT)
set(DGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "DOUBLE")
endif ()
endif ()
if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE")
if (SGEMMINCOPY)
@ -825,7 +891,7 @@ endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach ()
if (BUILD_DOUBLE AND NOT BUILD_SINGLE)
if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE")
@ -849,6 +915,45 @@ endif ()
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false "SINGLE")
if (SMALL_MATRIX_OPT)
if (NOT DEFINED SGEMM_SMALL_M_PERMIT)
set(SGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_NN)
set(SGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_NT)
set(SGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_TN)
set(SGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_TT)
set(SGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_B0_NN)
set(SGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_B0_NT)
set(SGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_B0_TN)
set(SGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED SGEMM_SMALL_K_B0_TT)
set(SGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_NN}" "" "gemm_small_kernel_nn" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_NT}" "" "gemm_small_kernel_nt" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_TN}" "" "gemm_small_kernel_tn" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_TT}" "" "gemm_small_kernel_tt" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_NN}" "B0" "gemm_small_kernel_b0_nn" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_NT}" "B0" "gemm_small_kernel_b0_nt" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_TN}" "B0" "gemm_small_kernel_b0_tn" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMM_SMALL_K_B0_TT}" "B0" "gemm_small_kernel_b0_tt" false "" "" false "SINGLE")
endif ()
endif ()
# Makefile.LA
@ -878,25 +983,25 @@ endif ()
endforeach()
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
if (NOT DEFINED SNEG_TCOPY)
set(SNEG_TCOPY ../generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c)
set(SNEG_TCOPY ../generic/neg_tcopy_${SGEMM_UNROLL_M}.c)
endif ()
if (NOT DEFINED SLASWP_NCOPY)
set(SLASWP_NCOPY ../generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c)
set(SLASWP_NCOPY ../generic/laswp_ncopy_${SGEMM_UNROLL_N}.c)
endif ()
GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}" "" "neg_tcopy" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "SINGLE")
endif()
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
if (NOT DEFINED DNEG_TCOPY)
set(DNEG_TCOPY ../generic/neg_tcopy_${${float_char}GEMM_UNROLL_M}.c)
set(DNEG_TCOPY ../generic/neg_tcopy_${DGEMM_UNROLL_M}.c)
endif ()
if (NOT DEFINED DLASWP_NCOPY)
set(DLASWP_NCOPY ../generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c)
set(DLASWP_NCOPY ../generic/laswp_ncopy_${DGEMM_UNROLL_N}.c)
endif ()
GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}" "" "neg_tcopy" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}" "" "laswp_ncopy" false "" "" false "DOUBLE")
endif()
endif()
@ -979,10 +1084,117 @@ endif ()
endif ()
# Double-complex (BUILD_COMPLEX16) requested while BUILD_DOUBLE is off:
# register the real double-precision ("DOUBLE") kernel objects listed below.
# Presumably the double-complex code paths reference these kernels - confirm
# against the kernel table before trimming this list.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
# Level-1 kernels. max/min and their index variants are only registered when
# the corresponding *KERNEL variable has been defined.
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE")
if (DEFINED DMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED DMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED IDMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED IDMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE")
endif ()
GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE")
# Level-2 (gemv) and the main GEMM kernel.
GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE")
# GEMM copy routines: each is registered only when its source variable is set;
# the object name comes from the matching *OBJ variable.
if (DGEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
if (DGEMMITCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
if (DGEMMONCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
if (DGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE")
endif ()
GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE")
# Unlike the calls above, these two use a path that does not go through
# ${KERNELDIR} and pass ${TSUFFIX} as the sixth argument instead of "".
# NOTE(review): another "BUILD_COMPLEX16 AND NOT BUILD_DOUBLE" section in this
# file also registers neg_tcopy/laswp_ncopy for DOUBLE - confirm the two
# registrations do not produce duplicate objects.
GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE")
GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE")
# Small-matrix GEMM kernels: any source variable that is not already defined
# falls back to the generic implementation. The B0 variables default to the
# same generic sources as their non-B0 counterparts; the B0 define passed at
# registration time is what distinguishes the resulting objects.
if (SMALL_MATRIX_OPT)
if (NOT DEFINED DGEMM_SMALL_M_PERMIT)
set(DGEMM_SMALL_M_PERMIT ../generic/gemm_small_matrix_permit.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_NN)
set(DGEMM_SMALL_K_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_NT)
set(DGEMM_SMALL_K_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_TN)
set(DGEMM_SMALL_K_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_TT)
set(DGEMM_SMALL_K_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_NN)
set(DGEMM_SMALL_K_B0_NN ../generic/gemm_small_matrix_kernel_nn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_NT)
set(DGEMM_SMALL_K_B0_NT ../generic/gemm_small_matrix_kernel_nt.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_TN)
set(DGEMM_SMALL_K_B0_TN ../generic/gemm_small_matrix_kernel_tn.c)
endif ()
if (NOT DEFINED DGEMM_SMALL_K_B0_TT)
set(DGEMM_SMALL_K_B0_TT ../generic/gemm_small_matrix_kernel_tt.c)
endif ()
# NOTE(review): sixteen variants (NN/NR/RN/RR, NT/NC/RT/RC, ...) are registered
# per kernel family here, whereas the SINGLE section of this file registers
# only b0_nn/b0_nt/b0_tn/b0_tt with a plain "B0" define - confirm the extra
# R/C variants are intended for a real-precision kernel.
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_M_PERMIT}" "" "gemm_small_matrix_permit" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NN" "gemm_small_kernel_nn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "NR" "gemm_small_kernel_nr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RN" "gemm_small_kernel_rn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NN}" "RR" "gemm_small_kernel_rr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NT" "gemm_small_kernel_nt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "NC" "gemm_small_kernel_nc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RT" "gemm_small_kernel_rt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_NT}" "RC" "gemm_small_kernel_rc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TN" "gemm_small_kernel_tn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "TR" "gemm_small_kernel_tr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CN" "gemm_small_kernel_cn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TN}" "CR" "gemm_small_kernel_cr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TT" "gemm_small_kernel_tt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "TC" "gemm_small_kernel_tc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CT" "gemm_small_kernel_ct" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_TT}" "CC" "gemm_small_kernel_cc" false "" "" false "DOUBLE")
# beta == 0 variants: same variant list, with B0 added to the defines.
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NN}" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_NT}" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TN}" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMM_SMALL_K_B0_TT}" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "DOUBLE")
endif ()
endif ()
if (BUILD_COMPLEX16 AND NOT BUILD_COMPLEX)
# Double-complex requested while BUILD_SINGLE is off: still register the real
# single-precision scal kernel (presumably referenced by code that is built
# in this configuration - confirm against the kernel table).
if (BUILD_COMPLEX16 AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE")
endif()
# Double-complex requested while BUILD_COMPLEX is off: register the C-prefixed
# ("COMPLEX") kernel objects that follow. The guard must test BUILD_COMPLEX16;
# a misspelled variable name is simply undefined and makes this section dead.
if (BUILD_COMPLEX16 AND NOT BUILD_COMPLEX)
GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX")
if (DEFINED CMAXKERNEL)
@ -1046,7 +1258,69 @@ endif ()
if (CGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX")
endif ()
GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX")
# Small-matrix GEMM kernels for the "COMPLEX" type. Any source variable that
# is not already defined falls back to the generic zgemm_small_matrix_*
# implementation. The B0 variables default to the same generic sources as
# their non-B0 counterparts; the B0 define passed at registration time is what
# distinguishes the resulting objects.
# NOTE(review): the defaults below carry no file extension and every
# GenerateNamedObjects call appends ".c". A value defined elsewhere that
# already ends in ".c" would end up as "*.c.c" - confirm how target kernel
# configurations spell these variables.
if (SMALL_MATRIX_OPT)
if (NOT DEFINED CGEMM_SMALL_M_PERMIT)
set(CGEMM_SMALL_M_PERMIT ../generic/zgemm_small_matrix_permit)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_NN)
set(CGEMM_SMALL_K_NN ../generic/zgemm_small_matrix_kernel_nn)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_NT)
set(CGEMM_SMALL_K_NT ../generic/zgemm_small_matrix_kernel_nt)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_TN)
set(CGEMM_SMALL_K_TN ../generic/zgemm_small_matrix_kernel_tn)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_TT)
set(CGEMM_SMALL_K_TT ../generic/zgemm_small_matrix_kernel_tt)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_B0_NN)
set(CGEMM_SMALL_K_B0_NN ../generic/zgemm_small_matrix_kernel_nn)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_B0_NT)
set(CGEMM_SMALL_K_B0_NT ../generic/zgemm_small_matrix_kernel_nt)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_B0_TN)
set(CGEMM_SMALL_K_B0_TN ../generic/zgemm_small_matrix_kernel_tn)
endif ()
if (NOT DEFINED CGEMM_SMALL_K_B0_TT)
set(CGEMM_SMALL_K_B0_TT ../generic/zgemm_small_matrix_kernel_tt)
endif ()
# Each of the four kernel sources is compiled four times, once per
# define (e.g. NN/NR/RN/RR), giving sixteen objects per family.
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_M_PERMIT}.c" "" "gemm_small_matrix_permit" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "NN" "gemm_small_kernel_nn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "NR" "gemm_small_kernel_nr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "RN" "gemm_small_kernel_rn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NN}.c" "RR" "gemm_small_kernel_rr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "NT" "gemm_small_kernel_nt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "NC" "gemm_small_kernel_nc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "RT" "gemm_small_kernel_rt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_NT}.c" "RC" "gemm_small_kernel_rc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "TN" "gemm_small_kernel_tn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "TR" "gemm_small_kernel_tr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "CN" "gemm_small_kernel_cn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TN}.c" "CR" "gemm_small_kernel_cr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "TT" "gemm_small_kernel_tt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "TC" "gemm_small_kernel_tc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "CT" "gemm_small_kernel_ct" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_TT}.c" "CC" "gemm_small_kernel_cc" false "" "" false "COMPLEX")
# beta == 0 variants: same variant list, with B0 added to the defines.
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "NN;B0" "gemm_small_kernel_b0_nn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "NR;B0" "gemm_small_kernel_b0_nr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "RN;B0" "gemm_small_kernel_b0_rn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NN}.c" "RR;B0" "gemm_small_kernel_b0_rr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "NT;B0" "gemm_small_kernel_b0_nt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "NC;B0" "gemm_small_kernel_b0_nc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "RT;B0" "gemm_small_kernel_b0_rt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_NT}.c" "RC;B0" "gemm_small_kernel_b0_rc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "TN;B0" "gemm_small_kernel_b0_tn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "TR;B0" "gemm_small_kernel_b0_tr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "CN;B0" "gemm_small_kernel_b0_cn" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TN}.c" "CR;B0" "gemm_small_kernel_b0_cr" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "TT;B0" "gemm_small_kernel_b0_tt" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "TC;B0" "gemm_small_kernel_b0_tc" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "CT;B0" "gemm_small_kernel_b0_ct" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMM_SMALL_K_B0_TT}.c" "CC;B0" "gemm_small_kernel_b0_cc" false "" "" false "COMPLEX")
endif ()
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX")

View File

@ -207,9 +207,12 @@ ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
SBLASOBJS += \
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX)
endif
# Add the single-precision symv objects when either BUILD_SINGLE or
# BUILD_DOUBLE expands to a non-empty value ($(or) yields the first
# non-empty argument, or the empty string if there is none).
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE))" ""
SBLASOBJS += \
ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += \
ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \
sger_k$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
@ -359,8 +362,7 @@ $(KDIR)xgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNE
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
ifeq ($(BUILD_SINGLE),1)
# Provide the ssymv_U build rule when either BUILD_SINGLE or BUILD_DOUBLE is
# set, matching the condition under which ssymv_U is added to SBLASOBJS.
# Both operands must be variable references: a bare "(BUILD_SINGLE)" is a
# literal non-empty string and would make this test unconditionally true.
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
$(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM)
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@

View File

@ -137,9 +137,14 @@ gotoblas_t TABLE_NAME = {
0,
#endif
#if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
#if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
samax_kTS, samin_kTS, smax_kTS, smin_kTS,
isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
isamax_kTS,
#endif
#if (BUILD_SINGLE==1 ) || (BUILD_COMPLEX==1)
isamin_kTS, ismax_kTS, ismin_kTS,
snrm2_kTS, sasum_kTS,
#endif
#if BUILD_SINGLE == 1
@ -160,6 +165,8 @@ gotoblas_t TABLE_NAME = {
#endif
#if BUILD_SINGLE == 1
sger_kTS,
#endif
#if BUILD_SINGLE == 1
ssymv_LTS, ssymv_UTS,
#endif
@ -178,7 +185,7 @@ gotoblas_t TABLE_NAME = {
sgemm_oncopyTS, sgemm_otcopyTS,
#endif
#if BUILD_SINGLE == 1
#if BUILD_SINGLE == 1 || BUILD_DOUBLE == 1 || BUILD_COMPLEX == 1
#ifdef SMALL_MATRIX_OPT
sgemm_small_matrix_permitTS,
sgemm_small_kernel_nnTS, sgemm_small_kernel_ntTS, sgemm_small_kernel_tnTS, sgemm_small_kernel_ttTS,
@ -186,7 +193,7 @@ gotoblas_t TABLE_NAME = {
#endif
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX == 1)
strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
@ -198,7 +205,7 @@ gotoblas_t TABLE_NAME = {
strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
#endif
#if BUILD_SINGLE == 1
#if (BUILD_SINGLE==1)
strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
@ -215,8 +222,6 @@ gotoblas_t TABLE_NAME = {
ssymm_outcopyTS, ssymm_oltcopyTS,
#endif
ssymm_outcopyTS, ssymm_oltcopyTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
#ifndef NO_LAPACK
sneg_tcopyTS, slaswp_ncopyTS,
#else
@ -224,7 +229,7 @@ gotoblas_t TABLE_NAME = {
#endif
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
0, 0, 0,
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
#ifdef DGEMM_DEFAULT_UNROLL_MN
@ -235,7 +240,7 @@ gotoblas_t TABLE_NAME = {
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
dnrm2_kTS, dasum_kTS,
@ -243,13 +248,13 @@ gotoblas_t TABLE_NAME = {
#if (BUILD_DOUBLE==1)
dsum_kTS,
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
dcopy_kTS, ddot_kTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
dsdot_kTS,
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
drot_kTS,
daxpy_kTS,
dscal_kTS,
@ -261,7 +266,7 @@ gotoblas_t TABLE_NAME = {
dsymv_LTS, dsymv_UTS,
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
dgemm_kernelTS, dgemm_betaTS,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
dgemm_incopyTS, dgemm_itcopyTS,
@ -271,12 +276,14 @@ gotoblas_t TABLE_NAME = {
dgemm_oncopyTS, dgemm_otcopyTS,
#endif
#if (BUILD_DOUBLE==1)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
#ifdef SMALL_MATRIX_OPT
dgemm_small_matrix_permitTS,
dgemm_small_kernel_nnTS, dgemm_small_kernel_ntTS, dgemm_small_kernel_tnTS, dgemm_small_kernel_ttTS,
dgemm_small_kernel_b0_nnTS, dgemm_small_kernel_b0_ntTS, dgemm_small_kernel_b0_tnTS, dgemm_small_kernel_b0_ttTS,
#endif
#endif
#if (BUILD_DOUBLE==1)
dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
@ -366,7 +373,7 @@ gotoblas_t TABLE_NAME = {
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
#if (BUILD_COMPLEX)
0, 0, 0,
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
#ifdef CGEMM_DEFAULT_UNROLL_MN
@ -374,18 +381,23 @@ gotoblas_t TABLE_NAME = {
#else
MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
#endif
camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
#if (BUILD_COMPLEX)
camax_kTS, camin_kTS,
#endif
#if (BUILD_COMPLEX)
icamax_kTS,
#endif
#if (BUILD_COMPLEX)
icamin_kTS,
cnrm2_kTS, casum_kTS, csum_kTS,
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
ccopy_kTS, cdotu_kTS, cdotc_kTS,
#if (BUILD_COMPLEX)
ccopy_kTS, cdotu_kTS, cdotc_kTS,
#endif
#if (BUILD_COMPLEX)
csrot_kTS,
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
#if (BUILD_COMPLEX)
caxpy_kTS,
caxpyc_kTS,
cscal_kTS,
@ -399,7 +411,7 @@ gotoblas_t TABLE_NAME = {
csymv_LTS, csymv_UTS,
chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
#if (BUILD_COMPLEX)
cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
cgemm_betaTS,
#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
@ -434,6 +446,7 @@ gotoblas_t TABLE_NAME = {
ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
#endif
#endif
#if (BUILD_COMPLEX)
ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
@ -524,7 +537,7 @@ gotoblas_t TABLE_NAME = {
#endif
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
#if (BUILD_COMPLEX)
#ifndef NO_LAPACK
cneg_tcopyTS,
@ -880,7 +893,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE == 1
#if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX==1
@ -893,10 +906,10 @@ static void init_parameter(void) {
#if (BUILD_BFLOAT16)
TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
#endif
#if BUILD_SINGLE == 1
#if BUILD_SINGLE == 1 || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
#endif
#if BUILD_DOUBLE== 1
#if BUILD_DOUBLE== 1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX== 1
@ -909,10 +922,10 @@ static void init_parameter(void) {
#if (BUILD_BFLOAT16)
TABLE_NAME.sbgemm_r = SBGEMM_DEFAULT_R;
#endif
#if BUILD_SINGLE == 1
#if BUILD_SINGLE == 1 || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
#endif
#if BUILD_DOUBLE==1
#if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
#endif
#if BUILD_COMPLEX==1
@ -1315,7 +1328,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
#endif
#if BUILD_DOUBLE == 1
#if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
@ -1339,7 +1352,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
#endif
#if BUILD_DOUBLE == 1
#if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
@ -1363,7 +1376,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 256;
#endif
#if BUILD_DOUBLE ==1
#if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 128;
#endif
#if BUILD_COMPLEX==1
@ -1387,7 +1400,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
#endif
#if BUILD_DOUBLE ==1
#if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
@ -1411,7 +1424,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
#endif
#if BUILD_DOUBLE==1
#if BUILD_DOUBLE==1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
@ -1435,7 +1448,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_DOUBLE == 1
#if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
@ -1459,7 +1472,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_DOUBLE ==1
#if BUILD_DOUBLE ==1 || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
@ -1484,7 +1497,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
#if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
@ -1508,7 +1521,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
#if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
@ -1556,7 +1569,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
#if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
@ -1581,7 +1594,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
#endif
#if BUILD_DOUBLE
#if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX
@ -1605,7 +1618,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
#if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
@ -1629,7 +1642,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
#if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
@ -1653,7 +1666,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
#if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
@ -1677,7 +1690,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
#if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
@ -1702,7 +1715,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
#if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
@ -1726,7 +1739,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
#if BUILD_DOUBLE || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
@ -1750,7 +1763,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
@ -1775,7 +1788,7 @@ static void init_parameter(void) {
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if (BUILD_DOUBLE==1)
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if (BUILD_COMPLEX==1)

View File

@ -94,7 +94,7 @@ SCLAUX = \
slagts.o slamrg.o slanst.o \
slapy2.o slapy3.o slarnv.o \
slarra.o slarrb.o slarrc.o slarrd.o slarre.o slarrf.o slarrj.o \
slarrk.o slarrr.o slaneg.o \
slarrk.o slarrr.o slaneg.o slarmm.o \
slartg.o slaruv.o slas2.o slascl.o \
slasd0.o slasd1.o slasd2.o slasd3.o slasd4.o slasd5.o slasd6.o \
slasd7.o slasd8.o slasda.o slasdq.o slasdt.o \
@ -116,7 +116,7 @@ DZLAUX = \
dlagts.o dlamrg.o dlanst.o \
dlapy2.o dlapy3.o dlarnv.o \
dlarra.o dlarrb.o dlarrc.o dlarrd.o dlarre.o dlarrf.o dlarrj.o \
dlarrk.o dlarrr.o dlaneg.o \
dlarrk.o dlarrr.o dlaneg.o dlarmm.o \
dlartg.o dlaruv.o dlas2.o dlascl.o \
dlasd0.o dlasd1.o dlasd2.o dlasd3.o dlasd4.o dlasd5.o dlasd6.o \
dlasd7.o dlasd8.o dlasda.o dlasdq.o dlasdt.o \
@ -207,7 +207,7 @@ SLASRC_O = \
ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \
ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \
ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \
sgesvdq.o slarmm.o slatrs3.o strsyl3.o sgelst.o
sgesvdq.o slatrs3.o strsyl3.o sgelst.o
endif
@ -417,7 +417,7 @@ DLASRC_O = \
dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \
dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \
dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \
dgesvdq.o dlarmm.o dlatrs3.o dtrsyl3.o dgelst.o
dgesvdq.o dlatrs3.o dtrsyl3.o dgelst.o
endif
ifdef USEXBLAS