Merge pull request #2852 from martin-frbg/issue2588-cmake

Support building only a subset of variable types
This commit is contained in:
Martin Kroeker 2020-10-11 22:21:33 +02:00 committed by GitHub
commit ec638a82bf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
41 changed files with 2580 additions and 755 deletions

View File

@ -304,6 +304,18 @@ else
endif endif
ifeq ($(BUILD_LAPACK_DEPRECATED), 1) ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_SINGLE), 1)
-@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_DOUBLE), 1)
-@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_COMPLEX), 1)
-@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_COMPLEX16), 1)
-@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif endif
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc

View File

@ -9,7 +9,7 @@ ifndef TOPDIR
TOPDIR = . TOPDIR = .
endif endif
# If ARCH is not set, we use the host system's architecture for getarch compile options. # If ARCH is not set, we use the host system's architecture for getarch compile options.
ifndef ARCH ifndef ARCH
HOSTARCH := $(shell uname -m) HOSTARCH := $(shell uname -m)
else else
@ -73,6 +73,18 @@ endif
# #
# Beginning of system configuration # Beginning of system configuration
# #
ifneq ($(BUILD_SINGLE),1)
ifneq ($(BUILD_DOUBLE),1)
ifneq ($(BUILD_COMPLEX),1)
ifneq ($(BUILD_COMPLEX16),1)
override BUILD_SINGLE=1
override BUILD_DOUBLE=1
override BUILD_COMPLEX=1
override BUILD_COMPLEX16=1
endif
endif
endif
endif
ifndef HOSTCC ifndef HOSTCC
HOSTCC = $(CC) HOSTCC = $(CC)
@ -1224,16 +1236,16 @@ ifeq ($(BUILD_HALF), 1)
CCOMMON_OPT += -DBUILD_HALF CCOMMON_OPT += -DBUILD_HALF
endif endif
ifeq ($(BUILD_SINGLE), 1) ifeq ($(BUILD_SINGLE), 1)
CCOMMON_OPT += -DBUILD_SINGLE CCOMMON_OPT += -DBUILD_SINGLE=1
endif endif
ifeq ($(BUILD_DOUBLE), 1) ifeq ($(BUILD_DOUBLE), 1)
CCOMMON_OPT += -DBUILD_DOUBLE CCOMMON_OPT += -DBUILD_DOUBLE=1
endif endif
ifeq ($(BUILD_COMPLEX), 1) ifeq ($(BUILD_COMPLEX), 1)
CCOMMON_OPT += -DBUILD_COMPLEX CCOMMON_OPT += -DBUILD_COMPLEX=1
endif endif
ifeq ($(BUILD_COMPLEX16), 1) ifeq ($(BUILD_COMPLEX16), 1)
CCOMMON_OPT += -DBUILD_COMPLEX16 CCOMMON_OPT += -DBUILD_COMPLEX16=1
endif endif
CCOMMON_OPT += -DVERSION=\"$(VERSION)\" CCOMMON_OPT += -DVERSION=\"$(VERSION)\"

View File

@ -11,8 +11,8 @@ COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX)) HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
BLASOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) BLASOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
BLASOBJS_P = $(SHEXTOBJS_P) $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) BLASOBJS_P = $(SHEXTOBJS_P) $(SHBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
ifdef EXPRECISION ifdef EXPRECISION
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)

View File

@ -1,11 +1,12 @@
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files. # Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files.
set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F set(ALLAUX ilaenv.f ilaenv2stage.f ieeeck.f lsamen.f iparmq.f iparam2stage.F
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f dlaset.f
../INSTALL/ilaver.f xerbla_array.f ../INSTALL/ilaver.f xerbla_array.f
../INSTALL/slamch.f) ../INSTALL/slamch.f)
set(SCLAUX set(SCLAUX
scombssq.f sbdsvdx.f sstevx.f sstein.f
sbdsdc.f sbdsdc.f
sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
@ -25,6 +26,7 @@ set(SCLAUX
set(DZLAUX set(DZLAUX
dbdsdc.f dbdsdc.f
dbdsvdx.f dstevx.f dstein.f
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
@ -35,14 +37,14 @@ set(DZLAUX
dlartg.f dlaruv.f dlas2.f dlascl.f dlartg.f dlaruv.f dlas2.f dlascl.f
dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
dsteqr.f dsterf.f dlaisnan.f disnan.f dsteqr.f dsterf.f dlaisnan.f disnan.f
dlartgp.f dlartgs.f dlartgp.f dlartgs.f
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f) ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f)
set(SLASRC set(SLASRC
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
sgehd2.f sgehrd.f sgelq2.f sgelqf.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
@ -83,8 +85,8 @@ set(SLASRC
ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f ssptrf.f ssptri.f ssptrs.f sstegr.f sstev.f sstevd.f sstevr.f
sstevx.f ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
ssyswapr.f ssytrs.f ssytrs2.f ssyswapr.f ssytrs.f ssytrs2.f
@ -116,7 +118,7 @@ set(SLASRC
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
scombssq.f sgesvdq.f slaorhr_col_getrfnp.f sgesvdq.f slaorhr_col_getrfnp.f
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f ) slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f )
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
@ -229,7 +231,7 @@ set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
cla_lin_berr.f clarscl2.f clascl2.f cla_wwaddw.f) cla_lin_berr.f clarscl2.f clascl2.f cla_wwaddw.f)
set(DLASRC set(DLASRC
dbdsvdx.f dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
dgehd2.f dgehrd.f dgelq2.f dgelqf.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
@ -270,8 +272,8 @@ set(DLASRC
dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f dsptrf.f dsptri.f dsptrs.f dstegr.f dstev.f dstevd.f dstevr.f
dstevx.f dsycon.f dsyev.f dsyevd.f dsyevr.f dsycon.f dsyev.f dsyevd.f dsyevr.f
dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
dsysv.f dsysvx.f dsysv.f dsysvx.f
dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytrs.f dsytrs2.f dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytrs.f dsytrs2.f
@ -474,12 +476,16 @@ endif()
if(BUILD_COMPLEX) if(BUILD_COMPLEX)
set(LA_REL_SRC ${LA_REL_SRC} ${CLASRC} ${ZCLASRC} ${ALLAUX} ${SCLAUX}) set(LA_REL_SRC ${LA_REL_SRC} ${CLASRC} ${ZCLASRC} ${ALLAUX} ${SCLAUX})
SET(LA_GEN_SRC ${LA_GEN_SRC} ${CMATGEN} ${SCATGEN}) SET(LA_GEN_SRC ${LA_GEN_SRC} ${CMATGEN} ${SCATGEN})
message(STATUS "Building Complex Precision") message(STATUS "Building Single Precision Complex")
endif() endif()
if(BUILD_COMPLEX16) if(BUILD_COMPLEX16)
set(LA_REL_SRC ${LA_REL_SRC} ${ZLASRC} ${ZCLASRC} ${ALLAUX} ${DZLAUX}) set(LA_REL_SRC ${LA_REL_SRC} ${ZLASRC} ${ZCLASRC} ${ALLAUX} ${DZLAUX})
SET(LA_GEN_SRC ${LA_GEN_SRC} ${ZMATGEN} ${DZATGEN}) SET(LA_GEN_SRC ${LA_GEN_SRC} ${ZMATGEN} ${DZATGEN})
message(STATUS "Building Double Complex Precision") # for zlange/zlanhe
if (NOT BUILD_DOUBLE)
set (LA_REL_SRC ${LA_REL_SRC} dcombssq.f)
endif ()
message(STATUS "Building Double Precision Complex")
endif() endif()
# add lapack-netlib folder to the sources # add lapack-netlib folder to the sources

View File

@ -146,26 +146,34 @@ BLASLONG (*ishmin_k) (BLASLONG, float *, BLASLONG);
int (*shlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); int (*shlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif #endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
int sgemm_p, sgemm_q, sgemm_r; int sgemm_p, sgemm_q, sgemm_r;
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn; int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn;
#endif
int exclusive_cache; int exclusive_cache;
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
float (*samax_k) (BLASLONG, float *, BLASLONG); float (*samax_k) (BLASLONG, float *, BLASLONG);
float (*samin_k) (BLASLONG, float *, BLASLONG); float (*samin_k) (BLASLONG, float *, BLASLONG);
float (*smax_k) (BLASLONG, float *, BLASLONG); float (*smax_k) (BLASLONG, float *, BLASLONG);
float (*smin_k) (BLASLONG, float *, BLASLONG); float (*smin_k) (BLASLONG, float *, BLASLONG);
BLASLONG (*isamax_k)(BLASLONG, float *, BLASLONG); BLASLONG (*isamax_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*isamin_k)(BLASLONG, float *, BLASLONG); BLASLONG (*isamin_k)(BLASLONG, float *, BLASLONG);
BLASLONG (*ismax_k) (BLASLONG, float *, BLASLONG); BLASLONG (*ismax_k) (BLASLONG, float *, BLASLONG);
BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
float (*snrm2_k) (BLASLONG, float *, BLASLONG); float (*snrm2_k) (BLASLONG, float *, BLASLONG);
float (*sasum_k) (BLASLONG, float *, BLASLONG); float (*sasum_k) (BLASLONG, float *, BLASLONG);
#endif
#ifdef BUILD_SINGLE
float (*ssum_k) (BLASLONG, float *, BLASLONG); float (*ssum_k) (BLASLONG, float *, BLASLONG);
#endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*scopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); float (*sdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); //double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
@ -175,11 +183,15 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif
#ifdef BUILD_SINGLE
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
#endif
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
#ifdef ARCH_X86_64 #ifdef ARCH_X86_64
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG); void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K); int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
@ -193,7 +205,8 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*sgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*sgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
#endif
#ifdef BUILD_SINGLE
int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
int (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
@ -245,10 +258,14 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
int (*sneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*sneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); int (*slaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int dgemm_p, dgemm_q, dgemm_r; int dgemm_p, dgemm_q, dgemm_r;
int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn; int dgemm_unroll_m, dgemm_unroll_n, dgemm_unroll_mn;
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
double (*damax_k) (BLASLONG, double *, BLASLONG); double (*damax_k) (BLASLONG, double *, BLASLONG);
double (*damin_k) (BLASLONG, double *, BLASLONG); double (*damin_k) (BLASLONG, double *, BLASLONG);
double (*dmax_k) (BLASLONG, double *, BLASLONG); double (*dmax_k) (BLASLONG, double *, BLASLONG);
@ -257,25 +274,37 @@ BLASLONG (*idamax_k)(BLASLONG, double *, BLASLONG);
BLASLONG (*idamin_k)(BLASLONG, double *, BLASLONG); BLASLONG (*idamin_k)(BLASLONG, double *, BLASLONG);
BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG); BLASLONG (*idmax_k) (BLASLONG, double *, BLASLONG);
BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
double (*dnrm2_k) (BLASLONG, double *, BLASLONG); double (*dnrm2_k) (BLASLONG, double *, BLASLONG);
double (*dasum_k) (BLASLONG, double *, BLASLONG); double (*dasum_k) (BLASLONG, double *, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
double (*dsum_k) (BLASLONG, double *, BLASLONG); double (*dsum_k) (BLASLONG, double *, BLASLONG);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); double (*ddot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG);
#endif
#if defined (BUILD_SINGLE) || defined(BUILD_DOUBLE)
double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double); int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif
#ifdef BUILD_DOUBLE
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG); int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@ -283,7 +312,8 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dgemm_itcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*dgemm_itcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
#endif
#ifdef BUILD_DOUBLE
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); int (*dtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
@ -335,7 +365,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
int (*dneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*dneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*dlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *); int (*dlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *);
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
int qgemm_p, qgemm_q, qgemm_r; int qgemm_p, qgemm_q, qgemm_r;
@ -430,6 +460,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
#endif #endif
#ifdef BUILD_COMPLEX
int cgemm_p, cgemm_q, cgemm_r; int cgemm_p, cgemm_q, cgemm_r;
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn; int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;
@ -593,7 +624,9 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
int (*cneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*cneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *);
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
#endif
#ifdef BUILD_COMPLEX16
int zgemm_p, zgemm_q, zgemm_r; int zgemm_p, zgemm_q, zgemm_r;
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn; int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;
@ -757,6 +790,7 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG);
int (*zneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*zneg_tcopy) (BLASLONG, BLASLONG, double *, BLASLONG, double *);
int (*zlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *); int (*zlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, double *, BLASLONG, blasint *, double *);
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
@ -930,22 +964,34 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
void (*init)(void); void (*init)(void);
int snum_opt, dnum_opt, qnum_opt; int snum_opt, dnum_opt, qnum_opt;
#ifdef BUILD_SINGLE
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG); int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG); int (*daxpby_k) (BLASLONG, double, double*, BLASLONG,double, double*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG); int (*caxpby_k) (BLASLONG, float, float, float*, BLASLONG,float,float, float*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX16
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG); int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
#endif
#ifdef BUILD_SINGLE
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
@ -955,7 +1001,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG); int (*comatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG, float*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX16
int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
@ -965,17 +1013,23 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG);
#endif
#ifdef BUILD_SINGLE
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG);
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG);
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
@ -985,7 +1039,9 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG);
#endif
#ifdef BUILD_COMPLEX16
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
@ -995,12 +1051,20 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG);
#endif
#ifdef BUILD_SINGLE
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
#endif
#ifdef BUILD_DOUBLE
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
#endif
#ifdef BUILD_COMPLEX
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
#endif
#ifdef BUILD_COMPLEX16
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG); int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
#endif
} gotoblas_t; } gotoblas_t;
extern gotoblas_t *gotoblas; extern gotoblas_t *gotoblas;
@ -1021,19 +1085,23 @@ extern gotoblas_t *gotoblas;
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn #define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn
#endif #endif
/* SGEMM_* parameter macros: each one reads the like-named field of the
 * run-time `gotoblas` table. This group is only defined when BUILD_SINGLE
 * is set. */
#if defined (BUILD_SINGLE)
#define SGEMM_P gotoblas -> sgemm_p #define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q #define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R gotoblas -> sgemm_r #define SGEMM_R gotoblas -> sgemm_r
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m #define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n #define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn #define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif
/* DGEMM_* parameter macros: same scheme for the double-precision fields,
 * only defined when BUILD_DOUBLE is set. */
#if defined (BUILD_DOUBLE)
#define DGEMM_P gotoblas -> dgemm_p #define DGEMM_P gotoblas -> dgemm_p
#define DGEMM_Q gotoblas -> dgemm_q #define DGEMM_Q gotoblas -> dgemm_q
#define DGEMM_R gotoblas -> dgemm_r #define DGEMM_R gotoblas -> dgemm_r
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m #define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n #define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn #define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
#endif
#define QGEMM_P gotoblas -> qgemm_p #define QGEMM_P gotoblas -> qgemm_p
#define QGEMM_Q gotoblas -> qgemm_q #define QGEMM_Q gotoblas -> qgemm_q
@ -1042,19 +1110,39 @@ extern gotoblas_t *gotoblas;
#define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n #define QGEMM_UNROLL_N gotoblas -> qgemm_unroll_n
#define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn #define QGEMM_UNROLL_MN gotoblas -> qgemm_unroll_mn
/* CGEMM_* parameter macros: each one reads the like-named field of the
 * run-time `gotoblas` table. Only defined when BUILD_COMPLEX is set. */
#ifdef BUILD_COMPLEX
#define CGEMM_P gotoblas -> cgemm_p #define CGEMM_P gotoblas -> cgemm_p
#define CGEMM_Q gotoblas -> cgemm_q #define CGEMM_Q gotoblas -> cgemm_q
#define CGEMM_R gotoblas -> cgemm_r #define CGEMM_R gotoblas -> cgemm_r
#define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m #define CGEMM_UNROLL_M gotoblas -> cgemm_unroll_m
#define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n #define CGEMM_UNROLL_N gotoblas -> cgemm_unroll_n
#define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn #define CGEMM_UNROLL_MN gotoblas -> cgemm_unroll_mn
/* A BUILD_COMPLEX build without BUILD_SINGLE still gets the SGEMM_* macros
 * from this fallback group.
 * NOTE(review): SGEMM_R is the constant 1024 here instead of the
 * gotoblas -> sgemm_r field used in the BUILD_SINGLE group; the reason is
 * not visible in this file section - confirm it is intentional. */
#ifndef BUILD_SINGLE
#define SGEMM_P gotoblas -> sgemm_p
#define SGEMM_Q gotoblas -> sgemm_q
#define SGEMM_R 1024
#define SGEMM_UNROLL_M gotoblas -> sgemm_unroll_m
#define SGEMM_UNROLL_N gotoblas -> sgemm_unroll_n
#define SGEMM_UNROLL_MN gotoblas -> sgemm_unroll_mn
#endif
#endif
/* ZGEMM_* parameter macros, only defined when BUILD_COMPLEX16 is set. */
#ifdef BUILD_COMPLEX16
#define ZGEMM_P gotoblas -> zgemm_p #define ZGEMM_P gotoblas -> zgemm_p
#define ZGEMM_Q gotoblas -> zgemm_q #define ZGEMM_Q gotoblas -> zgemm_q
#define ZGEMM_R gotoblas -> zgemm_r #define ZGEMM_R gotoblas -> zgemm_r
#define ZGEMM_UNROLL_M gotoblas -> zgemm_unroll_m #define ZGEMM_UNROLL_M gotoblas -> zgemm_unroll_m
#define ZGEMM_UNROLL_N gotoblas -> zgemm_unroll_n #define ZGEMM_UNROLL_N gotoblas -> zgemm_unroll_n
#define ZGEMM_UNROLL_MN gotoblas -> zgemm_unroll_mn #define ZGEMM_UNROLL_MN gotoblas -> zgemm_unroll_mn
/* A BUILD_COMPLEX16 build without BUILD_DOUBLE still gets the DGEMM_* macros
 * from this fallback group.
 * NOTE(review): DGEMM_R is the constant 1024 here instead of the
 * gotoblas -> dgemm_r field - confirm it is intentional. */
#ifndef BUILD_DOUBLE
#define DGEMM_P gotoblas -> dgemm_p
#define DGEMM_Q gotoblas -> dgemm_q
#define DGEMM_R 1024
#define DGEMM_UNROLL_M gotoblas -> dgemm_unroll_m
#define DGEMM_UNROLL_N gotoblas -> dgemm_unroll_n
#define DGEMM_UNROLL_MN gotoblas -> dgemm_unroll_mn
#endif
#endif
#define XGEMM_P gotoblas -> xgemm_p #define XGEMM_P gotoblas -> xgemm_p
#define XGEMM_Q gotoblas -> xgemm_q #define XGEMM_Q gotoblas -> xgemm_q

View File

@ -46,56 +46,155 @@ else
all :: all1 all2 all3 all :: all1 all2 all3
endif endif
# Level 1 CBLAS tests.
# Only the test programs of the enabled variable types are requested as
# prerequisites; an unconditional "all1: xscblat1 xdcblat1 ..." rule must not
# be kept alongside, because the link rules of disabled types are not defined.
ifeq ($(BUILD_SINGLE),1)
all1targets += xscblat1
endif
ifeq ($(BUILD_DOUBLE),1)
all1targets += xdcblat1
endif
ifeq ($(BUILD_COMPLEX),1)
all1targets += xccblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
all1targets += xzcblat1
endif
all1: $(all1targets)
# The tests are only executed for native builds (CROSS undefined). The thread
# count is passed through OMP_NUM_THREADS for OpenMP builds and through
# OPENBLAS_NUM_THREADS otherwise.
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
	OMP_NUM_THREADS=2 ./xscblat1
endif
ifeq ($(BUILD_DOUBLE),1)
	OMP_NUM_THREADS=2 ./xdcblat1
endif
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./xccblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./xzcblat1
endif
else
ifeq ($(BUILD_SINGLE),1)
	OPENBLAS_NUM_THREADS=2 ./xscblat1
endif
ifeq ($(BUILD_DOUBLE),1)
	OPENBLAS_NUM_THREADS=2 ./xdcblat1
endif
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./xccblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./xzcblat1
endif
endif
endif
# Level 2 CBLAS tests.
# Only the test programs of the enabled variable types are requested as
# prerequisites; the former unconditional "all2: xscblat2 xdcblat2 ..." line
# is dropped because it would re-add the programs of disabled types.
ifeq ($(BUILD_SINGLE),1)
all2targets += xscblat2
endif
ifeq ($(BUILD_DOUBLE),1)
all2targets += xdcblat2
endif
ifeq ($(BUILD_COMPLEX),1)
all2targets += xccblat2
endif
ifeq ($(BUILD_COMPLEX16),1)
all2targets += xzcblat2
endif
all2: $(all2targets)
# Tests run only for native builds (CROSS undefined); each program reads its
# input file (sin2/din2/cin2/zin2) from stdin.
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
	OMP_NUM_THREADS=2 ./xscblat2 < sin2
endif
ifeq ($(BUILD_DOUBLE),1)
	OMP_NUM_THREADS=2 ./xdcblat2 < din2
endif
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./xccblat2 < cin2
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./xzcblat2 < zin2
endif
else
ifeq ($(BUILD_SINGLE),1)
	OPENBLAS_NUM_THREADS=2 ./xscblat2 < sin2
endif
ifeq ($(BUILD_DOUBLE),1)
	OPENBLAS_NUM_THREADS=2 ./xdcblat2 < din2
endif
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
endif
endif
endif
# Level 3 CBLAS tests.
# Only the test programs of the enabled variable types are requested as
# prerequisites; the former unconditional "all3: xscblat3 xdcblat3 ..." line
# is dropped because it would re-add the programs of disabled types.
ifeq ($(BUILD_SINGLE),1)
all3targets += xscblat3
endif
ifeq ($(BUILD_DOUBLE),1)
all3targets += xdcblat3
endif
ifeq ($(BUILD_COMPLEX),1)
all3targets += xccblat3
endif
ifeq ($(BUILD_COMPLEX16),1)
all3targets += xzcblat3
endif
all3: $(all3targets)
# Tests run only for native builds (CROSS undefined); each program reads its
# input file (sin3/din3/cin3/zin3) from stdin.
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
	OMP_NUM_THREADS=2 ./xscblat3 < sin3
endif
ifeq ($(BUILD_DOUBLE),1)
	OMP_NUM_THREADS=2 ./xdcblat3 < din3
endif
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./xccblat3 < cin3
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./xzcblat3 < zin3
endif
else
ifeq ($(BUILD_SINGLE),1)
	OPENBLAS_NUM_THREADS=2 ./xscblat3 < sin3
endif
ifeq ($(BUILD_DOUBLE),1)
	OPENBLAS_NUM_THREADS=2 ./xdcblat3 < din3
endif
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./xccblat3 < cin3
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3
endif
endif
endif
# GEMM3M variants of the level 3 CBLAS tests (complex types only).
# xccblat3_3m is linked only when BUILD_COMPLEX=1 and xzcblat3_3m only when
# BUILD_COMPLEX16=1, so the prerequisites are collected per enabled type, in
# the same way as for all1/all2/all3.
ifeq ($(BUILD_COMPLEX),1)
all3_3mtargets += xccblat3_3m
endif
ifeq ($(BUILD_COMPLEX16),1)
all3_3mtargets += xzcblat3_3m
endif
all3_3m: $(all3_3mtargets)
ifeq ($(USE_OPENMP), 1)
# The single-complex test is tied to BUILD_COMPLEX (not BUILD_SINGLE), matching
# the non-OpenMP branch below.
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
else
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
endif
@ -115,13 +214,19 @@ endif
endif endif
endif endif
ifeq ($(BUILD_SINGLE),1)
# Single real: link the level 1, 2 and 3 test programs.
# $@ expands to the rule's own target name (xscblat1, xscblat2, xscblat3).
xscblat1: $(stestl1o) c_sblat1.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_sblat1.o $(stestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xscblat2: $(stestl2o) c_sblat2.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_sblat2.o $(stestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xscblat3: $(stestl3o) c_sblat3.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_sblat3.o $(stestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
ifeq ($(BUILD_DOUBLE),1)
# Double real # Double real
xdcblat1: $(dtestl1o) c_dblat1.o $(TOPDIR)/$(LIBNAME) xdcblat1: $(dtestl1o) c_dblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xdcblat1 c_dblat1.o $(dtestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xdcblat1 c_dblat1.o $(dtestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
@ -129,7 +234,10 @@ xdcblat2: $(dtestl2o) c_dblat2.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xdcblat2 c_dblat2.o $(dtestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xdcblat2 c_dblat2.o $(dtestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xdcblat3: $(dtestl3o) c_dblat3.o $(TOPDIR)/$(LIBNAME) xdcblat3: $(dtestl3o) c_dblat3.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xdcblat3 c_dblat3.o $(dtestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xdcblat3 c_dblat3.o $(dtestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
ifeq ($(BUILD_COMPLEX),1)
# Single complex # Single complex
xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME) xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
@ -140,7 +248,10 @@ xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME) xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
ifeq ($(BUILD_COMPLEX16),1)
# Double complex # Double complex
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME) xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
@ -152,6 +263,6 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)
xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME) xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
include $(TOPDIR)/Makefile.tail include $(TOPDIR)/Makefile.tail

View File

@ -197,6 +197,19 @@ foreach (float_type ${FLOAT_TYPES})
endif () endif ()
endforeach () endforeach ()
# Objects that are only generated for threaded builds.
if (USE_THREAD)
  # A complex build without the real type of the same precision still gets the
  # real gemv_thread objects (presumably because the complex code paths call
  # them - confirm against the callers).
  if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
    GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "SINGLE")
    GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "SINGLE")
  endif ()
  if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
    GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "DOUBLE")
    GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "DOUBLE")
  endif ()

  GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2)
endif ()

View File

@ -417,19 +417,63 @@ XBLASOBJS += \
endif endif
# Object lists for variable types that are not being built.
# A disabled type has its list emptied; only the objects named below are then
# re-added for the types that are still enabled.
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
ifdef SMP
# sgemv_thread_* is added once when BUILD_DOUBLE and/or BUILD_COMPLEX is 1.
# Everything is appended with += so that enabling BUILD_COMPLEX no longer
# discards the strsv objects added for BUILD_DOUBLE.
ifneq ($(filter 1,$(BUILD_DOUBLE) $(BUILD_COMPLEX)),)
SBLASOBJS += sgemv_thread_n.$(SUFFIX) sgemv_thread_t.$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
SBLASOBJS += \
	strsv_NUU.$(SUFFIX) strsv_NUN.$(SUFFIX) strsv_NLU.$(SUFFIX) strsv_NLN.$(SUFFIX) \
	strsv_TUU.$(SUFFIX) strsv_TUN.$(SUFFIX) strsv_TLU.$(SUFFIX) strsv_TLN.$(SUFFIX)
endif
endif
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1)
ifdef SMP
DBLASOBJS = dgemv_thread_n.$(SUFFIX) dgemv_thread_t.$(SUFFIX)
endif
endif
endif
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1)
CBLASOBJS= \
	ctrsv_NUU.$(SUFFIX) ctrsv_NUN.$(SUFFIX) ctrsv_NLU.$(SUFFIX) ctrsv_NLN.$(SUFFIX) \
	ctrsv_TUU.$(SUFFIX) ctrsv_TUN.$(SUFFIX) ctrsv_TLU.$(SUFFIX) ctrsv_TLN.$(SUFFIX) \
	ctrsv_RUU.$(SUFFIX) ctrsv_RUN.$(SUFFIX) ctrsv_RLU.$(SUFFIX) ctrsv_RLN.$(SUFFIX) \
	ctrsv_CUU.$(SUFFIX) ctrsv_CUN.$(SUFFIX) ctrsv_CLU.$(SUFFIX) ctrsv_CLN.$(SUFFIX)
endif
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif
all :: all ::
# gbmv objects for the real types, all compiled from gbmv_k.c.
# The precision is chosen with -UDOUBLE / -DDOUBLE / -DXDOUBLE and the
# _n / _t variant with -UTRANS / -DTRANS.
# The single-precision rules exist only when BUILD_SINGLE=1.
ifeq ($(BUILD_SINGLE),1)
sgbmv_n.$(SUFFIX) sgbmv_n.$(PSUFFIX) : gbmv_k.c sgbmv_n.$(SUFFIX) sgbmv_n.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -UDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -UDOUBLE -UTRANS $(CFLAGS) -o $(@F) $<
sgbmv_t.$(SUFFIX) sgbmv_t.$(PSUFFIX) : gbmv_k.c sgbmv_t.$(SUFFIX) sgbmv_t.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -UDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -UDOUBLE -DTRANS $(CFLAGS) -o $(@F) $<
endif
# The double-precision rules exist only when BUILD_DOUBLE=1.
ifeq ($(BUILD_DOUBLE),1)
dgbmv_n.$(SUFFIX) dgbmv_n.$(PSUFFIX) : gbmv_k.c dgbmv_n.$(SUFFIX) dgbmv_n.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -DDOUBLE -UTRANS $(CFLAGS) -o $(@F) $<
dgbmv_t.$(SUFFIX) dgbmv_t.$(PSUFFIX) : gbmv_k.c dgbmv_t.$(SUFFIX) dgbmv_t.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -DDOUBLE -DTRANS $(CFLAGS) -o $(@F) $<
endif
# The extended-precision (q) rule is not guarded by a BUILD_* switch here.
qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DXDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -DXDOUBLE -UTRANS $(CFLAGS) -o $(@F) $<
@ -437,6 +481,8 @@ qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c
qgbmv_t.$(SUFFIX) qgbmv_t.$(PSUFFIX) : gbmv_k.c qgbmv_t.$(SUFFIX) qgbmv_t.$(PSUFFIX) : gbmv_k.c
$(CC) -c -UCOMPLEX -DXDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -DXDOUBLE -DTRANS $(CFLAGS) -o $(@F) $<
ifeq ($(BUILD_COMPLEX),1)
cgbmv_n.$(SUFFIX) cgbmv_n.$(PSUFFIX) : zgbmv_k.c cgbmv_n.$(SUFFIX) cgbmv_n.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -UDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
@ -460,6 +506,9 @@ cgbmv_s.$(SUFFIX) cgbmv_s.$(PSUFFIX) : zgbmv_k.c
cgbmv_d.$(SUFFIX) cgbmv_d.$(PSUFFIX) : zgbmv_k.c cgbmv_d.$(SUFFIX) cgbmv_d.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -UDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
endif
ifeq ($(BUILD_COMPLEX16),1)
zgbmv_n.$(SUFFIX) zgbmv_n.$(PSUFFIX) : zgbmv_k.c zgbmv_n.$(SUFFIX) zgbmv_n.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -DDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
@ -484,6 +533,7 @@ zgbmv_s.$(SUFFIX) zgbmv_s.$(PSUFFIX) : zgbmv_k.c
zgbmv_d.$(SUFFIX) zgbmv_d.$(PSUFFIX) : zgbmv_k.c zgbmv_d.$(SUFFIX) zgbmv_d.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -DDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
endif
xgbmv_n.$(SUFFIX) xgbmv_n.$(PSUFFIX) : zgbmv_k.c xgbmv_n.$(SUFFIX) xgbmv_n.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -DXDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
@ -509,24 +559,34 @@ xgbmv_s.$(SUFFIX) xgbmv_s.$(PSUFFIX) : zgbmv_k.c
xgbmv_d.$(SUFFIX) xgbmv_d.$(PSUFFIX) : zgbmv_k.c xgbmv_d.$(SUFFIX) xgbmv_d.$(PSUFFIX) : zgbmv_k.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -DXDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
# gbmv_thread objects for the real types, all compiled from gbmv_thread.c.
# The precision is chosen with -UDOUBLE / -DDOUBLE / -DXDOUBLE and the
# _n / _t variant with -UTRANSA / -DTRANSA.
# The single-precision rules exist only when BUILD_SINGLE=1.
ifeq ($(BUILD_SINGLE),1)
sgbmv_thread_n.$(SUFFIX) sgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c sgbmv_thread_n.$(SUFFIX) sgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -UDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -UDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $<
sgbmv_thread_t.$(SUFFIX) sgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c sgbmv_thread_t.$(SUFFIX) sgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -UDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -UDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $<
endif
# The double-precision rules exist only when BUILD_DOUBLE=1.
ifeq ($(BUILD_DOUBLE),1)
dgbmv_thread_n.$(SUFFIX) dgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c dgbmv_thread_n.$(SUFFIX) dgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -DDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $<
dgbmv_thread_t.$(SUFFIX) dgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c dgbmv_thread_t.$(SUFFIX) dgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -DDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $<
endif
# The extended-precision (q) rules are not guarded by a BUILD_* switch here.
qgbmv_thread_n.$(SUFFIX) qgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c qgbmv_thread_n.$(SUFFIX) qgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DXDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -DXDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $<
qgbmv_thread_t.$(SUFFIX) qgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c qgbmv_thread_t.$(SUFFIX) qgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -UCOMPLEX -DXDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< $(CC) -c -UCOMPLEX -DXDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $<
ifeq ($(BUILD_COMPLEX),1)
cgbmv_thread_n.$(SUFFIX) cgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c cgbmv_thread_n.$(SUFFIX) cgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
@ -550,6 +610,10 @@ cgbmv_thread_s.$(SUFFIX) cgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c
cgbmv_thread_d.$(SUFFIX) cgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c cgbmv_thread_d.$(SUFFIX) cgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
endif
ifeq ($(BUILD_COMPLEX16),1)
zgbmv_thread_n.$(SUFFIX) zgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c zgbmv_thread_n.$(SUFFIX) zgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
@ -574,6 +638,7 @@ zgbmv_thread_s.$(SUFFIX) zgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c
zgbmv_thread_d.$(SUFFIX) zgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c zgbmv_thread_d.$(SUFFIX) zgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
endif
xgbmv_thread_n.$(SUFFIX) xgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c xgbmv_thread_n.$(SUFFIX) xgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $<
@ -599,24 +664,32 @@ xgbmv_thread_s.$(SUFFIX) xgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c
xgbmv_thread_d.$(SUFFIX) xgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c xgbmv_thread_d.$(SUFFIX) xgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c
$(CC) -c -DCOMPLEX -DXDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< $(CC) -c -DCOMPLEX -DXDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $<
# gemv_thread objects for the real types, all compiled from gemv_thread.c
# (with ../../common.h as an extra prerequisite).
# The single-precision rules are provided when any of BUILD_SINGLE,
# BUILD_DOUBLE or BUILD_COMPLEX is set, since sgemv_thread_* is also added to
# the object list for double-only and complex-only builds.
# Note that $(or ...) tests for a non-empty value, not for the value 1.
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" ""
sgemv_thread_n.$(SUFFIX) sgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h sgemv_thread_n.$(SUFFIX) sgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
sgemv_thread_t.$(SUFFIX) sgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h sgemv_thread_t.$(SUFFIX) sgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
endif
# The double-precision rules are provided when BUILD_DOUBLE or BUILD_COMPLEX16
# is set (again a non-empty test).
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
dgemv_thread_n.$(SUFFIX) dgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h dgemv_thread_n.$(SUFFIX) dgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
dgemv_thread_t.$(SUFFIX) dgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h dgemv_thread_t.$(SUFFIX) dgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
endif
# The extended-precision (q) rules are not guarded by a BUILD_* switch here.
qgemv_thread_n.$(SUFFIX) qgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h qgemv_thread_n.$(SUFFIX) qgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
qgemv_thread_t.$(SUFFIX) qgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h qgemv_thread_t.$(SUFFIX) qgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
ifeq ($(BUILD_COMPLEX),1)
cgemv_thread_n.$(SUFFIX) cgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h cgemv_thread_n.$(SUFFIX) cgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
@ -640,6 +713,10 @@ cgemv_thread_s.$(SUFFIX) cgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common.
cgemv_thread_d.$(SUFFIX) cgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h cgemv_thread_d.$(SUFFIX) cgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F)
endif
ifeq ($(BUILD_COMPLEX16),1)
zgemv_thread_n.$(SUFFIX) zgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h zgemv_thread_n.$(SUFFIX) zgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
@ -664,6 +741,7 @@ zgemv_thread_s.$(SUFFIX) zgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common.
zgemv_thread_d.$(SUFFIX) zgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h zgemv_thread_d.$(SUFFIX) zgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F)
endif
xgemv_thread_n.$(SUFFIX) xgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h xgemv_thread_n.$(SUFFIX) xgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)

View File

@ -14,6 +14,24 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES})
endif () endif ()
endforeach () endforeach ()
# Real gemm objects for builds where a complex type is enabled but the real
# type of the same precision is not. The real precisions to generate are
# collected first (DOUBLE before SINGLE) and then handled by one shared loop.
set(gemm_real_fallback_types "")
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
  list(APPEND gemm_real_fallback_types "DOUBLE")
endif ()
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
  list(APPEND gemm_real_fallback_types "SINGLE")
endif ()
foreach (gemm_real_type ${gemm_real_fallback_types})
  foreach (GEMM_DEFINE ${GEMM_DEFINES})
    string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC)
    GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "${gemm_real_type}")
    # The threaded variant is only generated for the full threaded level 3 code.
    if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3)
      GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "${gemm_real_type}")
    endif ()
  endforeach ()
endforeach ()
set(TRMM_TRSM_SOURCES set(TRMM_TRSM_SOURCES
trmm_L.c trmm_L.c
@ -100,7 +118,24 @@ foreach (float_type ${FLOAT_TYPES})
endif() endif()
endif () endif ()
endforeach () endforeach ()
# Threaded gemm objects for the GEMM_COMPLEX_DEFINES variants, generated with a
# real float type when only the complex type of that precision is enabled.
# The real precisions are collected first (DOUBLE before SINGLE).
set(gemm_thread_fallback_types "")
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
  list(APPEND gemm_thread_fallback_types "DOUBLE")
endif ()
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
  list(APPEND gemm_thread_fallback_types "SINGLE")
endif ()
foreach (gemm_thread_real_type ${gemm_thread_fallback_types})
  foreach (gemm_define ${GEMM_COMPLEX_DEFINES})
    string(TOLOWER ${gemm_define} gemm_define_LC)
    if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3)
      GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false "${gemm_thread_real_type}")
    endif ()
  endforeach ()
endforeach ()
# for gemm3m # for gemm3m
if(USE_GEMM3M) if(USE_GEMM3M)
foreach (GEMM_DEFINE ${GEMM_DEFINES}) foreach (GEMM_DEFINE ${GEMM_DEFINES})

View File

@ -287,6 +287,60 @@ HPLOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) \
dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
endif endif
# Object lists for variable types that are not being built.
# A disabled type has its list emptied; only the objects named below are then
# re-added for the types that are still enabled.
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
ifeq ($(BUILD_DOUBLE),1)
SBLASOBJS= \
	strsm_LNUU.$(SUFFIX) strsm_LNUN.$(SUFFIX) strsm_LNLU.$(SUFFIX) strsm_LNLN.$(SUFFIX) \
	strsm_LTUU.$(SUFFIX) strsm_LTUN.$(SUFFIX) strsm_LTLU.$(SUFFIX) strsm_LTLN.$(SUFFIX) \
	strsm_RNUU.$(SUFFIX) strsm_RNUN.$(SUFFIX) strsm_RNLU.$(SUFFIX) strsm_RNLN.$(SUFFIX) \
	strsm_RTUU.$(SUFFIX) strsm_RTUN.$(SUFFIX) strsm_RTLU.$(SUFFIX) strsm_RTLN.$(SUFFIX) \
	ssyrk_UN.$(SUFFIX) ssyrk_UT.$(SUFFIX) ssyrk_LN.$(SUFFIX) ssyrk_LT.$(SUFFIX) \
	ssyrk_kernel_U.$(SUFFIX) ssyrk_kernel_L.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
SBLASOBJS += ssyrk_thread_UN.$(SUFFIX) ssyrk_thread_UT.$(SUFFIX) ssyrk_thread_LN.$(SUFFIX) ssyrk_thread_LT.$(SUFFIX)
endif
endif
ifeq ($(BUILD_COMPLEX),1)
# Appended with += so that the strsm/ssyrk objects selected for BUILD_DOUBLE
# above are kept when BUILD_COMPLEX is enabled as well.
SBLASOBJS += sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX)
endif
endif
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1)
DBLASOBJS = dgemm_nn.$(SUFFIX) dgemm_nt.$(SUFFIX) dgemm_tn.$(SUFFIX) dgemm_tt.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
endif
endif
endif
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1)
CBLASOBJS= \
	cherk_UN.$(SUFFIX) cherk_UC.$(SUFFIX) cherk_LN.$(SUFFIX) cherk_LC.$(SUFFIX) \
	cherk_kernel_UN.$(SUFFIX) cherk_kernel_UC.$(SUFFIX) \
	cherk_kernel_LN.$(SUFFIX) cherk_kernel_LC.$(SUFFIX) \
	ctrsm_LNUU.$(SUFFIX) ctrsm_LNUN.$(SUFFIX) ctrsm_LNLU.$(SUFFIX) ctrsm_LNLN.$(SUFFIX) \
	ctrsm_LTUU.$(SUFFIX) ctrsm_LTUN.$(SUFFIX) ctrsm_LTLU.$(SUFFIX) ctrsm_LTLN.$(SUFFIX) \
	ctrsm_LRUU.$(SUFFIX) ctrsm_LRUN.$(SUFFIX) ctrsm_LRLU.$(SUFFIX) ctrsm_LRLN.$(SUFFIX) \
	ctrsm_LCUU.$(SUFFIX) ctrsm_LCUN.$(SUFFIX) ctrsm_LCLU.$(SUFFIX) ctrsm_LCLN.$(SUFFIX) \
	ctrsm_RNUU.$(SUFFIX) ctrsm_RNUN.$(SUFFIX) ctrsm_RNLU.$(SUFFIX) ctrsm_RNLN.$(SUFFIX) \
	ctrsm_RTUU.$(SUFFIX) ctrsm_RTUN.$(SUFFIX) ctrsm_RTLU.$(SUFFIX) ctrsm_RTLN.$(SUFFIX) \
	ctrsm_RRUU.$(SUFFIX) ctrsm_RRUN.$(SUFFIX) ctrsm_RRLU.$(SUFFIX) ctrsm_RRLN.$(SUFFIX) \
	ctrsm_RCUU.$(SUFFIX) ctrsm_RCUN.$(SUFFIX) ctrsm_RCLU.$(SUFFIX) ctrsm_RCLN.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread_LN.$(SUFFIX) cherk_thread_LC.$(SUFFIX)
endif
endif
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif
all :: all ::
shgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h shgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h

View File

@ -56,12 +56,16 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
if (!(mode & BLAS_COMPLEX)) { if (!(mode & BLAS_COMPLEX)) {
switch (mode & BLAS_PREC) { switch (mode & BLAS_PREC) {
#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX)
case BLAS_SINGLE: case BLAS_SINGLE:
mask = SGEMM_UNROLL_MN - 1; mask = SGEMM_UNROLL_MN - 1;
break; break;
#endif
#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16)
case BLAS_DOUBLE: case BLAS_DOUBLE:
mask = DGEMM_UNROLL_MN - 1; mask = DGEMM_UNROLL_MN - 1;
break; break;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
case BLAS_XDOUBLE: case BLAS_XDOUBLE:
mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1; mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1;
@ -70,12 +74,16 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (
} }
} else { } else {
switch (mode & BLAS_PREC) { switch (mode & BLAS_PREC) {
#ifdef BUILD_COMPLEX
case BLAS_SINGLE: case BLAS_SINGLE:
mask = CGEMM_UNROLL_MN - 1; mask = CGEMM_UNROLL_MN - 1;
break; break;
#endif
#ifdef BUILD_COMPLEX16
case BLAS_DOUBLE: case BLAS_DOUBLE:
mask = ZGEMM_UNROLL_MN - 1; mask = ZGEMM_UNROLL_MN - 1;
break; break;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
case BLAS_XDOUBLE: case BLAS_XDOUBLE:
mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1; mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1;

View File

@ -459,13 +459,16 @@ blas_queue_t *tscq;
} else } else
#endif #endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) { if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
#ifdef BUILD_DOUBLE
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
#ifdef BUILD_SINGLE
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
} else { #endif
} else {
/* Other types in future */ /* Other types in future */
} }
} else { } else {
@ -476,11 +479,15 @@ blas_queue_t *tscq;
} else } else
#endif #endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
#ifdef BUILD_COMPLEX16
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
#ifdef BUILD_COMPLEX
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else { } else {
/* Other types in future */ /* Other types in future */
} }

View File

@ -315,12 +315,15 @@ static void exec_threads(blas_queue_t *queue, int buf_index){
} else } else
#endif #endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
#if defined ( BUILD_DOUBLE) || defined (BUILD_COMPLEX16)
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE){ } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE){
#if defined (BUILD_SINGLE) || defined (BUILD_COMPLEX)
sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif
} else { } else {
/* Other types in future */ /* Other types in future */
} }
@ -332,15 +335,24 @@ static void exec_threads(blas_queue_t *queue, int buf_index){
} else } else
#endif #endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
#ifdef BUILD_COMPLEX16
sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#else
fprintf(stderr,"UNHANDLED COMPLEX16\n");
#endif
} else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) {
#ifdef BUILD_COMPLEX
sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#else
fprintf(stderr,"UNHANDLED COMPLEX\n");
#endif
} else { } else {
/* Other types in future */ /* Other types in future */
} }
} }
if (!sb) fprintf(stderr,"SB not declared!!!\n");
queue->sb=sb; queue->sb=sb;
} }
} }

View File

@ -2201,11 +2201,17 @@ static void *alloc_mmap(void *address){
#endif #endif
#endif #endif
#ifdef BUILD_DOUBLE
allocsize = DGEMM_P * DGEMM_Q * sizeof(double); allocsize = DGEMM_P * DGEMM_Q * sizeof(double);
#elif defined(BUILD_COMPLEX16)
start = (BLASULONG)map_address; allocsize = ZGEMM_P * ZGEMM_Q * sizeof(double);
current = (SCALING - 1) * BUFFER_SIZE; #elif defined(BUILD_COMPLEX)
allocsize = CGEMM_P * CGEMM_Q * sizeof(double);
#else
allocsize = SGEMM_P * SGEMM_Q * sizeof(double);
#endif
start = (BLASULONG)map_address;
current = (SCALING - 1) * BUFFER_SIZE;
while(current > 0) { while(current > 0) {
*(BLASLONG *)start = (BLASLONG)start + PAGESIZE; *(BLASLONG *)start = (BLASLONG)start + PAGESIZE;

View File

@ -33,6 +33,18 @@ endif
ifndef BUILD_HALF ifndef BUILD_HALF
BUILD_HALF = 0 BUILD_HALF = 0
endif endif
# Give each per-precision build switch the value 0 when it is unset or empty.
# The gensymbol command lines in this file pass $(BUILD_SINGLE),
# $(BUILD_DOUBLE), $(BUILD_COMPLEX) and $(BUILD_COMPLEX16) as unquoted
# positional arguments, so an empty expansion would shift the arguments
# that follow it.
ifndef BUILD_SINGLE
BUILD_SINGLE = 0
endif
ifndef BUILD_DOUBLE
BUILD_DOUBLE = 0
endif
ifndef BUILD_COMPLEX
BUILD_COMPLEX = 0
endif
ifndef BUILD_COMPLEX16
BUILD_COMPLEX16 = 0
endif
ifeq ($(OSNAME), WINNT) ifeq ($(OSNAME), WINNT)
ifeq ($(F_COMPILER), GFORTRAN) ifeq ($(F_COMPILER), GFORTRAN)
@ -108,10 +120,10 @@ dll : ../$(LIBDLLNAME)
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB) -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
$(LIBPREFIX).def : gensymbol $(LIBPREFIX).def : gensymbol
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
libgoto_hpl.def : gensymbol libgoto_hpl.def : gensymbol
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
ifeq ($(OSNAME), Darwin) ifeq ($(OSNAME), Darwin)
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib
@ -246,23 +258,23 @@ static : ../$(LIBNAME)
rm -f goto.$(SUFFIX) rm -f goto.$(SUFFIX)
osx.def : gensymbol ../Makefile.system ../getarch.c osx.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
aix.def : gensymbol ../Makefile.system ../getarch.c aix.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
objcopy.def : gensymbol ../Makefile.system ../getarch.c objcopy.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
objconv.def : gensymbol ../Makefile.system ../getarch.c objconv.def : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
test : linktest.c test : linktest.c
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
rm -f linktest rm -f linktest
linktest.c : gensymbol ../Makefile.system ../getarch.c linktest.c : gensymbol ../Makefile.system ../getarch.c
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > linktest.c perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c
clean :: clean ::
@rm -f *.def *.dylib __.SYMDEF* *.renamed @rm -f *.def *.dylib __.SYMDEF* *.renamed

File diff suppressed because it is too large Load Diff

View File

@ -83,8 +83,12 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS})
GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX})
#sdsdot, dsdot #sdsdot, dsdot
if (BUILD_SINGLE OR BUILD_DOUBLE)
GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE") GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
endif ()
if (BUILD_DOUBLE)
GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE") GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE")
endif ()
# trmm is trsm with a compiler flag set # trmm is trsm with a compiler flag set
GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG})
@ -167,4 +171,31 @@ if (NOT DEFINED NO_LAPACK)
GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3)
endif () endif ()
# Generate the real single-precision interface objects when BUILD_COMPLEX is
# enabled but BUILD_SINGLE is not.
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
  # These sources share one call shape: <name>.c, no extra defines, object
  # base name equal to <name>.
  foreach (real_routine scal copy dot rot nrm2 gemv gemm asum swap axpy)
    GenerateNamedObjects("${real_routine}.c" "" "${real_routine}" 0 "" "" false "SINGLE")
  endforeach ()
  # imax.c is built with USE_ABS under the "i*amax" name pattern.
  GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" 0 "" "" false "SINGLE")
endif ()
# Generate the real double-precision interface objects when BUILD_COMPLEX16 is
# enabled but BUILD_DOUBLE is not.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
  # These sources share one call shape: <name>.c, no extra defines, object
  # base name equal to <name>.
  foreach (real_routine scal copy dot rot nrm2 gemv gemm asum swap axpy)
    GenerateNamedObjects("${real_routine}.c" "" "${real_routine}" 0 "" "" false "DOUBLE")
  endforeach ()
  # imax.c is built with USE_ABS under the "i*amax" name pattern.
  GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" 0 "" "" false "DOUBLE")
endif ()
add_library(interface OBJECT ${OPENBLAS_SRC}) add_library(interface OBJECT ${OPENBLAS_SRC})

View File

@ -329,7 +329,10 @@ CCBLAS3OBJS = \
cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \ cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \
cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \ cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \
cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\ cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\
cblas_cgeadd.$(SUFFIX) cblas_xerbla.$(SUFFIX) cblas_cgeadd.$(SUFFIX)
# The cblas_xerbla object is kept in its own variable rather than in the
# complex level-3 CBLAS object list; it is appended to CBAUXOBJS further down.
CXERBLAOBJ = \
cblas_xerbla.$(SUFFIX)
@ -391,6 +394,8 @@ ZBLAS2OBJS += $(CZBLAS2OBJS)
ZBLAS3OBJS += $(CZBLAS3OBJS) ZBLAS3OBJS += $(CZBLAS3OBJS)
SHEXTOBJS += $(CSHEXTOBJS) SHEXTOBJS += $(CSHEXTOBJS)
CBAUXOBJS += $(CXERBLAOBJ)
endif endif
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
@ -434,13 +439,11 @@ QLAPACKOBJS = \
# cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ # cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
# clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) # clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)
CLAPACKOBJS = \ CLAPACKOBJS = \
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX)
#ZLAPACKOBJS = \ #ZLAPACKOBJS = \
# zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ # zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
# zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ # zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
@ -469,8 +472,42 @@ ZBLASOBJS += $(ZLAPACKOBJS)
endif endif
FUNCOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) ifneq ($(BUILD_SINGLE),1)
# BUILD_SINGLE is off: start from an empty list and add back only the
# single-precision objects selected by the precisions that are enabled.
# Each branch appends (+=), so enabling BUILD_DOUBLE and BUILD_COMPLEX
# together yields the union of both lists; a plain assignment in the second
# branch would discard what the first branch selected.
SBLASOBJS =
ifeq ($(BUILD_DOUBLE),1)
SBLASOBJS += dsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) strsm.$(SUFFIX) \
	sgetrs.$(SUFFIX) sgetrf.$(SUFFIX) spotf2.$(SUFFIX) spotrf.$(SUFFIX) \
	ssyrk.$(SUFFIX) sgemv.$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX),1)
SBLASOBJS += \
	sdot.$(SUFFIX) srot.$(SUFFIX) snrm2.$(SUFFIX) sswap.$(SUFFIX) \
	isamax.$(SUFFIX) saxpy.$(SUFFIX) sscal.$(SUFFIX) scopy.$(SUFFIX) \
	sgemm.$(SUFFIX)
# sgemv is in both selections; add it here only when the BUILD_DOUBLE branch
# has not already done so, so the list never contains it twice.
ifneq ($(BUILD_DOUBLE),1)
SBLASOBJS += sgemv.$(SUFFIX)
endif
endif
endif
# Without BUILD_DOUBLE the double-precision real object list is replaced:
# a fixed subset when BUILD_COMPLEX16 is enabled, otherwise nothing.
ifneq ($(BUILD_DOUBLE),1)
ifeq ($(BUILD_COMPLEX16),1)
DBLASOBJS = ddot.$(SUFFIX) drot.$(SUFFIX) dnrm2.$(SUFFIX) dswap.$(SUFFIX) \
	idamax.$(SUFFIX) daxpy.$(SUFFIX) dscal.$(SUFFIX) dcopy.$(SUFFIX) \
	dgemv.$(SUFFIX) dgemm.$(SUFFIX)
else
DBLASOBJS =
endif
endif
# Without BUILD_COMPLEX the single-precision complex object list is replaced:
# a fixed subset when BUILD_COMPLEX16 is enabled, otherwise nothing.
ifneq ($(BUILD_COMPLEX),1)
ifeq ($(BUILD_COMPLEX16),1)
CBLASOBJS = cgetrs.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) cgetrf.$(SUFFIX) \
	cpotrf.$(SUFFIX) ctrsm.$(SUFFIX) cblas_cdotc_sub.$(SUFFIX)
else
CBLASOBJS =
endif
endif
# Without BUILD_COMPLEX16 the double-precision complex object list is emptied;
# unlike the other precisions, no subset is added back.
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif
# Objects from which FUNCALLFILES is derived. The cblas_xerbla object is
# referenced through CXERBLAOBJ, the name under which it is defined in this
# file; the spelling CXERBLAOBJS names no variable and expands to nothing.
FUNCOBJS = $(SHEXTOBJS) $(CXERBLAOBJ) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
# Diagnostic: prints the list as it stands here, each time this makefile is parsed.
$(info FUNCOBJS = {[$(FUNCOBJS)]} )
ifdef EXPRECISION ifdef EXPRECISION
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS)
endif endif
@ -481,6 +518,7 @@ endif
FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=)
include $(TOPDIR)/Makefile.tail include $(TOPDIR)/Makefile.tail
all :: libs all :: libs
@ -503,11 +541,14 @@ level1 : $(BEXTOBJS) $(SHBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
aux : $(CBAUXOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
$(CSHBLASOBJS) $(CSHBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ $(CSHBLASOBJS) $(CSHBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS $(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) $(CBAUXOBJS_P) : override CFLAGS += -DCBLAS
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c
$(CC) $(CFLAGS) -c $< -o $(@F) $(CC) $(CFLAGS) -c $< -o $(@F)
@ -2268,3 +2309,4 @@ cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c
cblas_xerbla.$(SUFFIX) cblas_xerbla.$(PSUFFIX) : xerbla.c cblas_xerbla.$(SUFFIX) cblas_xerbla.$(PSUFFIX) : xerbla.c
$(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F)

View File

@ -91,6 +91,59 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE")
# Generate the real single-precision level-1 kernel objects when BUILD_SINGLE
# is off but BUILD_COMPLEX or BUILD_DOUBLE is on.
if ((BUILD_COMPLEX OR BUILD_DOUBLE) AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SASUMKERNEL}" "" "asum_k" false "" "" false "SINGLE")
# The max/min kernels (plain and index-returning) are generated only when
# their kernel variable is defined.
if (DEFINED SMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${SMAXKERNEL}" "" "max_k" false "" "" false "SINGLE")
endif ()
if (DEFINED SMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${SMINKERNEL}" "USE_MIN" "min_k" false "" "" false "SINGLE")
endif ()
if (DEFINED ISMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${ISMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "SINGLE")
endif ()
if (DEFINED ISMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${ISMAXKERNEL}" "" "i*max_k" false "" "" false "SINGLE")
endif ()
# The remaining kernels are generated unconditionally within this branch.
GenerateNamedObjects("${KERNELDIR}/${ISAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${ISAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SSWAPKERNEL}" "" "swap_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SAXPYKERNEL}" "" "axpy_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE")
endif ()
# Generate the real double-precision level-1 kernel objects when BUILD_DOUBLE
# is off but BUILD_COMPLEX16 is on.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE")
# The max/min kernels (plain and index-returning) are generated only when
# their kernel variable is defined.
if (DEFINED DMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED DMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED IDMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE")
endif ()
if (DEFINED IDMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE")
endif ()
# The remaining kernels are generated unconditionally within this branch.
GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE")
endif ()
# Makefile.L2 # Makefile.L2
GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3)
GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3)
@ -124,7 +177,14 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type})
endif () endif ()
endforeach () endforeach ()
# Generate the real double-precision gemv kernels (plain and TRANS variants)
# when BUILD_COMPLEX16 is on but BUILD_DOUBLE is off.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE")
endif ()
# Generate the real single-precision gemv kernels (plain and TRANS variants)
# when BUILD_COMPLEX is on but BUILD_SINGLE is off.
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE")
endif ()
# Makefile.L3 # Makefile.L3
set(USE_TRMM false) set(USE_TRMM false)
if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) OR (TARGET_CORE MATCHES COOPERLAKE)) if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) OR (TARGET_CORE MATCHES COOPERLAKE))
@ -159,6 +219,38 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
endif () endif ()
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
endforeach() endforeach()
# Generate the real double-precision GEMM kernel, its copy routines and its
# beta kernel when BUILD_COMPLEX16 is on but BUILD_DOUBLE is off.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
  GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE")
  # A copy routine is generated only when its variable evaluates true; the
  # object name comes from the matching <variable>OBJ variable.
  foreach (dgemm_copy_var DGEMMINCOPY DGEMMITCOPY DGEMMONCOPY DGEMMOTCOPY)
    if (${dgemm_copy_var})
      GenerateNamedObjects("${KERNELDIR}/${${dgemm_copy_var}}" "DOUBLE" "${${dgemm_copy_var}OBJ}" false "" "" true "DOUBLE")
    endif ()
  endforeach ()
  GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE")
endif ()
# Generate the real single-precision GEMM kernel, its copy routines and its
# beta kernel when BUILD_SINGLE is off but BUILD_DOUBLE or BUILD_COMPLEX is on.
if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE)
  GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE")
  # A copy routine is generated only when its variable evaluates true; the
  # object name comes from the matching <variable>OBJ variable.
  foreach (sgemm_copy_var SGEMMINCOPY SGEMMITCOPY SGEMMONCOPY SGEMMOTCOPY)
    if (${sgemm_copy_var})
      GenerateNamedObjects("${KERNELDIR}/${${sgemm_copy_var}}" "SINGLE" "${${sgemm_copy_var}OBJ}" false "" "" true "SINGLE")
    endif ()
  endforeach ()
  GenerateNamedObjects("${KERNELDIR}/${SGEMM_BETA}" "" "gemm_beta" false "" "" false "SINGLE")
endif ()
foreach (float_type ${FLOAT_TYPES}) foreach (float_type ${FLOAT_TYPES})
string(SUBSTRING ${float_type} 0 1 float_char) string(SUBSTRING ${float_type} 0 1 float_char)
@ -499,7 +591,31 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
#geadd #geadd
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type})
endforeach () endforeach ()
# Generate the single-precision TRSM kernels and their triangular copy
# routines when BUILD_DOUBLE is on but BUILD_SINGLE is off.
if (BUILD_DOUBLE AND NOT BUILD_SINGLE)
  # The four solve kernels.
  GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE")
  GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE")
  GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE")
  GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "SINGLE")

  # Each generic copy source (uncopy, lncopy, utcopy, ltcopy) yields four
  # objects: inner ("i", SGEMM_UNROLL_M) and outer ("o", SGEMM_UNROLL_N, built
  # with OUTER), each in a UNIT ("u") and a non-UNIT ("n") flavour. The
  # lncopy/ltcopy sources additionally receive LOWER.
  foreach (strsm_copy_src "uncopy" "lncopy" "utcopy" "ltcopy")
    # First two letters of the source name ("un", "ln", "ut", "lt") form the
    # middle of the object name.
    string(SUBSTRING "${strsm_copy_src}" 0 2 strsm_copy_tag)
    if (strsm_copy_src MATCHES "^l")
      set(strsm_inner_unit_defs "LOWER;UNIT")
      set(strsm_inner_defs "LOWER")
      set(strsm_outer_unit_defs "OUTER;LOWER;UNIT")
      set(strsm_outer_defs "OUTER;LOWER")
    else ()
      set(strsm_inner_unit_defs "UNIT")
      set(strsm_inner_defs "")
      set(strsm_outer_unit_defs "OUTER;UNIT")
      set(strsm_outer_defs "OUTER")
    endif ()
    GenerateNamedObjects("generic/trsm_${strsm_copy_src}_${SGEMM_UNROLL_M}.c" "${strsm_inner_unit_defs}" "trsm_i${strsm_copy_tag}ucopy" false "" "" false "SINGLE")
    GenerateNamedObjects("generic/trsm_${strsm_copy_src}_${SGEMM_UNROLL_M}.c" "${strsm_inner_defs}" "trsm_i${strsm_copy_tag}ncopy" false "" "" false "SINGLE")
    GenerateNamedObjects("generic/trsm_${strsm_copy_src}_${SGEMM_UNROLL_N}.c" "${strsm_outer_unit_defs}" "trsm_o${strsm_copy_tag}ucopy" false "" "" false "SINGLE")
    GenerateNamedObjects("generic/trsm_${strsm_copy_src}_${SGEMM_UNROLL_N}.c" "${strsm_outer_defs}" "trsm_o${strsm_copy_tag}ncopy" false "" "" false "SINGLE")
  endforeach ()
endif ()
# Makefile.LA # Makefile.LA
if(NOT NO_LAPACK) if(NOT NO_LAPACK)
@ -526,6 +642,28 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("${KERNELDIR}/${${float_char}NEG_TCOPY}_${${float_char}GEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}NEG_TCOPY}_${${float_char}GEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false ${float_type})
GenerateNamedObjects("${KERNELDIR}/${${float_char}LASWP_NCOPY}_${${float_char}GEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}LASWP_NCOPY}_${${float_char}GEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false ${float_type})
endforeach() endforeach()
# Generate the single-precision neg_tcopy and laswp_ncopy objects when
# BUILD_COMPLEX is on but BUILD_SINGLE is off.
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
# Fall back to the generic sources when no specific one has been defined.
if (NOT DEFINED SNEG_TCOPY)
set(SNEG_TCOPY ../generic/neg_tcopy.c)
endif ()
if (NOT DEFINED SLASWP_NCOPY)
set(SLASWP_NCOPY ../generic/laswp_ncopy.c)
endif ()
# NOTE(review): the unroll factor is appended after the variable's value, so
# with the fallback values above the paths end in ".c_<unroll>" - confirm this
# is the form GenerateNamedObjects expects.
GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}_${SGEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}_${SGEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false "SINGLE")
endif()
# Generate the double-precision neg_tcopy and laswp_ncopy objects when
# BUILD_COMPLEX16 is on but BUILD_DOUBLE is off.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
# Fall back to the generic sources when no specific one has been defined.
if (NOT DEFINED DNEG_TCOPY)
set(DNEG_TCOPY ../generic/neg_tcopy.c)
endif ()
if (NOT DEFINED DLASWP_NCOPY)
set(DLASWP_NCOPY ../generic/laswp_ncopy.c)
endif ()
# NOTE(review): the unroll factor is appended after the variable's value, so
# with the fallback values above the paths end in ".c_<unroll>" - confirm this
# is the form GenerateNamedObjects expects.
GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}_${DGEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false "DOUBLE")
GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}_${DGEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false "DOUBLE")
endif()
endif() endif()
if (${DYNAMIC_ARCH}) if (${DYNAMIC_ARCH})
@ -557,8 +695,147 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type}) GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type})
endforeach () endforeach ()
if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE")
GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE")
endif () endif ()
# Real double precision is enabled but real single precision is not:
# request a subset of SINGLE kernel objects anyway (neg_tcopy/laswp_ncopy,
# the four TRSM kernels, the generic trsm copy routines, any configured
# SGEMM copy kernels, and gemv_n/gemv_t).
# NOTE(review): gemv_n, gemv_t, neg_tcopy and laswp_ncopy for SINGLE are also
# requested by the preceding `BUILD_COMPLEX AND NOT BUILD_SINGLE` block; when
# BUILD_DOUBLE and BUILD_COMPLEX are both on without BUILD_SINGLE they are
# requested twice — confirm that duplicate requests are harmless.
if (BUILD_DOUBLE AND NOT BUILD_SINGLE)
# LAPACK-style helper copies, sized by the SGEMM unroll factors.
GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE")
# TRSM kernels: one object per LN/LT/RN/RT variant, selected via the define list.
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" ${TSUFFIX} false "SINGLE")
# Generic trsm copy routines: each source (uncopy/lncopy/utcopy/ltcopy) is built
# four times — unroll M vs. N (the N builds add OUTER), each with and without UNIT.
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "SINGLE")
GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "SINGLE")
# SGEMM copy kernels are only requested when the corresponding variable is set.
# NOTE(review): these calls (and the gemv calls below) pass "" where the lines
# above pass ${TSUFFIX} — confirm this difference is intentional.
if (SGEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE")
endif ()
if (SGEMMITCOPY)
GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE")
endif ()
if (SGEMMONCOPY)
GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE")
endif ()
if (SGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE")
endif ()
# gemv_n / gemv_t; the transposed variant is selected with the TRANS define.
GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE")
GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE")
endif ()
# Double-precision complex is enabled but real double precision is not:
# request the DOUBLE neg_tcopy and laswp_ncopy objects from the generic
# sources, sized by the DGEMM unroll factors.
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE")
GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE")
endif ()
# Double-precision complex is enabled but single-precision complex is not:
# request a set of COMPLEX (single-precision complex) kernel objects anyway.
# (Presumably some complex16 code paths reference them — TODO confirm.)
if (BUILD_COMPLEX16 AND NOT BUILD_COMPLEX)
# amax/amin are always requested; max/min only when a kernel variable is defined.
GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX")
if (DEFINED CMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${CMAXKERNEL}" "" "max_k" false "" "" false "COMPLEX")
endif ()
if (DEFINED CMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${CMINKERNEL}" "USE_MIN" "min_k" false "" "" false "COMPLEX")
endif ()
# Index-returning counterparts ("i*..." object names) of the kernels above.
GenerateNamedObjects("${KERNELDIR}/${ICAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${ICAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "COMPLEX")
if (DEFINED ICMAXKERNEL)
GenerateNamedObjects("${KERNELDIR}/${ICMAXKERNEL}" "" "i*max_k" false "" "" false "COMPLEX")
endif ()
if (DEFINED ICMINKERNEL)
GenerateNamedObjects("${KERNELDIR}/${ICMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "COMPLEX")
endif ()
# Vector kernels: asum, axpy, copy, nrm2, rot, scal, swap, axpby, sum.
GenerateNamedObjects("${KERNELDIR}/${CASUMKERNEL}" "" "asum_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "" "axpy_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CNRM2KERNEL}" "" "nrm2_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CROTKERNEL}" "" "rot_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CSCALKERNEL}" "" "scal_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CSWAPKERNEL}" "" "swap_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CAXPBYKERNEL}" "" "axpby_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CSUMKERNEL}" "" "sum_k" false "" "" false "COMPLEX")
# Conjugated axpy and the two dot variants are built from the same sources with CONJ.
GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "" "dotu_k" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "CONJ" "dotc_k" false "" "" false "COMPLEX")
# Eight gemv variants from two sources, selected by TRANSA / CONJ / XCONJ defines.
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "" "gemv_n" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false "COMPLEX")
# TRSM kernels: each LN/LT/RN/RT source is built with CONJ (LR/LC/RR/RC) and without (LN/LT/RN/RT).
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "COMPLEX")
# GEMM kernel built four times with the NN / CN / NC / CC defines.
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false "COMPLEX")
GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false "COMPLEX")
# CGEMM copy kernels are only requested when the corresponding variable is set.
if (CGEMMINCOPY)
GenerateNamedObjects("${KERNELDIR}/${CGEMMINCOPY}" "COMPLEX" "${CGEMMINCOPYOBJ}" false "" "" true "COMPLEX")
endif ()
if (CGEMMITCOPY)
GenerateNamedObjects("${KERNELDIR}/${CGEMMITCOPY}" "COMPLEX" "${CGEMMITCOPYOBJ}" false "" "" true "COMPLEX")
endif ()
if (CGEMMONCOPY)
GenerateNamedObjects("${KERNELDIR}/${CGEMMONCOPY}" "COMPLEX" "${CGEMMONCOPYOBJ}" false "" "" true "COMPLEX")
endif ()
if (CGEMMOTCOPY)
GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX")
endif ()
GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX")
# Generic ztrsm copy routines: each source (uncopy/lncopy/utcopy/ltcopy) is built
# four times — CGEMM unroll M vs. N (the N builds add OUTER), each with and without UNIT.
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "COMPLEX")
# NOTE(review): these two COMPLEX objects are sized by DGEMM_UNROLL_M/N, whereas every
# other line in this block uses CGEMM_UNROLL_M/N, and they use the neg_tcopy/laswp_ncopy
# sources rather than a z-prefixed one like the ztrsm_* lines above. This looks like it
# may be a copy-paste from the DOUBLE block — confirm the intended unroll variables and sources.
GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "COMPLEX")
GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "COMPLEX")
endif ()
endif ()
add_library(kernel${TSUFFIX} OBJECT ${OPENBLAS_SRC}) add_library(kernel${TSUFFIX} OBJECT ${OPENBLAS_SRC})
set_target_properties(kernel${TSUFFIX} PROPERTIES COMPILE_FLAGS "${KERNEL_DEFINITIONS}") set_target_properties(kernel${TSUFFIX} PROPERTIES COMPILE_FLAGS "${KERNEL_DEFINITIONS}")
@ -573,7 +850,7 @@ if (${DYNAMIC_ARCH})
set(BUILD_KERNEL 1) set(BUILD_KERNEL 1)
set(KDIR "") set(KDIR "")
set(TSUFFIX "_${TARGET_CORE}") set(TSUFFIX "_${TARGET_CORE}")
set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}") set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}")
build_core("${TARGET_CORE}" "${KDIR}" "${TSUFFIX}" "${KERNEL_DEFINITIONS}") build_core("${TARGET_CORE}" "${KDIR}" "${TSUFFIX}" "${KERNEL_DEFINITIONS}")
set(ADD_COMMONOBJS 0) set(ADD_COMMONOBJS 0)
endforeach() endforeach()

View File

@ -186,31 +186,46 @@ ifndef XHEMV_M_KERNEL
XHEMV_M_KERNEL = ../generic/zhemv_k.c XHEMV_M_KERNEL = ../generic/zhemv_k.c
endif endif
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
SBLASOBJS += \ SBLASOBJS += \
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \ sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += \
ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \
sger_k$(TSUFFIX).$(SUFFIX) sger_k$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS += \ DBLASOBJS += \
dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) dsymv_U$(TSUFFIX).$(SUFFIX) dsymv_L$(TSUFFIX).$(SUFFIX) \ dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) dsymv_U$(TSUFFIX).$(SUFFIX) dsymv_L$(TSUFFIX).$(SUFFIX) \
dger_k$(TSUFFIX).$(SUFFIX) dger_k$(TSUFFIX).$(SUFFIX)
endif
QBLASOBJS += \ QBLASOBJS += \
qgemv_n$(TSUFFIX).$(SUFFIX) qgemv_t$(TSUFFIX).$(SUFFIX) qsymv_U$(TSUFFIX).$(SUFFIX) qsymv_L$(TSUFFIX).$(SUFFIX) \ qgemv_n$(TSUFFIX).$(SUFFIX) qgemv_t$(TSUFFIX).$(SUFFIX) qsymv_U$(TSUFFIX).$(SUFFIX) qsymv_L$(TSUFFIX).$(SUFFIX) \
qger_k$(TSUFFIX).$(SUFFIX) qger_k$(TSUFFIX).$(SUFFIX)
ifeq ($(BUILD_COMPLEX),1)
SBLASOBJS += \
sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX)
CBLASOBJS += \ CBLASOBJS += \
cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \ cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \
cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX) \ cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX) \
csymv_U$(TSUFFIX).$(SUFFIX) csymv_L$(TSUFFIX).$(SUFFIX) \ csymv_U$(TSUFFIX).$(SUFFIX) csymv_L$(TSUFFIX).$(SUFFIX) \
chemv_U$(TSUFFIX).$(SUFFIX) chemv_L$(TSUFFIX).$(SUFFIX) chemv_V$(TSUFFIX).$(SUFFIX) chemv_M$(TSUFFIX).$(SUFFIX) \ chemv_U$(TSUFFIX).$(SUFFIX) chemv_L$(TSUFFIX).$(SUFFIX) chemv_V$(TSUFFIX).$(SUFFIX) chemv_M$(TSUFFIX).$(SUFFIX) \
cgeru_k$(TSUFFIX).$(SUFFIX) cgerc_k$(TSUFFIX).$(SUFFIX) cgerv_k$(TSUFFIX).$(SUFFIX) cgerd_k$(TSUFFIX).$(SUFFIX) cgeru_k$(TSUFFIX).$(SUFFIX) cgerc_k$(TSUFFIX).$(SUFFIX) cgerv_k$(TSUFFIX).$(SUFFIX) cgerd_k$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
CBLASOBJS += \
cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \
cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX)
DBLASOBJS += \
dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX)
ZBLASOBJS += \ ZBLASOBJS += \
zgemv_n$(TSUFFIX).$(SUFFIX) zgemv_t$(TSUFFIX).$(SUFFIX) zgemv_r$(TSUFFIX).$(SUFFIX) zgemv_c$(TSUFFIX).$(SUFFIX) \ zgemv_n$(TSUFFIX).$(SUFFIX) zgemv_t$(TSUFFIX).$(SUFFIX) zgemv_r$(TSUFFIX).$(SUFFIX) zgemv_c$(TSUFFIX).$(SUFFIX) \
zgemv_o$(TSUFFIX).$(SUFFIX) zgemv_u$(TSUFFIX).$(SUFFIX) zgemv_s$(TSUFFIX).$(SUFFIX) zgemv_d$(TSUFFIX).$(SUFFIX) \ zgemv_o$(TSUFFIX).$(SUFFIX) zgemv_u$(TSUFFIX).$(SUFFIX) zgemv_s$(TSUFFIX).$(SUFFIX) zgemv_d$(TSUFFIX).$(SUFFIX) \
zsymv_U$(TSUFFIX).$(SUFFIX) zsymv_L$(TSUFFIX).$(SUFFIX) \ zsymv_U$(TSUFFIX).$(SUFFIX) zsymv_L$(TSUFFIX).$(SUFFIX) \
zhemv_U$(TSUFFIX).$(SUFFIX) zhemv_L$(TSUFFIX).$(SUFFIX) zhemv_V$(TSUFFIX).$(SUFFIX) zhemv_M$(TSUFFIX).$(SUFFIX) \ zhemv_U$(TSUFFIX).$(SUFFIX) zhemv_L$(TSUFFIX).$(SUFFIX) zhemv_V$(TSUFFIX).$(SUFFIX) zhemv_M$(TSUFFIX).$(SUFFIX) \
zgeru_k$(TSUFFIX).$(SUFFIX) zgerc_k$(TSUFFIX).$(SUFFIX) zgerv_k$(TSUFFIX).$(SUFFIX) zgerd_k$(TSUFFIX).$(SUFFIX) zgeru_k$(TSUFFIX).$(SUFFIX) zgerc_k$(TSUFFIX).$(SUFFIX) zgerv_k$(TSUFFIX).$(SUFFIX) zgerd_k$(TSUFFIX).$(SUFFIX)
endif
XBLASOBJS += \ XBLASOBJS += \
xgemv_n$(TSUFFIX).$(SUFFIX) xgemv_t$(TSUFFIX).$(SUFFIX) xgemv_r$(TSUFFIX).$(SUFFIX) xgemv_c$(TSUFFIX).$(SUFFIX) \ xgemv_n$(TSUFFIX).$(SUFFIX) xgemv_t$(TSUFFIX).$(SUFFIX) xgemv_r$(TSUFFIX).$(SUFFIX) xgemv_c$(TSUFFIX).$(SUFFIX) \
@ -219,17 +234,21 @@ XBLASOBJS += \
xhemv_U$(TSUFFIX).$(SUFFIX) xhemv_L$(TSUFFIX).$(SUFFIX) xhemv_V$(TSUFFIX).$(SUFFIX) xhemv_M$(TSUFFIX).$(SUFFIX) \ xhemv_U$(TSUFFIX).$(SUFFIX) xhemv_L$(TSUFFIX).$(SUFFIX) xhemv_V$(TSUFFIX).$(SUFFIX) xhemv_M$(TSUFFIX).$(SUFFIX) \
xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX) xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX)
ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" ""
$(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@ $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@
$(KDIR)sgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(KDIR)sgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -DTRANS $< -o $@ $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -DTRANS $< -o $@
endif
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
$(KDIR)dgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(KDIR)dgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -UTRANS $< -o $@ $(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -UTRANS $< -o $@
$(KDIR)dgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(KDIR)dgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -DTRANS $< -o $@ $(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -DTRANS $< -o $@
endif
$(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVNKERNEL) $(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVNKERNEL)
$(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -UTRANS $< -o $@ $(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -UTRANS $< -o $@
@ -237,6 +256,8 @@ $(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)qgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVTKERNEL) $(KDIR)qgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVTKERNEL)
$(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -DTRANS $< -o $@ $(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -DTRANS $< -o $@
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
$(KDIR)cgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(KDIR)cgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ $(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@
@ -260,6 +281,10 @@ $(KDIR)cgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE
$(KDIR)cgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(KDIR)cgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ $(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
endif
ifeq ($(BUILD_COMPLEX16),1)
$(KDIR)zgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(KDIR)zgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ $(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@
@ -284,6 +309,7 @@ $(KDIR)zgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE
$(KDIR)zgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(KDIR)zgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP)
$(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ $(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
endif
$(KDIR)xgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVNKERNEL) $(KDIR)xgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVNKERNEL)
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@
@ -309,17 +335,25 @@ $(KDIR)xgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE
$(KDIR)xgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVTKERNEL) $(KDIR)xgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVTKERNEL)
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@
ifeq ($(BUILD_SINGLE),1)
$(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM) $(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM)
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@ $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@
$(KDIR)ssymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_L_KERNEL) $(SSYMV_L_PARAM) $(KDIR)ssymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_L_KERNEL) $(SSYMV_L_PARAM)
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $@ $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $@
endif
ifeq ($(BUILD_DOUBLE),1)
$(KDIR)dsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_U_KERNEL) $(DSYMV_U_PARAM) $(KDIR)dsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_U_KERNEL) $(DSYMV_U_PARAM)
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $@ $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $@
$(KDIR)dsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_L_KERNEL) $(DSYMV_L_PARAM) $(KDIR)dsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_L_KERNEL) $(DSYMV_L_PARAM)
$(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $@ $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $@
endif
$(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_U_KERNEL) $(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_U_KERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $@ $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $@
@ -327,17 +361,23 @@ $(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)qsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_L_KERNEL) $(KDIR)qsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_L_KERNEL)
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $@ $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $@
ifeq ($(BUILD_COMPLEX),1)
$(KDIR)csymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_U_KERNEL) $(CSYMV_U_PARAM) $(KDIR)csymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_U_KERNEL) $(CSYMV_U_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $@
$(KDIR)csymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_L_KERNEL) $(CSYMV_L_PARAM) $(KDIR)csymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_L_KERNEL) $(CSYMV_L_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $@
endif
ifeq ($(BUILD_COMPLEX16),1)
$(KDIR)zsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_U_KERNEL) $(ZSYMV_U_PARAM) $(KDIR)zsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_U_KERNEL) $(ZSYMV_U_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $@
$(KDIR)zsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_L_KERNEL) $(ZSYMV_L_PARAM) $(KDIR)zsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_L_KERNEL) $(ZSYMV_L_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $@
endif
$(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_U_KERNEL) $(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_U_KERNEL)
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $@
@ -345,15 +385,23 @@ $(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)xsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_L_KERNEL) $(KDIR)xsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_L_KERNEL)
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $@
ifeq ($(BUILD_SINGLE),1)
$(KDIR)sger_k$(TSUFFIX).$(SUFFIX) $(KDIR)sger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGERKERNEL) $(SGERPARAM) $(KDIR)sger_k$(TSUFFIX).$(SUFFIX) $(KDIR)sger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGERKERNEL) $(SGERPARAM)
$(CC) -c $(CFLAGS) -UDOUBLE $< -o $@ $(CC) -c $(CFLAGS) -UDOUBLE $< -o $@
endif
ifeq ($(BUILD_DOUBLE),1)
$(KDIR)dger_k$(TSUFFIX).$(SUFFIX) $(KDIR)dger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGERKERNEL) $(DGERPARAM) $(KDIR)dger_k$(TSUFFIX).$(SUFFIX) $(KDIR)dger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGERKERNEL) $(DGERPARAM)
$(CC) -c $(CFLAGS) -DDOUBLE $< -o $@ $(CC) -c $(CFLAGS) -DDOUBLE $< -o $@
endif
$(KDIR)qger_k$(TSUFFIX).$(SUFFIX) $(KDIR)qger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGERKERNEL) $(QGERPARAM) $(KDIR)qger_k$(TSUFFIX).$(SUFFIX) $(KDIR)qger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGERKERNEL) $(QGERPARAM)
$(CC) -c $(CFLAGS) -DXDOUBLE $< -o $@ $(CC) -c $(CFLAGS) -DXDOUBLE $< -o $@
ifeq ($(BUILD_COMPLEX),1)
$(KDIR)cgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERUKERNEL) $(CGERPARAM) $(KDIR)cgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERUKERNEL) $(CGERPARAM)
$(CC) -c $(CFLAGS) -UDOUBLE -UCONJ $< -o $@ $(CC) -c $(CFLAGS) -UDOUBLE -UCONJ $< -o $@
@ -365,6 +413,9 @@ $(KDIR)cgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)cgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERCKERNEL) $(CGERPARAM) $(KDIR)cgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERCKERNEL) $(CGERPARAM)
$(CC) -c $(CFLAGS) -UDOUBLE -DCONJ -DXCONJ $< -o $@ $(CC) -c $(CFLAGS) -UDOUBLE -DCONJ -DXCONJ $< -o $@
endif
ifeq ($(BUILD_COMPLEX16),1)
$(KDIR)zgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERUKERNEL) $(ZGERPARAM) $(KDIR)zgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERUKERNEL) $(ZGERPARAM)
$(CC) -c $(CFLAGS) -DDOUBLE -UCONJ $< -o $@ $(CC) -c $(CFLAGS) -DDOUBLE -UCONJ $< -o $@
@ -377,6 +428,7 @@ $(KDIR)zgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)zgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERCKERNEL) $(ZGERPARAM) $(KDIR)zgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERCKERNEL) $(ZGERPARAM)
$(CC) -c $(CFLAGS) -DDOUBLE -DCONJ -DXCONJ $< -o $@ $(CC) -c $(CFLAGS) -DDOUBLE -DCONJ -DXCONJ $< -o $@
endif
$(KDIR)xgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERUKERNEL) $(XGERPARAM) $(KDIR)xgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERUKERNEL) $(XGERPARAM)
$(CC) -c $(CFLAGS) -DXDOUBLE -UCONJ $< -o $@ $(CC) -c $(CFLAGS) -DXDOUBLE -UCONJ $< -o $@
@ -390,6 +442,8 @@ $(KDIR)xgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)xgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERCKERNEL) $(XGERPARAM) $(KDIR)xgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERCKERNEL) $(XGERPARAM)
$(CC) -c $(CFLAGS) -DXDOUBLE -DCONJ -DXCONJ $< -o $@ $(CC) -c $(CFLAGS) -DXDOUBLE -DCONJ -DXCONJ $< -o $@
ifeq ($(BUILD_COMPLEX),1)
$(KDIR)chemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_U_KERNEL) $(CHEMV_U_PARAM) $(KDIR)chemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_U_KERNEL) $(CHEMV_U_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $@
@ -401,6 +455,9 @@ $(KDIR)chemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_V$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)chemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_M_KERNEL) $(CHEMV_L_PARAM) ../symcopy.h $(KDIR)chemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_M_KERNEL) $(CHEMV_L_PARAM) ../symcopy.h
$(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
endif
ifeq ($(BUILD_COMPLEX16),1)
$(KDIR)zhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_U_KERNEL) $(ZHEMV_U_PARAM) $(KDIR)zhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_U_KERNEL) $(ZHEMV_U_PARAM)
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $@
@ -413,7 +470,7 @@ $(KDIR)zhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)zhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_M_KERNEL) $(ZHEMV_L_PARAM) ../symcopy.h $(KDIR)zhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_M_KERNEL) $(ZHEMV_L_PARAM) ../symcopy.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@
endif
$(KDIR)xhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_U_KERNEL) $(KDIR)xhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_U_KERNEL)
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $@
@ -426,3 +483,4 @@ $(KDIR)xhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER
$(KDIR)xhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_M_KERNEL) ../symcopy.h $(KDIR)xhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_M_KERNEL) ../symcopy.h
$(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@

View File

@ -100,8 +100,10 @@ SHKERNELOBJS += \
$(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ) $(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ)
endif endif
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" ""
SKERNELOBJS += \ SKERNELOBJS += \
sgemm_kernel$(TSUFFIX).$(SUFFIX) \ sgemm_kernel$(TSUFFIX).$(SUFFIX) \
sgemm_beta$(TSUFFIX).$(SUFFIX) \
$(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \ $(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \
$(SGEMMONCOPYOBJ) $(SGEMMOTCOPYOBJ) $(SGEMMONCOPYOBJ) $(SGEMMOTCOPYOBJ)
@ -110,28 +112,36 @@ SKERNELOBJS += \
sgemm_direct$(TSUFFIX).$(SUFFIX) \ sgemm_direct$(TSUFFIX).$(SUFFIX) \
sgemm_direct_performant$(TSUFFIX).$(SUFFIX) sgemm_direct_performant$(TSUFFIX).$(SUFFIX)
endif endif
endif
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
DKERNELOBJS += \ DKERNELOBJS += \
dgemm_beta$(TSUFFIX).$(SUFFIX) \
dgemm_kernel$(TSUFFIX).$(SUFFIX) \ dgemm_kernel$(TSUFFIX).$(SUFFIX) \
$(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \ $(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \
$(DGEMMONCOPYOBJ) $(DGEMMOTCOPYOBJ) $(DGEMMONCOPYOBJ) $(DGEMMOTCOPYOBJ)
endif
QKERNELOBJS += \ QKERNELOBJS += \
qgemm_kernel$(TSUFFIX).$(SUFFIX) \ qgemm_kernel$(TSUFFIX).$(SUFFIX) \
$(QGEMMINCOPYOBJ) $(QGEMMITCOPYOBJ) \ $(QGEMMINCOPYOBJ) $(QGEMMITCOPYOBJ) \
$(QGEMMONCOPYOBJ) $(QGEMMOTCOPYOBJ) $(QGEMMONCOPYOBJ) $(QGEMMOTCOPYOBJ)
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CKERNELOBJS += \ CKERNELOBJS += \
cgemm_kernel_n$(TSUFFIX).$(SUFFIX) cgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ cgemm_kernel_n$(TSUFFIX).$(SUFFIX) cgemm_kernel_r$(TSUFFIX).$(SUFFIX) \
cgemm_kernel_l$(TSUFFIX).$(SUFFIX) cgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ cgemm_kernel_l$(TSUFFIX).$(SUFFIX) cgemm_kernel_b$(TSUFFIX).$(SUFFIX) \
$(CGEMMINCOPYOBJ) $(CGEMMITCOPYOBJ) \ $(CGEMMINCOPYOBJ) $(CGEMMITCOPYOBJ) \
$(CGEMMONCOPYOBJ) $(CGEMMOTCOPYOBJ) $(CGEMMONCOPYOBJ) $(CGEMMOTCOPYOBJ)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZKERNELOBJS += \ ZKERNELOBJS += \
zgemm_kernel_n$(TSUFFIX).$(SUFFIX) zgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ zgemm_kernel_n$(TSUFFIX).$(SUFFIX) zgemm_kernel_r$(TSUFFIX).$(SUFFIX) \
zgemm_kernel_l$(TSUFFIX).$(SUFFIX) zgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ zgemm_kernel_l$(TSUFFIX).$(SUFFIX) zgemm_kernel_b$(TSUFFIX).$(SUFFIX) \
$(ZGEMMINCOPYOBJ) $(ZGEMMITCOPYOBJ) \ $(ZGEMMINCOPYOBJ) $(ZGEMMITCOPYOBJ) \
$(ZGEMMONCOPYOBJ) $(ZGEMMOTCOPYOBJ) $(ZGEMMONCOPYOBJ) $(ZGEMMOTCOPYOBJ)
endif
XKERNELOBJS += \ XKERNELOBJS += \
xgemm_kernel_n$(TSUFFIX).$(SUFFIX) xgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ xgemm_kernel_n$(TSUFFIX).$(SUFFIX) xgemm_kernel_r$(TSUFFIX).$(SUFFIX) \
@ -153,38 +163,48 @@ ifeq ($(BUILD_HALF),1)
SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX) SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX)
endif endif
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS += \ SBLASOBJS += \
sgemm_beta$(TSUFFIX).$(SUFFIX) \ sgemm_beta$(TSUFFIX).$(SUFFIX) \
strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
strmm_kernel_RN$(TSUFFIX).$(SUFFIX) strmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ strmm_kernel_RN$(TSUFFIX).$(SUFFIX) strmm_kernel_RT$(TSUFFIX).$(SUFFIX) \
strsm_kernel_LN$(TSUFFIX).$(SUFFIX) strsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ strsm_kernel_LN$(TSUFFIX).$(SUFFIX) strsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS += \ DBLASOBJS += \
dgemm_beta$(TSUFFIX).$(SUFFIX) \ dgemm_beta$(TSUFFIX).$(SUFFIX) \
dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \
dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX)
endif
QBLASOBJS += \ QBLASOBJS += \
qgemm_beta$(TSUFFIX).$(SUFFIX) \ qgemm_beta$(TSUFFIX).$(SUFFIX) \
qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \
qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX)
ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS += \ CBLASOBJS += \
cgemm_beta$(TSUFFIX).$(SUFFIX) \
ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \ ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \
ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \
ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) \ ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX)
endif
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS += \
cgemm_beta$(TSUFFIX).$(SUFFIX) \
ctrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ ctrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
ctrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ ctrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \
ctrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ ctrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS += \ ZBLASOBJS += \
zgemm_beta$(TSUFFIX).$(SUFFIX) \ zgemm_beta$(TSUFFIX).$(SUFFIX) \
ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \
@ -194,7 +214,8 @@ ZBLASOBJS += \
ztrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ ztrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
ztrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ ztrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \
ztrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ ztrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX)
endif
XBLASOBJS += \ XBLASOBJS += \
xgemm_beta$(TSUFFIX).$(SUFFIX) \ xgemm_beta$(TSUFFIX).$(SUFFIX) \
@ -205,7 +226,7 @@ XBLASOBJS += \
xtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ xtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \
xtrsm_kernel_LR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ xtrsm_kernel_LR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX)
ifeq ($(USE_GEMM3M), 1) ifeq ($(USE_GEMM3M), 1)
@ -215,6 +236,7 @@ XBLASOBJS += xgemm3m_kernel$(TSUFFIX).$(SUFFIX)
endif endif
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += \ SBLASOBJS += \
strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \ strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \
strmm_ilnucopy$(TSUFFIX).$(SUFFIX) strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ strmm_ilnucopy$(TSUFFIX).$(SUFFIX) strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \
@ -223,7 +245,10 @@ SBLASOBJS += \
strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \ strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \
strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \ strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \
strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \ strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \
strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) \ strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX)
endif
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS += \
strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \ strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \
strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \
strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \ strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \
@ -231,10 +256,15 @@ SBLASOBJS += \
strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \ strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \
strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \ strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \
strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \ strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \
strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) \ strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += \
ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \ ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \
ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX) ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS += \ DBLASOBJS += \
dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \
dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \
@ -254,6 +284,7 @@ DBLASOBJS += \
dtrsm_oltucopy$(TSUFFIX).$(SUFFIX) dtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ dtrsm_oltucopy$(TSUFFIX).$(SUFFIX) dtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \
dsymm_iutcopy$(TSUFFIX).$(SUFFIX) dsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ dsymm_iutcopy$(TSUFFIX).$(SUFFIX) dsymm_iltcopy$(TSUFFIX).$(SUFFIX) \
dsymm_outcopy$(TSUFFIX).$(SUFFIX) dsymm_oltcopy$(TSUFFIX).$(SUFFIX) dsymm_outcopy$(TSUFFIX).$(SUFFIX) dsymm_oltcopy$(TSUFFIX).$(SUFFIX)
endif
QBLASOBJS += \ QBLASOBJS += \
qtrmm_iunucopy$(TSUFFIX).$(SUFFIX) qtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ qtrmm_iunucopy$(TSUFFIX).$(SUFFIX) qtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \
@ -273,8 +304,9 @@ QBLASOBJS += \
qtrsm_outucopy$(TSUFFIX).$(SUFFIX) qtrsm_outncopy$(TSUFFIX).$(SUFFIX) \ qtrsm_outucopy$(TSUFFIX).$(SUFFIX) qtrsm_outncopy$(TSUFFIX).$(SUFFIX) \
qtrsm_oltucopy$(TSUFFIX).$(SUFFIX) qtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ qtrsm_oltucopy$(TSUFFIX).$(SUFFIX) qtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \
qsymm_iutcopy$(TSUFFIX).$(SUFFIX) qsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ qsymm_iutcopy$(TSUFFIX).$(SUFFIX) qsymm_iltcopy$(TSUFFIX).$(SUFFIX) \
qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX) \ qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX)
ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS += \ CBLASOBJS += \
ctrmm_iunucopy$(TSUFFIX).$(SUFFIX) ctrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ ctrmm_iunucopy$(TSUFFIX).$(SUFFIX) ctrmm_iunncopy$(TSUFFIX).$(SUFFIX) \
ctrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ ctrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \
@ -284,6 +316,13 @@ CBLASOBJS += \
ctrmm_olnucopy$(TSUFFIX).$(SUFFIX) ctrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ ctrmm_olnucopy$(TSUFFIX).$(SUFFIX) ctrmm_olnncopy$(TSUFFIX).$(SUFFIX) \
ctrmm_outucopy$(TSUFFIX).$(SUFFIX) ctrmm_outncopy$(TSUFFIX).$(SUFFIX) \ ctrmm_outucopy$(TSUFFIX).$(SUFFIX) ctrmm_outncopy$(TSUFFIX).$(SUFFIX) \
ctrmm_oltucopy$(TSUFFIX).$(SUFFIX) ctrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ ctrmm_oltucopy$(TSUFFIX).$(SUFFIX) ctrmm_oltncopy$(TSUFFIX).$(SUFFIX) \
csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \
csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \
chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \
chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX)
endif
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS += \
ctrsm_iunucopy$(TSUFFIX).$(SUFFIX) ctrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_iunucopy$(TSUFFIX).$(SUFFIX) ctrsm_iunncopy$(TSUFFIX).$(SUFFIX) \
ctrsm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \
ctrsm_iutucopy$(TSUFFIX).$(SUFFIX) ctrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_iutucopy$(TSUFFIX).$(SUFFIX) ctrsm_iutncopy$(TSUFFIX).$(SUFFIX) \
@ -291,12 +330,10 @@ CBLASOBJS += \
ctrsm_ounucopy$(TSUFFIX).$(SUFFIX) ctrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_ounucopy$(TSUFFIX).$(SUFFIX) ctrsm_ounncopy$(TSUFFIX).$(SUFFIX) \
ctrsm_olnucopy$(TSUFFIX).$(SUFFIX) ctrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_olnucopy$(TSUFFIX).$(SUFFIX) ctrsm_olnncopy$(TSUFFIX).$(SUFFIX) \
ctrsm_outucopy$(TSUFFIX).$(SUFFIX) ctrsm_outncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_outucopy$(TSUFFIX).$(SUFFIX) ctrsm_outncopy$(TSUFFIX).$(SUFFIX) \
ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX)
csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \ endif
csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \
chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \
chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX)
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS += \ ZBLASOBJS += \
ztrmm_iunucopy$(TSUFFIX).$(SUFFIX) ztrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ ztrmm_iunucopy$(TSUFFIX).$(SUFFIX) ztrmm_iunncopy$(TSUFFIX).$(SUFFIX) \
ztrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ztrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ ztrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ztrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \
@ -318,6 +355,7 @@ ZBLASOBJS += \
zsymm_outcopy$(TSUFFIX).$(SUFFIX) zsymm_oltcopy$(TSUFFIX).$(SUFFIX) \ zsymm_outcopy$(TSUFFIX).$(SUFFIX) zsymm_oltcopy$(TSUFFIX).$(SUFFIX) \
zhemm_iutcopy$(TSUFFIX).$(SUFFIX) zhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ zhemm_iutcopy$(TSUFFIX).$(SUFFIX) zhemm_iltcopy$(TSUFFIX).$(SUFFIX) \
zhemm_outcopy$(TSUFFIX).$(SUFFIX) zhemm_oltcopy$(TSUFFIX).$(SUFFIX) zhemm_outcopy$(TSUFFIX).$(SUFFIX) zhemm_oltcopy$(TSUFFIX).$(SUFFIX)
endif
XBLASOBJS += \ XBLASOBJS += \
xtrmm_iunucopy$(TSUFFIX).$(SUFFIX) xtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ xtrmm_iunucopy$(TSUFFIX).$(SUFFIX) xtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \
@ -343,6 +381,7 @@ XBLASOBJS += \
ifeq ($(USE_GEMM3M), 1) ifeq ($(USE_GEMM3M), 1)
ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS += \ CBLASOBJS += \
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \
@ -362,7 +401,9 @@ CBLASOBJS += \
chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \
chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \
chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) chemm3m_olcopyi$(TSUFFIX).$(SUFFIX) chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) chemm3m_olcopyi$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS += \ ZBLASOBJS += \
zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \
@ -382,6 +423,7 @@ ZBLASOBJS += \
zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \
zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \
zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX)
endif
XBLASOBJS += \ XBLASOBJS += \
xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \
@ -406,20 +448,25 @@ XBLASOBJS += \
endif endif
###### BLAS extensions ##### ###### BLAS extensions #####
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS += \ SBLASOBJS += \
somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
simatcopy_k_cn$(TSUFFIX).$(SUFFIX) simatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ simatcopy_k_cn$(TSUFFIX).$(SUFFIX) simatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
simatcopy_k_ct$(TSUFFIX).$(SUFFIX) simatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ simatcopy_k_ct$(TSUFFIX).$(SUFFIX) simatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
sgeadd_k$(TSUFFIX).$(SUFFIX) sgeadd_k$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS += \ DBLASOBJS += \
domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
dimatcopy_k_cn$(TSUFFIX).$(SUFFIX) dimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ dimatcopy_k_cn$(TSUFFIX).$(SUFFIX) dimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
dimatcopy_k_ct$(TSUFFIX).$(SUFFIX) dimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ dimatcopy_k_ct$(TSUFFIX).$(SUFFIX) dimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
dgeadd_k$(TSUFFIX).$(SUFFIX) dgeadd_k$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS += \ CBLASOBJS += \
comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
@ -430,7 +477,9 @@ CBLASOBJS += \
cimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ cimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
cimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ cimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
cgeadd_k$(TSUFFIX).$(SUFFIX) cgeadd_k$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS += \ ZBLASOBJS += \
zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \
zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \
@ -441,6 +490,7 @@ ZBLASOBJS += \
zimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ zimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \
zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \
zgeadd_k$(TSUFFIX).$(SUFFIX) zgeadd_k$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(BUILD_HALF), 1) ifeq ($(BUILD_HALF), 1)
SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))

View File

@ -114,6 +114,7 @@ gotoblas_t TABLE_NAME = {
#endif #endif
#endif #endif
#if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
0, 0, 0, 0, 0, 0,
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N,
#ifdef SGEMM_DEFAULT_UNROLL_MN #ifdef SGEMM_DEFAULT_UNROLL_MN
@ -121,7 +122,7 @@ gotoblas_t TABLE_NAME = {
#else #else
MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N),
#endif #endif
#endif
#ifdef HAVE_EXCLUSIVE_CACHE #ifdef HAVE_EXCLUSIVE_CACHE
1, 1,
@ -129,19 +130,38 @@ gotoblas_t TABLE_NAME = {
0, 0,
#endif #endif
#if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
samax_kTS, samin_kTS, smax_kTS, smin_kTS, samax_kTS, samin_kTS, smax_kTS, smin_kTS,
isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS, snrm2_kTS, sasum_kTS,
dsdot_kTS, #endif
srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, #if BUILD_SINGLE == 1
sgemv_nTS, sgemv_tTS, sger_kTS, ssum_kTS,
ssymv_LTS, ssymv_UTS, #endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
scopy_kTS, sdot_kTS,
// dsdot_kTS,
srot_kTS, saxpy_kTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1)
sscal_kTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
sswap_kTS,
sgemv_nTS, sgemv_tTS,
#endif
#if BUILD_SINGLE == 1
sger_kTS,
ssymv_LTS, ssymv_UTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1)
#ifdef ARCH_X86_64 #ifdef ARCH_X86_64
sgemm_directTS, sgemm_directTS,
sgemm_direct_performantTS, sgemm_direct_performantTS,
#endif #endif
sgemm_kernelTS, sgemm_betaTS, sgemm_kernelTS, sgemm_betaTS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
sgemm_incopyTS, sgemm_itcopyTS, sgemm_incopyTS, sgemm_itcopyTS,
@ -149,6 +169,9 @@ gotoblas_t TABLE_NAME = {
sgemm_oncopyTS, sgemm_otcopyTS, sgemm_oncopyTS, sgemm_otcopyTS,
#endif #endif
sgemm_oncopyTS, sgemm_otcopyTS, sgemm_oncopyTS, sgemm_otcopyTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS, strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS, strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS,
@ -159,6 +182,8 @@ gotoblas_t TABLE_NAME = {
#endif #endif
strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS, strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS,
strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS, strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS,
#endif
#if BUILD_SINGLE == 1
strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS, strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS,
#if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N
strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS, strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS,
@ -175,13 +200,16 @@ gotoblas_t TABLE_NAME = {
ssymm_outcopyTS, ssymm_oltcopyTS, ssymm_outcopyTS, ssymm_oltcopyTS,
#endif #endif
ssymm_outcopyTS, ssymm_oltcopyTS, ssymm_outcopyTS, ssymm_oltcopyTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
#ifndef NO_LAPACK #ifndef NO_LAPACK
sneg_tcopyTS, slaswp_ncopyTS, sneg_tcopyTS, slaswp_ncopyTS,
#else #else
NULL,NULL, NULL,NULL,
#endif #endif
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
0, 0, 0, 0, 0, 0,
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N,
#ifdef DGEMM_DEFAULT_UNROLL_MN #ifdef DGEMM_DEFAULT_UNROLL_MN
@ -189,14 +217,36 @@ gotoblas_t TABLE_NAME = {
#else #else
MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N),
#endif #endif
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
damax_kTS, damin_kTS, dmax_kTS, dmin_kTS, damax_kTS, damin_kTS, dmax_kTS, dmin_kTS,
idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS, idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS,
dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS, dnrm2_kTS, dasum_kTS,
drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS, #endif
dgemv_nTS, dgemv_tTS, dger_kTS, #if (BUILD_DOUBLE==1)
dsum_kTS,
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
dcopy_kTS, ddot_kTS,
#endif
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1)
dsdot_kTS,
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
drot_kTS,
daxpy_kTS,
dscal_kTS,
dswap_kTS,
dgemv_nTS, dgemv_tTS,
#endif
#if (BUILD_DOUBLE==1)
dger_kTS,
dsymv_LTS, dsymv_UTS, dsymv_LTS, dsymv_UTS,
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
dgemm_kernelTS, dgemm_betaTS, dgemm_kernelTS, dgemm_betaTS,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
dgemm_incopyTS, dgemm_itcopyTS, dgemm_incopyTS, dgemm_itcopyTS,
@ -204,6 +254,9 @@ gotoblas_t TABLE_NAME = {
dgemm_oncopyTS, dgemm_otcopyTS, dgemm_oncopyTS, dgemm_otcopyTS,
#endif #endif
dgemm_oncopyTS, dgemm_otcopyTS, dgemm_oncopyTS, dgemm_otcopyTS,
#endif
#if (BUILD_DOUBLE==1)
dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS, dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS,
#if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N
dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS, dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS,
@ -237,6 +290,8 @@ gotoblas_t TABLE_NAME = {
NULL, NULL, NULL, NULL,
#endif #endif
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
0, 0, 0, 0, 0, 0,
@ -291,6 +346,7 @@ gotoblas_t TABLE_NAME = {
#endif #endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
0, 0, 0, 0, 0, 0,
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N,
#ifdef CGEMM_DEFAULT_UNROLL_MN #ifdef CGEMM_DEFAULT_UNROLL_MN
@ -298,21 +354,34 @@ gotoblas_t TABLE_NAME = {
#else #else
MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N),
#endif #endif
camax_kTS, camin_kTS, icamax_kTS, icamin_kTS, camax_kTS, camin_kTS, icamax_kTS, icamin_kTS,
cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS, #endif
cdotu_kTS, cdotc_kTS, csrot_kTS, #if (BUILD_COMPLEX)
caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS, cnrm2_kTS, casum_kTS, csum_kTS,
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
ccopy_kTS, cdotu_kTS, cdotc_kTS,
#endif
#if (BUILD_COMPLEX)
csrot_kTS,
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
caxpy_kTS,
caxpyc_kTS,
cscal_kTS,
cswap_kTS,
cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS, cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS,
cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS, cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS,
#endif
#if (BUILD_COMPLEX)
cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS, cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS,
csymv_LTS, csymv_UTS, csymv_LTS, csymv_UTS,
chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS, chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS,
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS, cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS,
cgemm_betaTS, cgemm_betaTS,
#if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N
cgemm_incopyTS, cgemm_itcopyTS, cgemm_incopyTS, cgemm_itcopyTS,
#else #else
@ -332,6 +401,8 @@ gotoblas_t TABLE_NAME = {
#endif #endif
ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS, ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS,
ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS, ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS,
#endif
#if (BUILD_COMPLEX)
ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS, ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS,
ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS, ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS,
@ -361,7 +432,7 @@ gotoblas_t TABLE_NAME = {
0, 0, 0, 0, 0, 0,
#if defined(USE_GEMM3M) #if (USE_GEMM3M)
#ifdef CGEMM3M_DEFAULT_UNROLL_M #ifdef CGEMM3M_DEFAULT_UNROLL_M
CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N), CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N),
#else #else
@ -419,13 +490,20 @@ gotoblas_t TABLE_NAME = {
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
#endif #endif
#endif
#if (BUILD_COMPLEX || BUILD_COMPLEX16)
#ifndef NO_LAPACK #ifndef NO_LAPACK
cneg_tcopyTS, claswp_ncopyTS, cneg_tcopyTS,
claswp_ncopyTS,
#else #else
NULL, NULL, NULL, NULL,
#endif #endif
#endif
#if BUILD_COMPLEX16 == 1
0, 0, 0, 0, 0, 0,
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N,
#ifdef ZGEMM_DEFAULT_UNROLL_MN #ifdef ZGEMM_DEFAULT_UNROLL_MN
@ -495,7 +573,7 @@ gotoblas_t TABLE_NAME = {
zhemm_outcopyTS, zhemm_oltcopyTS, zhemm_outcopyTS, zhemm_oltcopyTS,
0, 0, 0, 0, 0, 0,
#if defined(USE_GEMM3M) #if (USE_GEMM3M)
#ifdef ZGEMM3M_DEFAULT_UNROLL_M #ifdef ZGEMM3M_DEFAULT_UNROLL_M
ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N), ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N),
#else #else
@ -560,6 +638,8 @@ gotoblas_t TABLE_NAME = {
NULL, NULL, NULL, NULL,
#endif #endif
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
0, 0, 0, 0, 0, 0,
@ -626,7 +706,7 @@ gotoblas_t TABLE_NAME = {
xhemm_outcopyTS, xhemm_oltcopyTS, xhemm_outcopyTS, xhemm_oltcopyTS,
0, 0, 0, 0, 0, 0,
#if defined(USE_GEMM3M) #if (USE_GEMM3M)
QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N), QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N),
xgemm3m_kernelTS, xgemm3m_kernelTS,
@ -691,52 +771,112 @@ gotoblas_t TABLE_NAME = {
init_parameter, init_parameter,
SNUMOPT, DNUMOPT, QNUMOPT, SNUMOPT, DNUMOPT, QNUMOPT,
#if BUILD_SINGLE == 1
saxpby_kTS,
#endif
#if BUILD_DOUBLE == 1
daxpby_kTS,
#endif
#if BUILD_COMPLEX == 1
caxpby_kTS,
#endif
#if BUILD_COMPLEX16== 1
zaxpby_kTS,
#endif
saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS, #if BUILD_SINGLE == 1
somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS, somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS,
#endif
#if BUILD_DOUBLE== 1
domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS, domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS,
#endif
#if BUILD_COMPLEX == 1
comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS, comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS,
comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS, comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS,
#endif
#if BUILD_COMPLEX16 == 1
zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS, zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS,
zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS, zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS,
#endif
#if BUILD_SINGLE == 1
simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS, simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS,
#endif
#if BUILD_DOUBLE== 1
dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS, dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS,
#endif
#if BUILD_COMPLEX== 1
cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS, cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS,
cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS, cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS,
#endif
#if BUILD_COMPLEX16==1
zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS, zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS,
zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS, zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS,
#endif
sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS #if BUILD_SINGLE == 1
sgeadd_kTS,
#endif
#if BUILD_DOUBLE==1
dgeadd_kTS,
#endif
#if BUILD_COMPLEX==1
cgeadd_kTS,
#endif
#if BUILD_COMPLEX16==1
zgeadd_kTS
#endif
}; };
#if defined(ARCH_ARM64) #if (ARCH_ARM64)
static void init_parameter(void) { static void init_parameter(void) {
#if defined(BUILD_HALF) #if (BUILD_HALF)
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE == 1
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#if defined(BUILD_HALF) #if (BUILD_HALF)
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
#endif #endif
#if BUILD_SINGLE == 1
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
#endif
#if BUILD_DOUBLE== 1
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX== 1
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
#endif
#if defined(BUILD_HALF) #if (BUILD_HALF)
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
#endif #endif
#if BUILD_SINGLE == 1
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
#endif
#if BUILD_DOUBLE==1
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
@ -747,7 +887,7 @@ static void init_parameter(void) {
TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R; TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R;
#endif #endif
#if defined(USE_GEMM3M) #if (USE_GEMM3M)
#ifdef CGEMM3M_DEFAULT_P #ifdef CGEMM3M_DEFAULT_P
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else #else
@ -792,8 +932,8 @@ static void init_parameter(void) {
#endif #endif
} }
#else // defined(ARCH_ARM64) #else // (ARCH_ARM64)
#if defined(ARCH_POWER) #if (ARCH_POWER)
static void init_parameter(void) { static void init_parameter(void) {
#ifdef BUILD_HALF #ifdef BUILD_HALF
@ -823,7 +963,7 @@ static void init_parameter(void) {
} }
#else //POWER #else //POWER
#if defined(ARCH_ZARCH) #if (ARCH_ZARCH)
static void init_parameter(void) { static void init_parameter(void) {
#ifdef BUILD_HALF #ifdef BUILD_HALF
TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P;
@ -989,22 +1129,34 @@ static void init_parameter(void) {
TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R;
TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q;
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX == 1
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
#endif
#if BUILD_COMPLEX == 1
#ifdef CGEMM3M_DEFAULT_Q #ifdef CGEMM3M_DEFAULT_Q
TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q; TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q;
#else #else
TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q; TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q;
#endif #endif
#endif
#if BUILD_COMPLEX16 == 1
#ifdef ZGEMM3M_DEFAULT_Q #ifdef ZGEMM3M_DEFAULT_Q
TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q; TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q;
#else #else
TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q; TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q;
#endif #endif
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q; TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q;
@ -1012,16 +1164,24 @@ static void init_parameter(void) {
TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q; TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q;
#endif #endif
#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON) #if (CORE_KATMAI) || (CORE_COPPERMINE) || (CORE_BANIAS) || (CORE_YONAH) || (CORE_ATHLON)
#ifdef DEBUG #ifdef DEBUG
fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n"); fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 64 * (l2 >> 7); TABLE_NAME.sgemm_p = 64 * (l2 >> 7);
#endif
#if BUILD_DOUBLE == 1
TABLE_NAME.dgemm_p = 32 * (l2 >> 7); TABLE_NAME.dgemm_p = 32 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 32 * (l2 >> 7); TABLE_NAME.cgemm_p = 32 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 16 * (l2 >> 7); TABLE_NAME.zgemm_p = 16 * (l2 >> 7);
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = 16 * (l2 >> 7); TABLE_NAME.qgemm_p = 16 * (l2 >> 7);
TABLE_NAME.xgemm_p = 8 * (l2 >> 7); TABLE_NAME.xgemm_p = 8 * (l2 >> 7);
@ -1034,10 +1194,18 @@ static void init_parameter(void) {
fprintf(stderr, "Northwood\n"); fprintf(stderr, "Northwood\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 96 * (l2 >> 7); TABLE_NAME.sgemm_p = 96 * (l2 >> 7);
#endif
#if BUILD_DOUBLE == 1
TABLE_NAME.dgemm_p = 48 * (l2 >> 7); TABLE_NAME.dgemm_p = 48 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 48 * (l2 >> 7); TABLE_NAME.cgemm_p = 48 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 24 * (l2 >> 7); TABLE_NAME.zgemm_p = 24 * (l2 >> 7);
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = 24 * (l2 >> 7); TABLE_NAME.qgemm_p = 24 * (l2 >> 7);
TABLE_NAME.xgemm_p = 12 * (l2 >> 7); TABLE_NAME.xgemm_p = 12 * (l2 >> 7);
@ -1050,10 +1218,18 @@ static void init_parameter(void) {
fprintf(stderr, "Atom\n"); fprintf(stderr, "Atom\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 256; TABLE_NAME.sgemm_p = 256;
#endif
#if BUILD_DOUBLE ==1
TABLE_NAME.dgemm_p = 128; TABLE_NAME.dgemm_p = 128;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 128; TABLE_NAME.cgemm_p = 128;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 64; TABLE_NAME.zgemm_p = 64;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = 64; TABLE_NAME.qgemm_p = 64;
TABLE_NAME.xgemm_p = 32; TABLE_NAME.xgemm_p = 32;
@ -1066,10 +1242,18 @@ static void init_parameter(void) {
fprintf(stderr, "Prescott\n"); fprintf(stderr, "Prescott\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 56 * (l2 >> 7); TABLE_NAME.sgemm_p = 56 * (l2 >> 7);
#endif
#if BUILD_DOUBLE ==1
TABLE_NAME.dgemm_p = 28 * (l2 >> 7); TABLE_NAME.dgemm_p = 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 28 * (l2 >> 7); TABLE_NAME.cgemm_p = 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16 == 1
TABLE_NAME.zgemm_p = 14 * (l2 >> 7); TABLE_NAME.zgemm_p = 14 * (l2 >> 7);
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = 14 * (l2 >> 7); TABLE_NAME.qgemm_p = 14 * (l2 >> 7);
TABLE_NAME.xgemm_p = 7 * (l2 >> 7); TABLE_NAME.xgemm_p = 7 * (l2 >> 7);
@ -1082,10 +1266,18 @@ static void init_parameter(void) {
fprintf(stderr, "Core2\n"); fprintf(stderr, "Core2\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8; TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
#endif
#if BUILD_DOUBLE==1
TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8; TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4; TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4; TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8; TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4; TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
@ -1098,10 +1290,18 @@ static void init_parameter(void) {
fprintf(stderr, "Penryn\n"); fprintf(stderr, "Penryn\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_DOUBLE == 1
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
@ -1114,10 +1314,18 @@ static void init_parameter(void) {
fprintf(stderr, "Dunnington\n"); fprintf(stderr, "Dunnington\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_DOUBLE ==1
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
@ -1131,10 +1339,18 @@ static void init_parameter(void) {
fprintf(stderr, "Nehalem\n"); fprintf(stderr, "Nehalem\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@ -1147,10 +1363,18 @@ static void init_parameter(void) {
fprintf(stderr, "Sandybridge\n"); fprintf(stderr, "Sandybridge\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@ -1163,26 +1387,42 @@ static void init_parameter(void) {
fprintf(stderr, "Haswell\n"); fprintf(stderr, "Haswell\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
#endif #endif
#endif #endif
#if defined (SKYLAKEX) || defined (COOPERLAKE) #if defined(SKYLAKEX) || defined(COOPERLAKE)
#ifdef DEBUG #ifdef DEBUG
fprintf(stderr, "SkylakeX\n"); fprintf(stderr, "SkylakeX\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@ -1196,10 +1436,18 @@ static void init_parameter(void) {
fprintf(stderr, "Opteron\n"); fprintf(stderr, "Opteron\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7); TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7);
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7); TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7); TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7);
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7); TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7);
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7); TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7);
TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7); TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7);
@ -1212,10 +1460,18 @@ static void init_parameter(void) {
fprintf(stderr, "Barcelona\n"); fprintf(stderr, "Barcelona\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@ -1228,10 +1484,18 @@ static void init_parameter(void) {
fprintf(stderr, "Bobcate\n"); fprintf(stderr, "Bobcate\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@ -1244,10 +1508,18 @@ static void init_parameter(void) {
fprintf(stderr, "Bulldozer\n"); fprintf(stderr, "Bulldozer\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@ -1260,10 +1532,18 @@ static void init_parameter(void) {
fprintf(stderr, "Excavator\n"); fprintf(stderr, "Excavator\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@ -1277,10 +1557,18 @@ static void init_parameter(void) {
fprintf(stderr, "Piledriver\n"); fprintf(stderr, "Piledriver\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@ -1293,10 +1581,18 @@ static void init_parameter(void) {
fprintf(stderr, "Steamroller\n"); fprintf(stderr, "Steamroller\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@ -1309,10 +1605,18 @@ static void init_parameter(void) {
fprintf(stderr, "Zen\n"); fprintf(stderr, "Zen\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if BUILD_DOUBLE
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if BUILD_COMPLEX16
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P;
TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P;
@ -1326,11 +1630,18 @@ static void init_parameter(void) {
fprintf(stderr, "NANO\n"); fprintf(stderr, "NANO\n");
#endif #endif
#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
#endif
#if (BUILD_DOUBLE==1)
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
#endif
#if (BUILD_COMPLEX==1)
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
#endif
#if (BUILD_COMPLEX16==1)
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
@ -1340,41 +1651,55 @@ static void init_parameter(void) {
#endif #endif
#if BUILD_COMPLEX==1
#ifdef CGEMM3M_DEFAULT_P #ifdef CGEMM3M_DEFAULT_P
TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P;
#else #else
TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p; TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p;
#endif #endif
#endif
#if BUILD_COMPLEX16==1
#ifdef ZGEMM3M_DEFAULT_P #ifdef ZGEMM3M_DEFAULT_P
TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P; TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P;
#else #else
TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p; TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p;
#endif #endif
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p; TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p;
#endif #endif
#if BUILD_SINGLE == 1
TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M; TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_DOUBLE== 1
TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M; TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_COMPLEX==1
TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M; TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_COMPLEX16==1
TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M; TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M;
#endif
#if BUILD_COMPLEX==1
#ifdef CGEMM3M_DEFAULT_UNROLL_M #ifdef CGEMM3M_DEFAULT_UNROLL_M
TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M; TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M;
#else #else
TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M; TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M;
#endif #endif
#endif
#if BUILD_COMPLEX16==1
#ifdef ZGEMM3M_DEFAULT_UNROLL_M #ifdef ZGEMM3M_DEFAULT_UNROLL_M
TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M; TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M;
#else #else
TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M; TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M;
#endif #endif
#endif
#ifdef QUAD_PRECISION #ifdef QUAD_PRECISION
TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M; TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M;
@ -1386,15 +1711,19 @@ static void init_parameter(void) {
fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p); fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p);
#endif #endif
#if BUILD_SINGLE==1
TABLE_NAME.sgemm_r = (((BUFFER_SIZE - TABLE_NAME.sgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align) + TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15); ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15);
#endif
#if BUILD_DOUBLE==1
TABLE_NAME.dgemm_r = (((BUFFER_SIZE - TABLE_NAME.dgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align) + TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15); ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15);
#endif
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_r = (((BUFFER_SIZE - TABLE_NAME.qgemm_r = (((BUFFER_SIZE -
@ -1403,26 +1732,33 @@ static void init_parameter(void) {
) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15); ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15);
#endif #endif
#if BUILD_COMPLEX ==1
TABLE_NAME.cgemm_r = (((BUFFER_SIZE - TABLE_NAME.cgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align) + TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15); ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15);
#endif
#if BUILD_COMPLEX16 ==1
TABLE_NAME.zgemm_r = (((BUFFER_SIZE - TABLE_NAME.zgemm_r = (((BUFFER_SIZE -
((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align) + TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15); ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15);
#endif
#if BUILD_COMPLEX == 1
TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE - TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE -
((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align) + TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15); ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15);
#endif
#if BUILD_COMPLEX16 == 1
TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE - TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE -
((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA
+ TABLE_NAME.align) & ~TABLE_NAME.align) + TABLE_NAME.align) & ~TABLE_NAME.align)
) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15); ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15);
#endif
@ -1444,4 +1780,4 @@ static void init_parameter(void) {
} }
#endif //POWER #endif //POWER
#endif //ZARCH #endif //ZARCH
#endif //defined(ARCH_ARM64) #endif //(ARCH_ARM64)

View File

@ -46,6 +46,7 @@ OBJ = \
lapacke_ilaver.o \ lapacke_ilaver.o \
lapacke_nancheck.o lapacke_nancheck.o
ifeq ($(BUILD_COMPLEX),1)
OBJ_C = \ OBJ_C = \
lapacke_cbbcsd.o \ lapacke_cbbcsd.o \
lapacke_cbbcsd_work.o \ lapacke_cbbcsd_work.o \
@ -653,7 +654,9 @@ lapacke_cupgtr.o \
lapacke_cupgtr_work.o \ lapacke_cupgtr_work.o \
lapacke_cupmtr.o \ lapacke_cupmtr.o \
lapacke_cupmtr_work.o lapacke_cupmtr_work.o
endif
ifeq ($(BUILD_DOUBLE),1)
OBJ_D = \ OBJ_D = \
lapacke_dbbcsd.o \ lapacke_dbbcsd.o \
lapacke_dbbcsd_work.o \ lapacke_dbbcsd_work.o \
@ -1218,8 +1221,12 @@ lapacke_dtrttf_work.o \
lapacke_dtrttp.o \ lapacke_dtrttp.o \
lapacke_dtrttp_work.o \ lapacke_dtrttp_work.o \
lapacke_dtzrzf.o \ lapacke_dtzrzf.o \
lapacke_dtzrzf_work.o lapacke_dtzrzf_work.o \
lapacke_slag2d.o \
lapacke_slag2d_work.o
endif
ifeq ($(BUILD_SINGLE),1)
OBJ_S = \ OBJ_S = \
lapacke_sbbcsd.o \ lapacke_sbbcsd.o \
lapacke_sbbcsd_work.o \ lapacke_sbbcsd_work.o \
@ -1395,8 +1402,6 @@ lapacke_slacn2.o \
lapacke_slacn2_work.o \ lapacke_slacn2_work.o \
lapacke_slacpy.o \ lapacke_slacpy.o \
lapacke_slacpy_work.o \ lapacke_slacpy_work.o \
lapacke_slag2d.o \
lapacke_slag2d_work.o \
lapacke_slamch.o \ lapacke_slamch.o \
lapacke_slamch_work.o \ lapacke_slamch_work.o \
lapacke_slange.o \ lapacke_slange.o \
@ -1781,7 +1786,9 @@ lapacke_strttp.o \
lapacke_strttp_work.o \ lapacke_strttp_work.o \
lapacke_stzrzf.o \ lapacke_stzrzf.o \
lapacke_stzrzf_work.o lapacke_stzrzf_work.o
endif
ifeq ($(BUILD_COMPLEX16),1)
OBJ_Z = \ OBJ_Z = \
lapacke_zbbcsd.o \ lapacke_zbbcsd.o \
lapacke_zbbcsd_work.o \ lapacke_zbbcsd_work.o \
@ -2393,35 +2400,52 @@ lapacke_zupgtr.o \
lapacke_zupgtr_work.o \ lapacke_zupgtr_work.o \
lapacke_zupmtr.o \ lapacke_zupmtr.o \
lapacke_zupmtr_work.o lapacke_zupmtr_work.o
endif
ifdef BUILD_DEPRECATED ifdef BUILD_DEPRECATED
DEPRECATED = \ ifeq ($(BUILD_COMPLEX),1)
DEPRECATEDC = \
lapacke_cggsvp.o \ lapacke_cggsvp.o \
lapacke_cggsvp_work.o \ lapacke_cggsvp_work.o \
lapacke_dggsvp.o \
lapacke_dggsvp_work.o \
lapacke_sggsvp.o \
lapacke_sggsvp_work.o \
lapacke_zggsvp.o \
lapacke_zggsvp_work.o \
lapacke_cggsvd.o \ lapacke_cggsvd.o \
lapacke_cggsvd_work.o \ lapacke_cggsvd_work.o \
lapacke_cgeqpf.o \
lapacke_cgeqpf_work.o
endif
ifeq ($(BUILD_DOUBLE),1)
DEPRECATEDD = \
lapacke_dggsvp.o \
lapacke_dggsvp_work.o \
lapacke_dggsvd.o \ lapacke_dggsvd.o \
lapacke_dggsvd_work.o \ lapacke_dggsvd_work.o \
lapacke_dgeqpf.o \
lapacke_dgeqpf_work.o
endif
ifeq ($(BUILD_SINGLE),1)
DEPRECATEDS = \
lapacke_sggsvp.o \
lapacke_sggsvp_work.o \
lapacke_sggsvd.o \ lapacke_sggsvd.o \
lapacke_sggsvd_work.o \ lapacke_sggsvd_work.o \
lapacke_sgeqpf.o \
lapacke_sgeqpf_work.o
endif
ifeq ($(BUILD_COMPLEX16),1)
DEPRECATEDZ = \
lapacke_zggsvp.o \
lapacke_zggsvp_work.o \
lapacke_zggsvd.o \ lapacke_zggsvd.o \
lapacke_zggsvd_work.o \ lapacke_zggsvd_work.o \
lapacke_cgeqpf.o \
lapacke_cgeqpf_work.o \
lapacke_dgeqpf.o \
lapacke_dgeqpf_work.o \
lapacke_sgeqpf.o \
lapacke_sgeqpf_work.o \
lapacke_zgeqpf.o \ lapacke_zgeqpf.o \
lapacke_zgeqpf_work.o lapacke_zgeqpf_work.o
endif endif
DEPRECATED = $(DEPRECATEDS) $(DEPRECATEDD) $(DEPRECATEDC) $(DEPRECATEDZ)
endif
ifdef USEXBLAS ifdef USEXBLAS
EXTENDED = \ EXTENDED = \
lapacke_cgbrfsx.o lapacke_cporfsx.o lapacke_dgerfsx.o lapacke_sgbrfsx.o lapacke_ssyrfsx.o lapacke_zherfsx.o \ lapacke_cgbrfsx.o lapacke_cporfsx.o lapacke_dgerfsx.o lapacke_sgbrfsx.o lapacke_ssyrfsx.o lapacke_zherfsx.o \
@ -2440,37 +2464,50 @@ endif
ifdef LAPACKE_WITH_TMG ifdef LAPACKE_WITH_TMG
# FILE PARTS OF TMGLIB # FILE PARTS OF TMGLIB
MATGEN = \ ifeq ($(BUILD_COMPLEX),1)
MATGENC = \
lapacke_clatms.o \ lapacke_clatms.o \
lapacke_clatms_work.o \ lapacke_clatms_work.o \
lapacke_dlatms.o \
lapacke_dlatms_work.o \
lapacke_slatms.o \
lapacke_slatms_work.o \
lapacke_zlatms.o \
lapacke_zlatms_work.o \
lapacke_clagge.o \ lapacke_clagge.o \
lapacke_clagge_work.o \ lapacke_clagge_work.o \
lapacke_dlagge.o \
lapacke_dlagge_work.o \
lapacke_slagge.o \
lapacke_slagge_work.o \
lapacke_zlagge.o \
lapacke_zlagge_work.o \
lapacke_claghe.o \ lapacke_claghe.o \
lapacke_claghe_work.o \ lapacke_claghe_work.o \
lapacke_clagsy.o \
lapacke_clagsy_work.o
endif
ifeq ($(BUILD_DOUBLE),1)
MATGEND = \
lapacke_dlatms.o \
lapacke_dlatms_work.o \
lapacke_dlagge.o \
lapacke_dlagge_work.o \
lapacke_dlagsy.o \
lapacke_dlagsy_work.o
endif
ifeq ($(BUILD_SINGLE),1)
MATGENS = \
lapacke_slatms.o \
lapacke_slatms_work.o \
lapacke_slagge.o \
lapacke_slagge_work.o \
lapacke_slagsy.o \
lapacke_slagsy_work.o
endif
ifeq ($(BUILD_COMPLEX16),1)
MATGENZ = \
lapacke_zlatms.o \
lapacke_zlatms_work.o \
lapacke_zlagge.o \
lapacke_zlagge_work.o \
lapacke_zlaghe.o \ lapacke_zlaghe.o \
lapacke_zlaghe_work.o \ lapacke_zlaghe_work.o \
lapacke_clagsy.o \
lapacke_clagsy_work.o \
lapacke_dlagsy.o \
lapacke_dlagsy_work.o \
lapacke_slagsy.o \
lapacke_slagsy_work.o \
lapacke_zlagsy.o \ lapacke_zlagsy.o \
lapacke_zlagsy_work.o lapacke_zlagsy_work.o
endif endif
MATGEN = $(MATGENS) $(MATGEND) $(MATGENC) $(MATGENZ)
endif
.PHONY: all .PHONY: all
all: $(LAPACKELIB) all: $(LAPACKELIB)

View File

@ -66,7 +66,9 @@ ALLAUX_O = ilaenv.o ilaenv2stage.o ieeeck.o lsamen.o xerbla.o xerbla_array.o \
ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \ ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \
../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o ../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o
ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" ""
SCLAUX = \ SCLAUX = \
sbdsvdx.o sstevx.o sstein.o \
sbdsdc.o \ sbdsdc.o \
sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \ sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \
slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \ slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \
@ -81,10 +83,14 @@ SCLAUX = \
slaset.o slasq1.o slasq2.o slasq3.o slasq4.o slasq5.o slasq6.o \ slaset.o slasq1.o slasq2.o slasq3.o slasq4.o slasq5.o slasq6.o \
slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \ slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \
ssteqr.o ssterf.o slaisnan.o sisnan.o \ ssteqr.o ssterf.o slaisnan.o sisnan.o \
slartgp.o slartgs.o \ slartgp.o slartgs.o scombssq.o \
../INSTALL/second_$(TIMER).o ../INSTALL/second_$(TIMER).o
endif
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
DZLAUX = \ DZLAUX = \
dcombssq.o \
dbdsvdx.o dstevx.o dstein.o \
dbdsdc.o \ dbdsdc.o \
dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \ dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \
dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \ dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \
@ -101,9 +107,12 @@ DZLAUX = \
dsteqr.o dsterf.o dlaisnan.o disnan.o \ dsteqr.o dsterf.o dlaisnan.o disnan.o \
dlartgp.o dlartgs.o \ dlartgp.o dlartgs.o \
../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o ../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o
endif
#ifeq ($(BUILD_SINGLE),1)
ifdef BUILD_SINGLE
SLASRC_O = \ SLASRC_O = \
sbdsvdx.o spotrf2.o sgetrf2.o \ spotrf2.o sgetrf2.o \
sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \ sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \
sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \ sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \
sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \ sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \
@ -145,8 +154,7 @@ SLASRC_O = \
ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \ ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \
ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \ ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \
sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \ sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \
ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \ ssptrf.o ssptri.o ssptrs.o sstegr.o sstev.o sstevd.o sstevr.o \
sstevx.o \
ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \ ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \
ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \ ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \
ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \ ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \
@ -180,9 +188,13 @@ SLASRC_O = \
ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \ ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \
ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \ ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \
ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \ ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \
sgesvdq.o scombssq.o sgesvdq.o
endif
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
DSLASRC_O = spotrs.o sgetrs.o spotrf.o sgetrf.o DSLASRC_O = spotrs.o sgetrs.o spotrf.o sgetrf.o
endif
ifdef USEXBLAS ifdef USEXBLAS
SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \ SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
@ -194,6 +206,7 @@ SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \
slascl2.o sla_wwaddw.o slascl2.o sla_wwaddw.o
endif endif
ifeq ($(BUILD_COMPLEX),1)
CLASRC_O = \ CLASRC_O = \
cpotrf2.o cgetrf2.o \ cpotrf2.o cgetrf2.o \
cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \ cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \
@ -284,6 +297,7 @@ CLASRC_O = \
cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \ cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \
chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \ chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \
cgesvdq.o cgesvdq.o
endif
ifdef USEXBLAS ifdef USEXBLAS
CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
@ -299,11 +313,13 @@ CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \
cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o
endif endif
ZCLASRC_O = cpotrs.o cgetrs.o cpotrf.o cgetrf.o ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
ZCLASRC_O = cpotrs.o cgetrs.o cpotrf.o cgetrf.o clag2z.o
endif
ifeq ($(BUILD_DOUBLE),1)
DLASRC_O = \ DLASRC_O = \
dpotrf2.o dgetrf2.o \ dpotrf2.o dgetrf2.o \
dbdsvdx.o \
dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \ dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \
dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \ dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \
dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \ dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \
@ -345,8 +361,7 @@ DLASRC_O = \
dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \ dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \
dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \ dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \
dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \ dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \
dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \ dsptrf.o dsptri.o dsptrs.o dstegr.o dstev.o dstevd.o dstevr.o \
dstevx.o \
dsycon.o dsyev.o dsyevd.o dsyevr.o \ dsycon.o dsyev.o dsyevd.o dsyevr.o \
dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \ dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \
dsysv.o dsysvx.o \ dsysv.o dsysvx.o \
@ -381,7 +396,8 @@ DLASRC_O = \
dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \ dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \
dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \ dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \
dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \ dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \
dgesvdq.o dcombssq.o dgesvdq.o
endif
ifdef USEXBLAS ifdef USEXBLAS
DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
@ -393,6 +409,7 @@ DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \
dlascl2.o dla_wwaddw.o dlascl2.o dla_wwaddw.o
endif endif
ifeq ($(BUILD_COMPLEX16),1)
ZLASRC_O = \ ZLASRC_O = \
zpotrf2.o zgetrf2.o \ zpotrf2.o zgetrf2.o \
zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \ zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \
@ -471,7 +488,7 @@ ZLASRC_O = \
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \ zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
zunmtr.o zupgtr.o \ zunmtr.o zupgtr.o \
zupmtr.o izmax1.o dzsum1.o zstemr.o \ zupmtr.o izmax1.o dzsum1.o zstemr.o \
zcgesv.o zcposv.o zlag2c.o clag2z.o zlat2c.o \ zcgesv.o zcposv.o zlag2c.o zlat2c.o \
zhfrk.o ztfttp.o zlanhf.o zpftrf.o zpftri.o zpftrs.o ztfsm.o ztftri.o \ zhfrk.o ztfttp.o zlanhf.o zpftrf.o zpftri.o zpftrs.o ztfsm.o ztftri.o \
ztfttr.o ztpttf.o ztpttr.o ztrttf.o ztrttp.o \ ztfttr.o ztpttf.o ztpttr.o ztrttf.o ztrttp.o \
zgeequb.o zgbequb.o zsyequb.o zpoequb.o zheequb.o \ zgeequb.o zgbequb.o zsyequb.o zpoequb.o zheequb.o \
@ -488,6 +505,7 @@ ZLASRC_O = \
zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \ zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \
zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \ zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \
zgesvdq.o zgesvdq.o
endif
ifdef USEXBLAS ifdef USEXBLAS
ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \
@ -501,18 +519,30 @@ ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \
zla_lin_berr.o zlarscl2.o zlascl2.o zla_wwaddw.o zla_lin_berr.o zlarscl2.o zlascl2.o zla_wwaddw.o
endif endif
DEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \ ifeq ($(BUILD_COMPLEX),1)
CDEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \
DEPRECATED/cgeqpf.o DEPRECATED/cggsvd.o DEPRECATED/cggsvp.o \ DEPRECATED/cgeqpf.o DEPRECATED/cggsvd.o DEPRECATED/cggsvp.o \
DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o \ DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o
endif
ifeq ($(BUILD_DOUBLE),1)
DDEPRECSRC = \
DEPRECATED/dgegs.o DEPRECATED/dgegv.o DEPRECATED/dgelsx.o \ DEPRECATED/dgegs.o DEPRECATED/dgegv.o DEPRECATED/dgelsx.o \
DEPRECATED/dgeqpf.o DEPRECATED/dggsvd.o DEPRECATED/dggsvp.o \ DEPRECATED/dgeqpf.o DEPRECATED/dggsvd.o DEPRECATED/dggsvp.o \
DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o \ DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o
endif
ifeq ($(BUILD_SINGLE),1)
SDEPRECSRC = \
DEPRECATED/sgegs.o DEPRECATED/sgegv.o DEPRECATED/sgelsx.o \ DEPRECATED/sgegs.o DEPRECATED/sgegv.o DEPRECATED/sgelsx.o \
DEPRECATED/sgeqpf.o DEPRECATED/sggsvd.o DEPRECATED/sggsvp.o \ DEPRECATED/sgeqpf.o DEPRECATED/sggsvd.o DEPRECATED/sggsvp.o \
DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o \ DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o
endif
ifeq ($(BUILD_COMPLEX16),1)
ZDEPRECSRC = \
DEPRECATED/zgegs.o DEPRECATED/zgegv.o DEPRECATED/zgelsx.o \ DEPRECATED/zgegs.o DEPRECATED/zgegv.o DEPRECATED/zgelsx.o \
DEPRECATED/zgeqpf.o DEPRECATED/zggsvd.o DEPRECATED/zggsvp.o \ DEPRECATED/zgeqpf.o DEPRECATED/zggsvd.o DEPRECATED/zggsvp.o \
DEPRECATED/zlahrd.o DEPRECATED/zlatzm.o DEPRECATED/ztzrqf.o DEPRECATED/zlahrd.o DEPRECATED/zlatzm.o DEPRECATED/ztzrqf.o
endif
# filter out optimized codes from OpenBLAS # filter out optimized codes from OpenBLAS
ALL_AUX_OBJS = xerbla.o ../INSTALL/lsame.o ALL_AUX_OBJS = xerbla.o ../INSTALL/lsame.o
@ -560,7 +590,7 @@ ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC)
endif endif
ifdef BUILD_DEPRECATED ifdef BUILD_DEPRECATED
DEPRECATED = $(DEPRECSRC) DEPRECATED = $(SDEPRECSRC) $(DDEPRECSRC) $(CDEPRECSRC) $(ZDEPRECSRC)
endif endif
.PHONY: all .PHONY: all

View File

@ -33,25 +33,37 @@
TOPSRCDIR = ../.. TOPSRCDIR = ../..
include $(TOPSRCDIR)/make.inc include $(TOPSRCDIR)/make.inc
ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" ""
SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o
endif
ifeq ($(BUILD_SINGLE),1)
SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \ SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \
slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \ slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \
slatm3.o slatm5.o slatm6.o slahilb.o slatm3.o slatm5.o slatm6.o slahilb.o
endif
ifeq ($(BUILD_COMPLEX),1)
CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \ CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \
clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \ clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \
clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o
endif
ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" ""
DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o
endif
ifeq ($(BUILD_DOUBLE),1)
DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \ DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \
dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \ dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \
dlatm3.o dlatm5.o dlatm6.o dlahilb.o dlatm3.o dlatm5.o dlatm6.o dlahilb.o
endif
ifeq ($(BUILD_COMPLEX16),1)
ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \ ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \
zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \ zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \
zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o
endif
.PHONY: all .PHONY: all
all: $(TMGLIB) all: $(TMGLIB)
@ -97,5 +109,9 @@ cleanobj:
cleanlib: cleanlib:
rm -f $(TMGLIB) rm -f $(TMGLIB)
ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),)
slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
endif
ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),)
dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
endif

View File

@ -1,11 +1,19 @@
TOPDIR = ../.. TOPDIR = ../..
include ../../Makefile.system include ../../Makefile.system
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS = sgetf2_k.$(SUFFIX) SBLASOBJS = sgetf2_k.$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS = dgetf2_k.$(SUFFIX) DBLASOBJS = dgetf2_k.$(SUFFIX)
endif
QBLASOBJS = qgetf2_k.$(SUFFIX) QBLASOBJS = qgetf2_k.$(SUFFIX)
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS = cgetf2_k.$(SUFFIX) CBLASOBJS = cgetf2_k.$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS = zgetf2_k.$(SUFFIX) ZBLASOBJS = zgetf2_k.$(SUFFIX)
endif
XBLASOBJS = xgetf2_k.$(SUFFIX) XBLASOBJS = xgetf2_k.$(SUFFIX)
sgetf2_k.$(SUFFIX) : getf2_k.c sgetf2_k.$(SUFFIX) : getf2_k.c

View File

@ -17,6 +17,19 @@ ZBLASOBJS += zgetrf_parallel.$(SUFFIX)
XBLASOBJS += xgetrf_parallel.$(SUFFIX) XBLASOBJS += xgetrf_parallel.$(SUFFIX)
endif endif
ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
GETRF_SRC = getrf_parallel_omp.c GETRF_SRC = getrf_parallel_omp.c
else else

View File

@ -17,6 +17,19 @@ ZBLASOBJS += zgetrs_N_parallel.$(SUFFIX) zgetrs_T_parallel.$(SUFFIX) zgetrs_R_pa
XBLASOBJS += xgetrs_N_parallel.$(SUFFIX) xgetrs_T_parallel.$(SUFFIX) xgetrs_R_parallel.$(SUFFIX) xgetrs_C_parallel.$(SUFFIX) XBLASOBJS += xgetrs_N_parallel.$(SUFFIX) xgetrs_T_parallel.$(SUFFIX) xgetrs_R_parallel.$(SUFFIX) xgetrs_C_parallel.$(SUFFIX)
endif endif
ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif
sgetrs_N_single.$(SUFFIX) : getrs_single.c sgetrs_N_single.$(SUFFIX) : getrs_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANS $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANS $< -o $(@F)

View File

@ -1,11 +1,19 @@
TOPDIR = ../.. TOPDIR = ../..
include ../../Makefile.system include ../../Makefile.system
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS = slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX) SBLASOBJS = slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS = dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX) DBLASOBJS = dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX)
endif
QBLASOBJS = qlaswp_plus.$(SUFFIX) qlaswp_minus.$(SUFFIX) QBLASOBJS = qlaswp_plus.$(SUFFIX) qlaswp_minus.$(SUFFIX)
ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS = claswp_plus.$(SUFFIX) claswp_minus.$(SUFFIX) CBLASOBJS = claswp_plus.$(SUFFIX) claswp_minus.$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS = zlaswp_plus.$(SUFFIX) zlaswp_minus.$(SUFFIX) ZBLASOBJS = zlaswp_plus.$(SUFFIX) zlaswp_minus.$(SUFFIX)
endif
XBLASOBJS = xlaswp_plus.$(SUFFIX) xlaswp_minus.$(SUFFIX) XBLASOBJS = xlaswp_plus.$(SUFFIX) xlaswp_minus.$(SUFFIX)
slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX) dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX) \ slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX) dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX) \

View File

@ -1,11 +1,19 @@
TOPDIR = ../.. TOPDIR = ../..
include ../../Makefile.system include ../../Makefile.system
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS = slauu2_U.$(SUFFIX) slauu2_L.$(SUFFIX) SBLASOBJS = slauu2_U.$(SUFFIX) slauu2_L.$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS = dlauu2_U.$(SUFFIX) dlauu2_L.$(SUFFIX) DBLASOBJS = dlauu2_U.$(SUFFIX) dlauu2_L.$(SUFFIX)
endif
QBLASOBJS = qlauu2_U.$(SUFFIX) qlauu2_L.$(SUFFIX) QBLASOBJS = qlauu2_U.$(SUFFIX) qlauu2_L.$(SUFFIX)
ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS = clauu2_U.$(SUFFIX) clauu2_L.$(SUFFIX) CBLASOBJS = clauu2_U.$(SUFFIX) clauu2_L.$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS = zlauu2_U.$(SUFFIX) zlauu2_L.$(SUFFIX) ZBLASOBJS = zlauu2_U.$(SUFFIX) zlauu2_L.$(SUFFIX)
endif
XBLASOBJS = xlauu2_U.$(SUFFIX) xlauu2_L.$(SUFFIX) XBLASOBJS = xlauu2_U.$(SUFFIX) xlauu2_L.$(SUFFIX)
slauu2_U.$(SUFFIX) : lauu2_U.c slauu2_U.$(SUFFIX) : lauu2_U.c

View File

@ -17,6 +17,19 @@ ZBLASOBJS += zlauum_U_parallel.$(SUFFIX) zlauum_L_parallel.$(SUFFIX)
XBLASOBJS += xlauum_U_parallel.$(SUFFIX) xlauum_L_parallel.$(SUFFIX) XBLASOBJS += xlauum_U_parallel.$(SUFFIX) xlauum_L_parallel.$(SUFFIX)
endif endif
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif
slauum_U_single.$(SUFFIX) : lauum_U_single.c slauum_U_single.$(SUFFIX) : lauum_U_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F)

View File

@ -8,6 +8,19 @@ CBLASOBJS = cpotf2_U.$(SUFFIX) cpotf2_L.$(SUFFIX)
ZBLASOBJS = zpotf2_U.$(SUFFIX) zpotf2_L.$(SUFFIX) ZBLASOBJS = zpotf2_U.$(SUFFIX) zpotf2_L.$(SUFFIX)
XBLASOBJS = xpotf2_U.$(SUFFIX) xpotf2_L.$(SUFFIX) XBLASOBJS = xpotf2_U.$(SUFFIX) xpotf2_L.$(SUFFIX)
ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif
spotf2_U.$(SUFFIX) : potf2_U.c spotf2_U.$(SUFFIX) : potf2_U.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F)

View File

@ -17,6 +17,20 @@ ZBLASOBJS += zpotrf_U_parallel.$(SUFFIX) zpotrf_L_parallel.$(SUFFIX)
XBLASOBJS += xpotrf_U_parallel.$(SUFFIX) xpotrf_L_parallel.$(SUFFIX) XBLASOBJS += xpotrf_U_parallel.$(SUFFIX) xpotrf_L_parallel.$(SUFFIX)
endif endif
ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" ""
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif
spotrf_U_single.$(SUFFIX) : potrf_U_single.c spotrf_U_single.$(SUFFIX) : potrf_U_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F)

View File

@ -1,11 +1,19 @@
TOPDIR = ../.. TOPDIR = ../..
include ../../Makefile.system include ../../Makefile.system
ifeq ($(BUILD_SINGLE),1)
SBLASOBJS = strti2_UU.$(SUFFIX) strti2_UN.$(SUFFIX) strti2_LU.$(SUFFIX) strti2_LN.$(SUFFIX) SBLASOBJS = strti2_UU.$(SUFFIX) strti2_UN.$(SUFFIX) strti2_LU.$(SUFFIX) strti2_LN.$(SUFFIX)
endif
ifeq ($(BUILD_DOUBLE),1)
DBLASOBJS = dtrti2_UU.$(SUFFIX) dtrti2_UN.$(SUFFIX) dtrti2_LU.$(SUFFIX) dtrti2_LN.$(SUFFIX) DBLASOBJS = dtrti2_UU.$(SUFFIX) dtrti2_UN.$(SUFFIX) dtrti2_LU.$(SUFFIX) dtrti2_LN.$(SUFFIX)
endif
QBLASOBJS = qtrti2_UU.$(SUFFIX) qtrti2_UN.$(SUFFIX) qtrti2_LU.$(SUFFIX) qtrti2_LN.$(SUFFIX) QBLASOBJS = qtrti2_UU.$(SUFFIX) qtrti2_UN.$(SUFFIX) qtrti2_LU.$(SUFFIX) qtrti2_LN.$(SUFFIX)
ifeq ($(BUILD_COMPLEX),1)
CBLASOBJS = ctrti2_UU.$(SUFFIX) ctrti2_UN.$(SUFFIX) ctrti2_LU.$(SUFFIX) ctrti2_LN.$(SUFFIX) CBLASOBJS = ctrti2_UU.$(SUFFIX) ctrti2_UN.$(SUFFIX) ctrti2_LU.$(SUFFIX) ctrti2_LN.$(SUFFIX)
endif
ifeq ($(BUILD_COMPLEX16),1)
ZBLASOBJS = ztrti2_UU.$(SUFFIX) ztrti2_UN.$(SUFFIX) ztrti2_LU.$(SUFFIX) ztrti2_LN.$(SUFFIX) ZBLASOBJS = ztrti2_UU.$(SUFFIX) ztrti2_UN.$(SUFFIX) ztrti2_LU.$(SUFFIX) ztrti2_LN.$(SUFFIX)
endif
XBLASOBJS = xtrti2_UU.$(SUFFIX) xtrti2_UN.$(SUFFIX) xtrti2_LU.$(SUFFIX) xtrti2_LN.$(SUFFIX) XBLASOBJS = xtrti2_UU.$(SUFFIX) xtrti2_UN.$(SUFFIX) xtrti2_LU.$(SUFFIX) xtrti2_LN.$(SUFFIX)
strti2_UU.$(SUFFIX) : trti2_U.c strti2_UU.$(SUFFIX) : trti2_U.c

View File

@ -23,6 +23,19 @@ ZBLASOBJS += ztrtri_UU_parallel.$(SUFFIX) ztrtri_UN_parallel.$(SUFFIX) ztrtri_LU
XBLASOBJS += xtrtri_UU_parallel.$(SUFFIX) xtrtri_UN_parallel.$(SUFFIX) xtrtri_LU_parallel.$(SUFFIX) xtrtri_LN_parallel.$(SUFFIX) XBLASOBJS += xtrtri_UU_parallel.$(SUFFIX) xtrtri_UN_parallel.$(SUFFIX) xtrtri_LU_parallel.$(SUFFIX) xtrtri_LN_parallel.$(SUFFIX)
endif endif
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif
strtri_UU_single.$(SUFFIX) : trtri_U_single.c strtri_UU_single.$(SUFFIX) : trtri_U_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUNIT $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUNIT $< -o $(@F)

View File

@ -17,6 +17,19 @@ ZBLASOBJS += ztrtrs_UNU_parallel.$(SUFFIX) ztrtrs_UNN_parallel.$(SUFFIX) ztrtrs_
XBLASOBJS += xtrtrs_UNU_parallel.$(SUFFIX) xtrtrs_UNN_parallel.$(SUFFIX) xtrtrs_UTU_parallel.$(SUFFIX) xtrtrs_UTN_parallel.$(SUFFIX) xtrtrs_URU_parallel.$(SUFFIX) xtrtrs_URN_parallel.$(SUFFIX) xtrtrs_UCU_parallel.$(SUFFIX) xtrtrs_UCN_parallel.$(SUFFIX) xtrtrs_LNU_parallel.$(SUFFIX) xtrtrs_LNN_parallel.$(SUFFIX) xtrtrs_LTU_parallel.$(SUFFIX) xtrtrs_LTN_parallel.$(SUFFIX) xtrtrs_LRU_parallel.$(SUFFIX) xtrtrs_LRN_parallel.$(SUFFIX) xtrtrs_LCU_parallel.$(SUFFIX) xtrtrs_LCN_parallel.$(SUFFIX) XBLASOBJS += xtrtrs_UNU_parallel.$(SUFFIX) xtrtrs_UNN_parallel.$(SUFFIX) xtrtrs_UTU_parallel.$(SUFFIX) xtrtrs_UTN_parallel.$(SUFFIX) xtrtrs_URU_parallel.$(SUFFIX) xtrtrs_URN_parallel.$(SUFFIX) xtrtrs_UCU_parallel.$(SUFFIX) xtrtrs_UCN_parallel.$(SUFFIX) xtrtrs_LNU_parallel.$(SUFFIX) xtrtrs_LNN_parallel.$(SUFFIX) xtrtrs_LTU_parallel.$(SUFFIX) xtrtrs_LTN_parallel.$(SUFFIX) xtrtrs_LRU_parallel.$(SUFFIX) xtrtrs_LRN_parallel.$(SUFFIX) xtrtrs_LCU_parallel.$(SUFFIX) xtrtrs_LCN_parallel.$(SUFFIX)
endif endif
ifneq ($(BUILD_SINGLE),1)
SBLASOBJS=
endif
ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX),1)
CBLASOBJS=
endif
ifneq ($(BUILD_COMPLEX16),1)
ZBLASOBJS=
endif
strtrs_UNU_single.$(SUFFIX) : trtrs_single.c strtrs_UNU_single.$(SUFFIX) : trtrs_single.c
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F)

View File

@ -4,7 +4,7 @@ include_directories(${PROJECT_BINARY_DIR})
enable_language(Fortran) enable_language(Fortran)
if (BUILD_SINGLE) if (BUILD_SINGLE)
list( APPEND OpenBLAS_Tests sblat1 sblat2 sblat3) list( APPEND OpenBLAS_Tests sblat1 sblat2 sblat3)
endif() endif()
if (BUILD_DOUBLE) if (BUILD_DOUBLE)
list (APPEND OpenBLAS_Tests dblat1 dblat2 dblat3) list (APPEND OpenBLAS_Tests dblat1 dblat2 dblat3)
@ -17,7 +17,7 @@ if (BUILD_COMPLEX16)
endif() endif()
foreach(test_bin ${OpenBLAS_Tests}) foreach(test_bin ${OpenBLAS_Tests})
add_executable(${test_bin} ${test_bin}.f) add_executable(${test_bin} ${test_bin}.f)
target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME}) target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME})
endforeach() endforeach()
@ -34,7 +34,19 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh
"fi\n" "fi\n"
) )
set(float_types s d c z) #set(float_types s d c z)
if (BUILD_SINGLE)
list (APPEND float_types s)
endif()
if (BUILD_DOUBLE)
list (APPEND float_types d)
endif()
if (BUILD_COMPLEX)
list (APPEND float_types c)
endif()
if (BUILD_COMPLEX16)
list (APPEND float_types z)
endif()
foreach(float_type ${float_types}) foreach(float_type ${float_types})
string(TOUPPER ${float_type} float_type_upper) string(TOUPPER ${float_type} float_type_upper)
add_test(NAME "${float_type}blas1" add_test(NAME "${float_type}blas1"

View File

@ -7,82 +7,241 @@ all ::
else else
all :: level1 level2 level3 all :: level1 level2 level3
endif endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1)
level1: sblat1 dblat1 cblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1)
level1: dblat1 cblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1)
level1: sblat1 cblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1)
level1: cblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x)
level1: cblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1)
level1: zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1)
level1: sblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1)
level1: sblat1 dblat1 zblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx)
level1: sblat1 dblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx)
level1: sblat1
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx)
level1: dblat1
endif
level1 : sblat1 dblat1 cblat1 zblat1
ifndef CROSS ifndef CROSS
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat1 OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat1
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat1 OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat1
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat1 OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat1 OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat1
endif
ifdef SMP ifdef SMP
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./sblat1 OMP_NUM_THREADS=2 ./sblat1
endif
ifeq ($(BUILD_DOUBLE),1)
OMP_NUM_THREADS=2 ./dblat1 OMP_NUM_THREADS=2 ./dblat1
endif
ifeq ($(BUILD_COMPLEX),1)
OMP_NUM_THREADS=2 ./cblat1 OMP_NUM_THREADS=2 ./cblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
OMP_NUM_THREADS=2 ./zblat1 OMP_NUM_THREADS=2 ./zblat1
endif
else else
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=2 ./sblat1 OPENBLAS_NUM_THREADS=2 ./sblat1
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=2 ./dblat1 OPENBLAS_NUM_THREADS=2 ./dblat1
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=2 ./cblat1 OPENBLAS_NUM_THREADS=2 ./cblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=2 ./zblat1 OPENBLAS_NUM_THREADS=2 ./zblat1
endif endif
endif endif
endif endif
endif
#level2: sblat2 dblat2 cblat2 zblat2
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1)
level2: sblat2 dblat2 cblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1)
level2: dblat2 cblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1)
level2: sblat2 cblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1)
level2: cblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x)
level2: cblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1)
level2: zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1)
level2: sblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1)
level2: sblat2 dblat2 zblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx)
level2: sblat2 dblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx)
level2: sblat2
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx)
level2: dblat2
endif
level2 : sblat2 dblat2 cblat2 zblat2
ifndef CROSS ifndef CROSS
rm -f ?BLAT2.SUMM rm -f ?BLAT2.SUMM
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat2 < ./dblat2.dat OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat2 < ./dblat2.dat
@$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat2 < ./cblat2.dat OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat2 < ./cblat2.dat
@$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat2 < ./zblat2.dat OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat2 < ./zblat2.dat
@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
endif
ifdef SMP ifdef SMP
rm -f ?BLAT2.SUMM rm -f ?BLAT2.SUMM
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OMP_NUM_THREADS=2 ./dblat2 < ./dblat2.dat OMP_NUM_THREADS=2 ./dblat2 < ./dblat2.dat
@$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OMP_NUM_THREADS=2 ./cblat2 < ./cblat2.dat OMP_NUM_THREADS=2 ./cblat2 < ./cblat2.dat
@$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OMP_NUM_THREADS=2 ./zblat2 < ./zblat2.dat OMP_NUM_THREADS=2 ./zblat2 < ./zblat2.dat
@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
endif
else else
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=2 ./dblat2 < ./dblat2.dat OPENBLAS_NUM_THREADS=2 ./dblat2 < ./dblat2.dat
@$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=2 ./cblat2 < ./cblat2.dat OPENBLAS_NUM_THREADS=2 ./cblat2 < ./cblat2.dat
@$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=2 ./zblat2 < ./zblat2.dat OPENBLAS_NUM_THREADS=2 ./zblat2 < ./zblat2.dat
@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
endif endif
endif endif
endif endif
ifeq ($(BUILD_HALF),1)
level3 : test_shgemm sblat3 dblat3 cblat3 zblat3
else
level3 : sblat3 dblat3 cblat3 zblat3
endif endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1)
level3: sblat3 dblat3 cblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1)
level3: dblat3 cblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1)
level3: sblat3 cblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1)
level3: cblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x)
level3: cblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1)
level3: zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1)
level3: sblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1)
level3: sblat3 dblat3 zblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx)
level3: sblat3 dblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx)
level3: sblat3
endif
ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx)
level3: dblat3
endif
#ifeq ($(BUILD_HALF),1)
#level3 : test_shgemm sblat3 dblat3 cblat3 zblat3
#else
#level3 : sblat3 dblat3 cblat3 zblat3
#endif
ifndef CROSS ifndef CROSS
rm -f ?BLAT3.SUMM rm -f ?BLAT3.SUMM
ifeq ($(BUILD_HALF),1) ifeq ($(BUILD_HALF),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_shgemm > SHBLAT3.SUMM OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_shgemm > SHBLAT3.SUMM
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
endif endif
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 < ./sblat3.dat OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 < ./sblat3.dat
@$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat3 < ./dblat3.dat OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat3 < ./dblat3.dat
@$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat3 < ./cblat3.dat OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat3 < ./cblat3.dat
@$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat3 < ./zblat3.dat OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat3 < ./zblat3.dat
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0
endif
ifdef SMP ifdef SMP
rm -f ?BLAT3.SUMM rm -f ?BLAT3.SUMM
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
@ -90,30 +249,46 @@ ifeq ($(BUILD_HALF),1)
OMP_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM OMP_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
endif endif
ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat
@$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OMP_NUM_THREADS=2 ./dblat3 < ./dblat3.dat OMP_NUM_THREADS=2 ./dblat3 < ./dblat3.dat
@$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OMP_NUM_THREADS=2 ./cblat3 < ./cblat3.dat OMP_NUM_THREADS=2 ./cblat3 < ./cblat3.dat
@$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OMP_NUM_THREADS=2 ./zblat3 < ./zblat3.dat OMP_NUM_THREADS=2 ./zblat3 < ./zblat3.dat
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0
endif
else else
ifeq ($(BUILD_HALF),1) ifeq ($(BUILD_HALF),1)
OPENBLAS_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM OPENBLAS_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM
@$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0
endif endif
ifeq ($(BUILD_SINGLE),1)
OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat
@$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_DOUBLE),1)
OPENBLAS_NUM_THREADS=2 ./dblat3 < ./dblat3.dat OPENBLAS_NUM_THREADS=2 ./dblat3 < ./dblat3.dat
@$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX),1)
OPENBLAS_NUM_THREADS=2 ./cblat3 < ./cblat3.dat OPENBLAS_NUM_THREADS=2 ./cblat3 < ./cblat3.dat
@$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0
endif
ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=2 ./zblat3 < ./zblat3.dat OPENBLAS_NUM_THREADS=2 ./zblat3 < ./zblat3.dat
@$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0
endif endif
endif endif
endif endif
endif
level3_3m : zblat3_3m cblat3_3m level3_3m : zblat3_3m cblat3_3m
@ -151,56 +326,71 @@ endif
endif endif
endif endif
ifeq ($(BUILD_SINGLE),1)
sblat1 : sblat1.$(SUFFIX) ../$(LIBNAME) sblat1 : sblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o sblat1 sblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o sblat1 sblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
dblat1 : dblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o dblat1 dblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
qblat1 : qblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o qblat1 qblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
cblat1 : cblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat1 cblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
zblat1 : zblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat1 zblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
sblat2 : sblat2.$(SUFFIX) ../$(LIBNAME) sblat2 : sblat2.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o sblat2 sblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o sblat2 sblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o sblat3 sblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif
ifeq ($(BUILD_DOUBLE),1)
dblat1 : dblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o dblat1 dblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
dblat2 : dblat2.$(SUFFIX) ../$(LIBNAME) dblat2 : dblat2.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o dblat2 dblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o dblat2 dblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o dblat3 dblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
else
dblat2:
dblat3:
endif
qblat1 : qblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o qblat1 qblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
ifeq ($(BUILD_COMPLEX),1)
cblat1 : cblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat1 cblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
cblat2 : cblat2.$(SUFFIX) ../$(LIBNAME) cblat2 : cblat2.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat2 cblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o cblat2 cblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
cblat3 : cblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat3 cblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif
ifeq ($(BUILD_COMPLEX16),1)
zblat1 : zblat1.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat1 zblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
zblat2 : zblat2.$(SUFFIX) ../$(LIBNAME) zblat2 : zblat2.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat2 zblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o zblat2 zblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME) zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o sblat3 sblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o zblat3 zblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif
ifeq ($(BUILD_HALF),1) ifeq ($(BUILD_HALF),1)
test_shgemm : compare_sgemm_shgemm.c ../$(LIBNAME) test_shgemm : compare_sgemm_shgemm.c ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o test_shgemm compare_sgemm_shgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o test_shgemm compare_sgemm_shgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif endif
dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME) ifeq ($(BUILD_COMPLEX),1)
$(FC) $(FLDFLAGS) -o dblat3 dblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
cblat3 : cblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat3 cblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat3 zblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
cblat3_3m : cblat3_3m.$(SUFFIX) ../$(LIBNAME) cblat3_3m : cblat3_3m.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o cblat3_3m cblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o cblat3_3m cblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif
ifeq ($(BUILD_COMPLEX16),1)
zblat3_3m : zblat3_3m.$(SUFFIX) ../$(LIBNAME) zblat3_3m : zblat3_3m.$(SUFFIX) ../$(LIBNAME)
$(FC) $(FLDFLAGS) -o zblat3_3m zblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) $(FC) $(FLDFLAGS) -o zblat3_3m zblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
endif

View File

@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/ **********************************************************************************/
#include "openblas_utest.h" #include "openblas_utest.h"
#if defined(BUILD_SINGLE) && defined(BUILD_DOUBLE)
CTEST(dsdot,dsdot_n_1) CTEST(dsdot,dsdot_n_1)
{ {
float x= 0.172555164F; float x= 0.172555164F;
@ -47,17 +47,4 @@ CTEST(dsdot,dsdot_n_1)
ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS); ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS);
} }
#endif
CTEST(dsdot,dsdot_n_2)
{
float x[] = {0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F};
float y[] = {0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F};
blasint incx=1;
blasint incy=1;
blasint n=8;
double res1=0.0f, res2= 2.0400000444054616;
res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy);
ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS);
}

View File

@ -48,6 +48,7 @@ void* xmalloc(size_t n)
} }
} }
#ifdef BUILD_DOUBLE
void check_dgemm(double *a, double *b, double *result, double *expected, blasint n) void check_dgemm(double *a, double *b, double *result, double *expected, blasint n)
{ {
char trans1 = 'T'; char trans1 = 'T';
@ -59,9 +60,13 @@ void check_dgemm(double *a, double *b, double *result, double *expected, blasint
ASSERT_DBL_NEAR_TOL(expected[i], result[i], DOUBLE_EPS); ASSERT_DBL_NEAR_TOL(expected[i], result[i], DOUBLE_EPS);
} }
} }
#endif
CTEST(fork, safety) CTEST(fork, safety)
{ {
#ifndef BUILD_DOUBLE
exit(0);
#else
blasint n = 1000; blasint n = 1000;
int i; int i;
@ -124,4 +129,5 @@ CTEST(fork, safety)
ASSERT_EQUAL(wait_pid, fork_pid); ASSERT_EQUAL(wait_pid, fork_pid);
ASSERT_EQUAL(0, WEXITSTATUS (child_status)); ASSERT_EQUAL(0, WEXITSTATUS (child_status));
} }
#endif
} }

View File

@ -529,16 +529,20 @@ CTEST(potrf, smoketest_trivial){
for (j = 0; j < n; ++j) { for (j = 0; j < n; ++j) {
double err; double err;
#ifdef BUILD_SINGLE
err = fabs(A1s[i+n*j] - Bs[i+n*j]); err = fabs(A1s[i+n*j] - Bs[i+n*j]);
if (err > 1e-5) { if (err > 1e-5) {
CTEST_ERR("%s:%d %c s(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); CTEST_ERR("%s:%d %c s(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err);
} }
#endif
#ifdef BUILD_DOUBLE
err = fabs(A1d[i+n*j] - Bd[i+n*j]); err = fabs(A1d[i+n*j] - Bd[i+n*j]);
if (err > 1e-12) { if (err > 1e-12) {
CTEST_ERR("%s:%d %c d(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); CTEST_ERR("%s:%d %c d(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err);
} }
#endif
#ifdef BUILD_COMPLEX
#ifdef OPENBLAS_COMPLEX_C99 #ifdef OPENBLAS_COMPLEX_C99
err = cabsf(A1c[i+n*j] - Bc[i+n*j]); err = cabsf(A1c[i+n*j] - Bc[i+n*j]);
#else #else
@ -548,7 +552,9 @@ CTEST(potrf, smoketest_trivial){
if (err > 1e-5) { if (err > 1e-5) {
CTEST_ERR("%s:%d %c c(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); CTEST_ERR("%s:%d %c c(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err);
} }
#endif
#ifdef BUILD_COMPLEX16
#ifdef OPENBLAS_COMPLEX_C99 #ifdef OPENBLAS_COMPLEX_C99
err = cabs(A1z[i+n*j] - Bz[i+n*j]); err = cabs(A1z[i+n*j] - Bz[i+n*j]);
#else #else
@ -558,6 +564,7 @@ CTEST(potrf, smoketest_trivial){
if (err > 1e-12) { if (err > 1e-12) {
CTEST_ERR("%s:%d %c z(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); CTEST_ERR("%s:%d %c z(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err);
} }
#endif
} }
} }
} }