diff --git a/Makefile.system b/Makefile.system index c46c88581..eb6e14a98 100644 --- a/Makefile.system +++ b/Makefile.system @@ -9,7 +9,7 @@ ifndef TOPDIR TOPDIR = . endif -# If ARCH is not set, we use the host system's architecture for getarch compile options. + # If ARCH is not set, we use the host system's architecture for getarch compile options. ifndef ARCH HOSTARCH := $(shell uname -m) else @@ -73,6 +73,18 @@ endif # # Beginning of system configuration # +ifneq ($(BUILD_SINGLE),1) +ifneq ($(BUILD_DOUBLE),1) +ifneq ($(BUILD_COMPLEX),1) +ifneq ($(BUILD_COMPLEX16),1) +override BUILD_SINGLE=1 +override BUILD_DOUBLE=1 +override BUILD_COMPLEX=1 +override BUILD_COMPLEX16=1 +endif +endif +endif +endif ifndef HOSTCC HOSTCC = $(CC) @@ -1224,16 +1236,16 @@ ifeq ($(BUILD_HALF), 1) CCOMMON_OPT += -DBUILD_HALF endif ifeq ($(BUILD_SINGLE), 1) -CCOMMON_OPT += -DBUILD_SINGLE +CCOMMON_OPT += -DBUILD_SINGLE=1 endif ifeq ($(BUILD_DOUBLE), 1) -CCOMMON_OPT += -DBUILD_DOUBLE +CCOMMON_OPT += -DBUILD_DOUBLE=1 endif ifeq ($(BUILD_COMPLEX), 1) -CCOMMON_OPT += -DBUILD_COMPLEX +CCOMMON_OPT += -DBUILD_COMPLEX=1 endif ifeq ($(BUILD_COMPLEX16), 1) -CCOMMON_OPT += -DBUILD_COMPLEX16 +CCOMMON_OPT += -DBUILD_COMPLEX16=1 endif CCOMMON_OPT += -DVERSION=\"$(VERSION)\" diff --git a/Makefile.x86_64 b/Makefile.x86_64 index 65b67bba1..e793a1c2f 100644 --- a/Makefile.x86_64 +++ b/Makefile.x86_64 @@ -9,9 +9,11 @@ endif endif ifdef HAVE_SSE3 +ifndef DYNAMIC_ARCH CCOMMON_OPT += -msse3 FCOMMON_OPT += -msse3 endif +endif ifeq ($(CORE), SKYLAKEX) ifndef DYNAMIC_ARCH diff --git a/common_param.h b/common_param.h index 81b479e53..0fe5e6c1d 100644 --- a/common_param.h +++ b/common_param.h @@ -167,6 +167,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); float (*snrm2_k) (BLASLONG, float *, BLASLONG); float (*sasum_k) (BLASLONG, float *, BLASLONG); #endif + #if BUILD_SINGLE float (*ssum_k) (BLASLONG, float *, BLASLONG); #endif @@ -188,13 +189,15 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); #endif -#if BUILD_SINGLE + +#if BUILD_SINGLE int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); #endif + #if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX) #ifdef ARCH_X86_64 void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG); @@ -210,6 +213,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); #endif + #if (BUILD_SINGLE) || (BUILD_DOUBLE) int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); @@ -304,12 +308,14 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); #endif + #if BUILD_DOUBLE int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); #endif + #if (BUILD_DOUBLE) || (BUILD_COMPLEX16) int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG); int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); @@ -319,6 +325,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *); #endif + #if BUILD_DOUBLE int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG); @@ -466,6 +473,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG); #endif + #if (BUILD_COMPLEX) || (BUILD_COMPLEX16) int cgemm_p, cgemm_q, cgemm_r; int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn; @@ -644,6 +652,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG); int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); #endif + #if BUILD_COMPLEX16 int zgemm_p, zgemm_q, zgemm_r; int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn; @@ -982,6 +991,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); void (*init)(void); int snum_opt, dnum_opt, qnum_opt; + #if BUILD_SINGLE int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG); #endif @@ -995,14 +1005,14 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG); #endif -#if BUILD_SINGLE +#if BUILD_SINGLE int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG); #endif -#if BUILD_DOUBLE +#if BUILD_DOUBLE int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG); diff --git a/ctest/Makefile b/ctest/Makefile index 6f5b65142..cba904f75 100644 --- a/ctest/Makefile +++ b/ctest/Makefile @@ -46,56 +46,155 @@ else all :: all1 all2 all3 endif -all1: xscblat1 xdcblat1 xccblat1 xzcblat1 +ifeq ($(BUILD_SINGLE),1) +all1targets += xscblat1 +endif +ifeq ($(BUILD_DOUBLE),1) +all1targets += xdcblat1 +endif +ifeq ($(BUILD_COMPLEX),1) +all1targets += xccblat1 +endif +ifeq ($(BUILD_COMPLEX16),1) +all1targets += xzcblat1 +endif + +all1: $(all1targets) + ifndef CROSS ifeq ($(USE_OPENMP), 1) +ifeq ($(BUILD_SINGLE),1) OMP_NUM_THREADS=2 ./xscblat1 +endif +ifeq ($(BUILD_DOUBLE),1) OMP_NUM_THREADS=2 ./xdcblat1 +endif +ifeq ($(BUILD_COMPLEX),1) OMP_NUM_THREADS=2 ./xccblat1 +endif +ifeq ($(BUILD_COMPLEX16),1) OMP_NUM_THREADS=2 ./xzcblat1 +endif else +ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=2 ./xscblat1 +endif +ifeq ($(BUILD_DOUBLE),1) OPENBLAS_NUM_THREADS=2 ./xdcblat1 +endif +ifeq ($(BUILD_COMPLEX),1) OPENBLAS_NUM_THREADS=2 ./xccblat1 +endif +ifeq ($(BUILD_COMPLEX16),1) OPENBLAS_NUM_THREADS=2 ./xzcblat1 endif endif +endif + +ifeq ($(BUILD_SINGLE),1) +all2targets += xscblat2 +endif +ifeq ($(BUILD_DOUBLE),1) +all2targets += xdcblat2 +endif +ifeq ($(BUILD_COMPLEX),1) +all2targets += xccblat2 +endif +ifeq ($(BUILD_COMPLEX16),1) +all2targets += xzcblat2 +endif + +all2: $(all2targets) -all2: xscblat2 xdcblat2 xccblat2 xzcblat2 ifndef CROSS ifeq ($(USE_OPENMP), 1) +ifeq ($(BUILD_SINGLE),1) OMP_NUM_THREADS=2 ./xscblat2 < sin2 +endif +ifeq ($(BUILD_DOUBLE),1) OMP_NUM_THREADS=2 ./xdcblat2 < din2 +endif +ifeq ($(BUILD_COMPLEX),1) OMP_NUM_THREADS=2 ./xccblat2 < cin2 +endif +ifeq ($(BUILD_COMPLEX16),1) OMP_NUM_THREADS=2 ./xzcblat2 < zin2 +endif else +ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=2 ./xscblat2 < sin2 +endif +ifeq ($(BUILD_DOUBLE),1) OPENBLAS_NUM_THREADS=2 ./xdcblat2 < din2 +endif +ifeq ($(BUILD_COMPLEX),1) OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2 +endif +ifeq ($(BUILD_COMPLEX16),1) OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2 endif endif +endif + + +ifeq ($(BUILD_SINGLE),1) +all3targets += xscblat3 +endif +ifeq ($(BUILD_DOUBLE),1) +all3targets += xdcblat3 +endif +ifeq ($(BUILD_COMPLEX),1) +all3targets += xccblat3 +endif +ifeq ($(BUILD_COMPLEX16),1) +all3targets += xzcblat3 +endif + +all3: $(all3targets) -all3: xscblat3 xdcblat3 xccblat3 xzcblat3 ifndef CROSS ifeq ($(USE_OPENMP), 1) +ifeq ($(BUILD_SINGLE),1) OMP_NUM_THREADS=2 ./xscblat3 < sin3 +endif +ifeq ($(BUILD_DOUBLE),1) OMP_NUM_THREADS=2 ./xdcblat3 < din3 +endif +ifeq ($(BUILD_COMPLEX),1) OMP_NUM_THREADS=2 ./xccblat3 < cin3 +endif +ifeq ($(BUILD_COMPLEX16),1) OMP_NUM_THREADS=2 ./xzcblat3 < zin3 +endif else +ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=2 ./xscblat3 < sin3 +endif +ifeq ($(BUILD_DOUBLE),1) OPENBLAS_NUM_THREADS=2 ./xdcblat3 < din3 +endif +ifeq ($(BUILD_COMPLEX),1) OPENBLAS_NUM_THREADS=2 ./xccblat3 < cin3 +endif +ifeq ($(BUILD_COMPLEX16),1) OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3 endif +endif +endif all3_3m: xzcblat3_3m xccblat3_3m ifeq ($(USE_OPENMP), 1) +ifeq ($(BUILD_SINGLE),1) OMP_NUM_THREADS=2 ./xccblat3_3m < cin3_3m +endif +ifeq ($(BUILD_COMPLEX16),1) OMP_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m +endif else +ifeq ($(BUILD_COMPLEX),1) OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m +endif +ifeq ($(BUILD_COMPLEX16),1) OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m endif endif @@ -115,13 +214,19 @@ endif endif endif +ifeq ($(BUILD_SINGLE),1) # Single real xscblat1: $(stestl1o) c_sblat1.o $(TOPDIR)/$(LIBNAME) $(FC) $(FLDFLAGS) -o xscblat1 c_sblat1.o $(stestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) + xscblat2: $(stestl2o) c_sblat2.o $(TOPDIR)/$(LIBNAME) $(FC) $(FLDFLAGS) -o xscblat2 c_sblat2.o $(stestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB) + xscblat3: $(stestl3o) c_sblat3.o $(TOPDIR)/$(LIBNAME) $(FC) $(FLDFLAGS) -o xscblat3 c_sblat3.o $(stestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) +endif + +ifeq ($(BUILD_DOUBLE),1) # Double real xdcblat1: $(dtestl1o) c_dblat1.o $(TOPDIR)/$(LIBNAME) $(FC) $(FLDFLAGS) -o xdcblat1 c_dblat1.o $(dtestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) @@ -129,7 +234,10 @@ xdcblat2: $(dtestl2o) c_dblat2.o $(TOPDIR)/$(LIBNAME) $(FC) $(FLDFLAGS) -o xdcblat2 c_dblat2.o $(dtestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB) xdcblat3: $(dtestl3o) c_dblat3.o $(TOPDIR)/$(LIBNAME) $(FC) $(FLDFLAGS) -o xdcblat3 c_dblat3.o $(dtestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) +endif + +ifeq ($(BUILD_COMPLEX),1) # Single complex xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME) $(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) @@ -140,7 +248,10 @@ xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME) xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME) $(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB) +endif + +ifeq ($(BUILD_COMPLEX16),1) # Double complex xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME) $(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB) @@ -152,6 +263,6 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME) xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME) $(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB) - +endif include $(TOPDIR)/Makefile.tail diff --git a/driver/level2/CMakeLists.txt b/driver/level2/CMakeLists.txt index 8fceba905..61367e596 100644 --- a/driver/level2/CMakeLists.txt +++ b/driver/level2/CMakeLists.txt @@ -197,6 +197,19 @@ foreach (float_type ${FLOAT_TYPES}) endif () endforeach () +if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) + if (USE_THREAD) + GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "SINGLE") + GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "SINGLE") + endif () +endif () +if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + if (USE_THREAD) + GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "DOUBLE") + GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "DOUBLE") + endif () +endif () + if (USE_THREAD) GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2) endif () diff --git a/driver/level2/Makefile b/driver/level2/Makefile index 79c4ca153..7212d6662 100644 --- a/driver/level2/Makefile +++ b/driver/level2/Makefile @@ -417,19 +417,63 @@ XBLASOBJS += \ endif +ifneq ($(BUILD_SINGLE),1) + SBLASOBJS= +ifeq ($(BUILD_DOUBLE),1) +ifdef SMP +SBLASOBJS += \ + sgemv_thread_n.$(SUFFIX) sgemv_thread_t.$(SUFFIX) \ + strsv_NUU.$(SUFFIX) strsv_NUN.$(SUFFIX) strsv_NLU.$(SUFFIX) strsv_NLN.$(SUFFIX) \ + strsv_TUU.$(SUFFIX) strsv_TUN.$(SUFFIX) strsv_TLU.$(SUFFIX) strsv_TLN.$(SUFFIX) +endif +endif +ifeq ($(BUILD_COMPLEX),1) +ifdef SMP + SBLASOBJS = sgemv_thread_n.$(SUFFIX) sgemv_thread_t.$(SUFFIX) +endif +endif +endif +ifneq ($(BUILD_DOUBLE),1) + DBLASOBJS= +ifeq ($(BUILD_COMPLEX16),1) +ifdef SMP + DBLASOBJS = dgemv_thread_n.$(SUFFIX) dgemv_thread_t.$(SUFFIX) +endif +endif +endif +ifneq ($(BUILD_COMPLEX),1) + CBLASOBJS= +ifeq ($(BUILD_COMPLEX16),1) + CBLASOBJS= \ + ctrsv_NUU.$(SUFFIX) ctrsv_NUN.$(SUFFIX) ctrsv_NLU.$(SUFFIX) ctrsv_NLN.$(SUFFIX) \ + ctrsv_TUU.$(SUFFIX) ctrsv_TUN.$(SUFFIX) ctrsv_TLU.$(SUFFIX) ctrsv_TLN.$(SUFFIX) \ + ctrsv_RUU.$(SUFFIX) ctrsv_RUN.$(SUFFIX) ctrsv_RLU.$(SUFFIX) ctrsv_RLN.$(SUFFIX) \ + ctrsv_CUU.$(SUFFIX) ctrsv_CUN.$(SUFFIX) ctrsv_CLU.$(SUFFIX) ctrsv_CLN.$(SUFFIX) +endif +endif +ifneq ($(BUILD_COMPLEX16),1) + ZBLASOBJS= +endif + all :: +ifeq ($(BUILD_SINGLE),1) + sgbmv_n.$(SUFFIX) sgbmv_n.$(PSUFFIX) : gbmv_k.c $(CC) -c -UCOMPLEX -UDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< sgbmv_t.$(SUFFIX) sgbmv_t.$(PSUFFIX) : gbmv_k.c $(CC) -c -UCOMPLEX -UDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< +endif + +ifeq ($(BUILD_DOUBLE),1) dgbmv_n.$(SUFFIX) dgbmv_n.$(PSUFFIX) : gbmv_k.c $(CC) -c -UCOMPLEX -DDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< dgbmv_t.$(SUFFIX) dgbmv_t.$(PSUFFIX) : gbmv_k.c $(CC) -c -UCOMPLEX -DDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< +endif qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c $(CC) -c -UCOMPLEX -DXDOUBLE -UTRANS $(CFLAGS) -o $(@F) $< @@ -437,6 +481,8 @@ qgbmv_n.$(SUFFIX) qgbmv_n.$(PSUFFIX) : gbmv_k.c qgbmv_t.$(SUFFIX) qgbmv_t.$(PSUFFIX) : gbmv_k.c $(CC) -c -UCOMPLEX -DXDOUBLE -DTRANS $(CFLAGS) -o $(@F) $< +ifeq ($(BUILD_COMPLEX),1) + cgbmv_n.$(SUFFIX) cgbmv_n.$(PSUFFIX) : zgbmv_k.c $(CC) -c -DCOMPLEX -UDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< @@ -460,6 +506,9 @@ cgbmv_s.$(SUFFIX) cgbmv_s.$(PSUFFIX) : zgbmv_k.c cgbmv_d.$(SUFFIX) cgbmv_d.$(PSUFFIX) : zgbmv_k.c $(CC) -c -DCOMPLEX -UDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< +endif + +ifeq ($(BUILD_COMPLEX16),1) zgbmv_n.$(SUFFIX) zgbmv_n.$(PSUFFIX) : zgbmv_k.c $(CC) -c -DCOMPLEX -DDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< @@ -484,6 +533,7 @@ zgbmv_s.$(SUFFIX) zgbmv_s.$(PSUFFIX) : zgbmv_k.c zgbmv_d.$(SUFFIX) zgbmv_d.$(PSUFFIX) : zgbmv_k.c $(CC) -c -DCOMPLEX -DDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< +endif xgbmv_n.$(SUFFIX) xgbmv_n.$(PSUFFIX) : zgbmv_k.c $(CC) -c -DCOMPLEX -DXDOUBLE -UTRANS -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< @@ -509,24 +559,34 @@ xgbmv_s.$(SUFFIX) xgbmv_s.$(PSUFFIX) : zgbmv_k.c xgbmv_d.$(SUFFIX) xgbmv_d.$(PSUFFIX) : zgbmv_k.c $(CC) -c -DCOMPLEX -DXDOUBLE -DTRANS -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< + +ifeq ($(BUILD_SINGLE),1) + sgbmv_thread_n.$(SUFFIX) sgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c $(CC) -c -UCOMPLEX -UDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< sgbmv_thread_t.$(SUFFIX) sgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c $(CC) -c -UCOMPLEX -UDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< +endif + + +ifeq ($(BUILD_DOUBLE),1) dgbmv_thread_n.$(SUFFIX) dgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c $(CC) -c -UCOMPLEX -DDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< dgbmv_thread_t.$(SUFFIX) dgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c $(CC) -c -UCOMPLEX -DDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< - +endif qgbmv_thread_n.$(SUFFIX) qgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c $(CC) -c -UCOMPLEX -DXDOUBLE -UTRANSA $(CFLAGS) -o $(@F) $< qgbmv_thread_t.$(SUFFIX) qgbmv_thread_t.$(PSUFFIX) : gbmv_thread.c $(CC) -c -UCOMPLEX -DXDOUBLE -DTRANSA $(CFLAGS) -o $(@F) $< + +ifeq ($(BUILD_COMPLEX),1) + cgbmv_thread_n.$(SUFFIX) cgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c $(CC) -c -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< @@ -550,6 +610,10 @@ cgbmv_thread_s.$(SUFFIX) cgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c cgbmv_thread_d.$(SUFFIX) cgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c $(CC) -c -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< +endif + + +ifeq ($(BUILD_COMPLEX16),1) zgbmv_thread_n.$(SUFFIX) zgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c $(CC) -c -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< @@ -574,6 +638,7 @@ zgbmv_thread_s.$(SUFFIX) zgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c zgbmv_thread_d.$(SUFFIX) zgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c $(CC) -c -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< +endif xgbmv_thread_n.$(SUFFIX) xgbmv_thread_n.$(PSUFFIX) : gbmv_thread.c $(CC) -c -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $(CFLAGS) -o $(@F) $< @@ -599,24 +664,32 @@ xgbmv_thread_s.$(SUFFIX) xgbmv_thread_s.$(PSUFFIX) : gbmv_thread.c xgbmv_thread_d.$(SUFFIX) xgbmv_thread_d.$(PSUFFIX) : gbmv_thread.c $(CC) -c -DCOMPLEX -DXDOUBLE -DTRANSA -DCONJ -DXCONJ $(CFLAGS) -o $(@F) $< + +ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" sgemv_thread_n.$(SUFFIX) sgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) sgemv_thread_t.$(SUFFIX) sgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) +endif + +ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" dgemv_thread_n.$(SUFFIX) dgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) dgemv_thread_t.$(SUFFIX) dgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) - +endif qgemv_thread_n.$(SUFFIX) qgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) qgemv_thread_t.$(SUFFIX) qgemv_thread_t.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) + +ifeq ($(BUILD_COMPLEX),1) + cgemv_thread_n.$(SUFFIX) cgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) @@ -640,6 +713,10 @@ cgemv_thread_s.$(SUFFIX) cgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common. cgemv_thread_d.$(SUFFIX) cgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F) +endif + + +ifeq ($(BUILD_COMPLEX16),1) zgemv_thread_n.$(SUFFIX) zgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) @@ -664,6 +741,7 @@ zgemv_thread_s.$(SUFFIX) zgemv_thread_s.$(PSUFFIX) : gemv_thread.c ../../common. zgemv_thread_d.$(SUFFIX) zgemv_thread_d.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DTRANSA -DCONJ -DXCONJ $< -o $(@F) +endif xgemv_thread_n.$(SUFFIX) xgemv_thread_n.$(PSUFFIX) : gemv_thread.c ../../common.h $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) diff --git a/driver/level3/CMakeLists.txt b/driver/level3/CMakeLists.txt index f788c45b9..077862abc 100644 --- a/driver/level3/CMakeLists.txt +++ b/driver/level3/CMakeLists.txt @@ -14,6 +14,24 @@ foreach (GEMM_DEFINE ${GEMM_DEFINES}) endif () endforeach () +if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) +foreach (GEMM_DEFINE ${GEMM_DEFINES}) + string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) + GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "DOUBLE") + if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) + GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "DOUBLE") + endif() +endforeach() +endif() +if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) +foreach (GEMM_DEFINE ${GEMM_DEFINES}) + string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) + GenerateNamedObjects("gemm.c" "${GEMM_DEFINE}" "gemm_${GEMM_DEFINE_LC}" 0 "" "" false "SINGLE") + if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) + GenerateNamedObjects("gemm.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm_thread_${GEMM_DEFINE_LC}" 0 "" "" false "SINGLE") + endif() +endforeach() +endif() set(TRMM_TRSM_SOURCES trmm_L.c @@ -100,7 +118,24 @@ foreach (float_type ${FLOAT_TYPES}) endif() endif () endforeach () - + + if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) + string(TOLOWER ${gemm_define} gemm_define_LC) + if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) + GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false "DOUBLE" ) + endif() + endforeach() + endif () + if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) + foreach (gemm_define ${GEMM_COMPLEX_DEFINES}) + string(TOLOWER ${gemm_define} gemm_define_LC) + if (USE_THREAD AND NOT USE_SIMPLE_THREADED_LEVEL3) + GenerateNamedObjects("gemm.c" "${gemm_define};THREADED_LEVEL3" "gemm_thread_${gemm_define_LC}" false "" "" false "SINGLE" ) + endif() + endforeach() + endif () + # for gemm3m if(USE_GEMM3M) foreach (GEMM_DEFINE ${GEMM_DEFINES}) diff --git a/driver/level3/Makefile b/driver/level3/Makefile index 09a62d9bf..e3aa30256 100644 --- a/driver/level3/Makefile +++ b/driver/level3/Makefile @@ -287,6 +287,60 @@ HPLOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) \ dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) endif +ifneq ($(BUILD_SINGLE),1) + SBLASOBJS= +ifeq ($(BUILD_DOUBLE),1) + SBLASOBJS= \ + strsm_LNUU.$(SUFFIX) strsm_LNUN.$(SUFFIX) strsm_LNLU.$(SUFFIX) strsm_LNLN.$(SUFFIX) \ + strsm_LTUU.$(SUFFIX) strsm_LTUN.$(SUFFIX) strsm_LTLU.$(SUFFIX) strsm_LTLN.$(SUFFIX) \ + strsm_RNUU.$(SUFFIX) strsm_RNUN.$(SUFFIX) strsm_RNLU.$(SUFFIX) strsm_RNLN.$(SUFFIX) \ + strsm_RTUU.$(SUFFIX) strsm_RTUN.$(SUFFIX) strsm_RTLU.$(SUFFIX) strsm_RTLN.$(SUFFIX) \ + ssyrk_UN.$(SUFFIX) ssyrk_UT.$(SUFFIX) ssyrk_LN.$(SUFFIX) ssyrk_LT.$(SUFFIX) \ + ssyrk_kernel_U.$(SUFFIX) ssyrk_kernel_L.$(SUFFIX) +ifndef USE_SIMPLE_THREADED_LEVEL3 +SBLASOBJS += ssyrk_thread_UN.$(SUFFIX) ssyrk_thread_UT.$(SUFFIX) ssyrk_thread_LN.$(SUFFIX) ssyrk_thread_LT.$(SUFFIX) +endif +endif +ifeq ($(BUILD_COMPLEX),1) + SBLASOBJS = sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) +ifndef USE_SIMPLE_THREADED_LEVEL3 +SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) +endif +endif +endif +ifneq ($(BUILD_DOUBLE),1) + DBLASOBJS= +ifeq ($(BUILD_COMPLEX16),1) + DBLASOBJS = dgemm_nn.$(SUFFIX) dgemm_nt.$(SUFFIX) dgemm_tn.$(SUFFIX) dgemm_tt.$(SUFFIX) +ifndef USE_SIMPLE_THREADED_LEVEL3 +DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) +endif +endif +endif +ifneq ($(BUILD_COMPLEX),1) + CBLASOBJS= +ifeq ($(BUILD_COMPLEX16),1) + CBLASOBJS= \ + cherk_UN.$(SUFFIX) cherk_UC.$(SUFFIX) cherk_LN.$(SUFFIX) cherk_LC.$(SUFFIX) \ + cherk_kernel_UN.$(SUFFIX) cherk_kernel_UC.$(SUFFIX) \ + cherk_kernel_LN.$(SUFFIX) cherk_kernel_LC.$(SUFFIX) \ + ctrsm_LNUU.$(SUFFIX) ctrsm_LNUN.$(SUFFIX) ctrsm_LNLU.$(SUFFIX) ctrsm_LNLN.$(SUFFIX) \ + ctrsm_LTUU.$(SUFFIX) ctrsm_LTUN.$(SUFFIX) ctrsm_LTLU.$(SUFFIX) ctrsm_LTLN.$(SUFFIX) \ + ctrsm_LRUU.$(SUFFIX) ctrsm_LRUN.$(SUFFIX) ctrsm_LRLU.$(SUFFIX) ctrsm_LRLN.$(SUFFIX) \ + ctrsm_LCUU.$(SUFFIX) ctrsm_LCUN.$(SUFFIX) ctrsm_LCLU.$(SUFFIX) ctrsm_LCLN.$(SUFFIX) \ + ctrsm_RNUU.$(SUFFIX) ctrsm_RNUN.$(SUFFIX) ctrsm_RNLU.$(SUFFIX) ctrsm_RNLN.$(SUFFIX) \ + ctrsm_RTUU.$(SUFFIX) ctrsm_RTUN.$(SUFFIX) ctrsm_RTLU.$(SUFFIX) ctrsm_RTLN.$(SUFFIX) \ + ctrsm_RRUU.$(SUFFIX) ctrsm_RRUN.$(SUFFIX) ctrsm_RRLU.$(SUFFIX) ctrsm_RRLN.$(SUFFIX) \ + ctrsm_RCUU.$(SUFFIX) ctrsm_RCUN.$(SUFFIX) ctrsm_RCLU.$(SUFFIX) ctrsm_RCLN.$(SUFFIX) +ifndef USE_SIMPLE_THREADED_LEVEL3 +CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread_LN.$(SUFFIX) cherk_thread_LC.$(SUFFIX) +endif +endif +endif +ifneq ($(BUILD_COMPLEX16),1) + ZBLASOBJS= +endif + all :: shgemm_nn.$(SUFFIX) : gemm.c level3.c ../../param.h diff --git a/driver/level3/syrk_thread.c b/driver/level3/syrk_thread.c index b26d363c4..12808afd5 100644 --- a/driver/level3/syrk_thread.c +++ b/driver/level3/syrk_thread.c @@ -56,12 +56,16 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( if (!(mode & BLAS_COMPLEX)) { switch (mode & BLAS_PREC) { +#if defined(BUILD_SINGLE) || defined(BUILD_COMPLEX) case BLAS_SINGLE: mask = SGEMM_UNROLL_MN - 1; break; +#endif +#if defined(BUILD_DOUBLE) || defined(BUILD_COMPLEX16) case BLAS_DOUBLE: mask = DGEMM_UNROLL_MN - 1; break; +#endif #ifdef EXPRECISION case BLAS_XDOUBLE: mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1; @@ -70,12 +74,16 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( } } else { switch (mode & BLAS_PREC) { +#ifdef BUILD_COMPLEX case BLAS_SINGLE: mask = CGEMM_UNROLL_MN - 1; break; +#endif +#ifdef BUILD_COMPLEX16 case BLAS_DOUBLE: mask = ZGEMM_UNROLL_MN - 1; break; +#endif #ifdef EXPRECISION case BLAS_XDOUBLE: mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1; diff --git a/driver/others/blas_server.c b/driver/others/blas_server.c index 8d3dda3bf..acfaed75d 100644 --- a/driver/others/blas_server.c +++ b/driver/others/blas_server.c @@ -459,13 +459,16 @@ blas_queue_t *tscq; } else #endif if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) { +#ifdef BUILD_DOUBLE sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); - +#endif } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { +#ifdef BUILD_SINGLE sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); - } else { +#endif + } else { /* Other types in future */ } } else { @@ -476,11 +479,15 @@ blas_queue_t *tscq; } else #endif if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ +#ifdef BUILD_COMPLEX16 sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { +#ifdef BUILD_COMPLEX sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif } else { /* Other types in future */ } diff --git a/driver/others/blas_server_omp.c b/driver/others/blas_server_omp.c index da0a5674a..bfbe3a647 100644 --- a/driver/others/blas_server_omp.c +++ b/driver/others/blas_server_omp.c @@ -315,12 +315,15 @@ static void exec_threads(blas_queue_t *queue, int buf_index){ } else #endif if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ +#if defined ( BUILD_DOUBLE) || defined (BUILD_COMPLEX16) sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double) + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); - +#endif } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE){ +#if defined (BUILD_SINGLE) || defined (BUILD_COMPLEX) sb = (void *)(((BLASLONG)sa + ((SGEMM_P * SGEMM_Q * sizeof(float) + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#endif } else { /* Other types in future */ } @@ -332,15 +335,24 @@ static void exec_threads(blas_queue_t *queue, int buf_index){ } else #endif if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){ +#ifdef BUILD_COMPLEX16 sb = (void *)(((BLASLONG)sa + ((ZGEMM_P * ZGEMM_Q * 2 * sizeof(double) + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#else +fprintf(stderr,"UNHANDLED COMPLEX16\n"); +#endif } else if ((queue -> mode & BLAS_PREC) == BLAS_SINGLE) { +#ifdef BUILD_COMPLEX sb = (void *)(((BLASLONG)sa + ((CGEMM_P * CGEMM_Q * 2 * sizeof(float) + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B); +#else +fprintf(stderr,"UNHANDLED COMPLEX\n"); +#endif } else { /* Other types in future */ } } +if (!sb) fprintf(stderr,"SB not declared!!!\n"); queue->sb=sb; } } diff --git a/driver/others/memory.c b/driver/others/memory.c index 91cfefbd7..ba2bb55b9 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -2201,11 +2201,17 @@ static void *alloc_mmap(void *address){ #endif #endif - - allocsize = DGEMM_P * DGEMM_Q * sizeof(double); - - start = (BLASULONG)map_address; - current = (SCALING - 1) * BUFFER_SIZE; +#ifdef BUILD_DOUBLE + allocsize = DGEMM_P * DGEMM_Q * sizeof(double); +#elif defined(BUILD_COMPLEX16) + allocsize = ZGEMM_P * ZGEMM_Q * sizeof(double); +#elif defined(BUILD_COMPLEX) + allocsize = CGEMM_P * CGEMM_Q * sizeof(double); +#else + allocsize = SGEMM_P * SGEMM_Q * sizeof(double); +#endif + start = (BLASULONG)map_address; + current = (SCALING - 1) * BUFFER_SIZE; while(current > 0) { *(BLASLONG *)start = (BLASLONG)start + PAGESIZE; diff --git a/exports/Makefile b/exports/Makefile index 75901586c..960150c86 100644 --- a/exports/Makefile +++ b/exports/Makefile @@ -33,6 +33,18 @@ endif ifndef BUILD_HALF BUILD_HALF = 0 endif +ifndef BUILD_SINGLE +BUILD_SINGLE = 0 +endif +ifndef BUILD_DOUBLE +BUILD_DOUBLE = 0 +endif +ifndef BUILD_COMPLEX +BUILD_COMPLEX = 0 +endif +ifndef BUILD_COMPLEX16 +BUILD_COMPLEX16 = 0 +endif ifeq ($(OSNAME), WINNT) ifeq ($(F_COMPILER), GFORTRAN) @@ -108,10 +120,10 @@ dll : ../$(LIBDLLNAME) -Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB) $(LIBPREFIX).def : gensymbol - perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) + perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) libgoto_hpl.def : gensymbol - perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F) + perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) ifeq ($(OSNAME), Darwin) INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib @@ -246,23 +258,23 @@ static : ../$(LIBNAME) rm -f goto.$(SUFFIX) osx.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) + perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) aix.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) + perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) objcopy.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) + perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) objconv.def : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F) + perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F) test : linktest.c $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. rm -f linktest linktest.c : gensymbol ../Makefile.system ../getarch.c - perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > linktest.c + perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > linktest.c clean :: @rm -f *.def *.dylib __.SYMDEF* *.renamed diff --git a/exports/gensymbol b/exports/gensymbol index ce4d9bb64..736fdc2cd 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -16,74 +16,84 @@ # 2017/08/01 Saar # removed blas_thread_shutdown_ # -@blasobjs = ( - caxpy,ccopy,cdotc,cdotu,cgbmv,cgemm,cgemv,cgerc,cgeru, - chbmv,chemm,chemv,cher2,cher2k,cher,cherk, - chpmv,chpr2,chpr,crotg,cscal,csrot,csscal,cswap, +@blasobjsc = ( + caxpy,caxpby,ccopy,cdotc,cdotu,cgbmv,cgemm,cgemv,cgerc,cgeru, + chbmv,chemm,chemv,cher2,cher2k,cher,cherk,scabs1,scamax, + chpmv,chpr2,chpr,crotg,cscal,csrot,csscal,cswap,scamin,scasum,scnrm2, csymm,csyr2k,csyrk,ctbmv,ctbsv,ctpmv,ctpsv,ctrmm,ctrmv,ctrsm, - ctrsv, - damax,damin,dasum,daxpy,dcabs1,dcopy,ddot,dgbmv,dgemm, + ctrsv,icamax,icamin,cimatcopy,comatcopy,cgeadd,scsum); + +@blasobjsd = ( + damax,damin,dasum,daxpy,daxpby,dcabs1,dcopy,ddot,dgbmv,dgemm, dgemv,dger,dmax,dmin,dnrm2,drot,drotg,drotm,drotmg,dsbmv, - dscal,dsdot,dspmv,dspr2, + dscal,dsdot,dspmv,dspr2,dimatcopy,domatcopy, dspr,dswap,dsymm,dsymv,dsyr2,dsyr2k,dsyr,dsyrk,dtbmv,dtbsv, - dtpmv,dtpsv,dtrmm,dtrmv,dtrsm,dtrsv,dzamax,dzamin,dzasum,dznrm2, - icamax,icamin,idamax,idamin,idmax,idmin,isamax,isamin,ismax,ismin, - izamax,izamin,lsame,samax,samin,sasum,saxpy,scabs1,scamax, - scamin,scasum,scnrm2,scopy,sdot,sdsdot,sgbmv,sgemm,sgemv,sger, - smax,smin,snrm2, + dtpmv,dtpsv,dtrmm,dtrmv,dtrsm,dtrsv, + idamax,idamin,idmax,idmin,dgeadd,dsum); + +@blasobjss = ( + isamax,isamin,ismax,ismin, + samax,samin,sasum,saxpy, saxpby, + scopy,sdot,sdsdot,sgbmv,sgemm,sgemv,sger, + smax,smin,snrm2,simatcopy,somatcopy, srot,srotg,srotm,srotmg,ssbmv,sscal,sspmv,sspr2,sspr,sswap, ssymm,ssymv,ssyr2,ssyr2k,ssyr,ssyrk,stbmv,stbsv,stpmv,stpsv, - strmm,strmv,strsm,strsv,zaxpy,zcopy,zdotc,zdotu,zdrot, + strmm,strmv,strsm,strsv, sgeadd,ssum); + +@blasobjsz = ( + izamax,izamin,, + zaxpy,zaxpby,zcopy,zdotc,zdotu,zdrot, zdscal,zgbmv,zgemm,zgemv,zgerc,zgeru, zhbmv,zhemm,zhemv,zher2,zher2k,zher,zherk,zhpmv,zhpr2, zhpr,zrotg,zscal,zswap,zsymm,zsyr2k,zsyrk,ztbmv, ztbsv,ztpmv,ztpsv,ztrmm,ztrmv,ztrsm,ztrsv, - xerbla, - saxpby,daxpby,caxpby,zaxpby, - somatcopy, domatcopy, comatcopy, zomatcopy, - simatcopy, dimatcopy, cimatcopy, zimatcopy, - sgeadd,dgeadd,cgeadd,zgeadd, - ssum, dsum, scsum, dzsum -); + zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2, + zgeadd, dzsum); +@cblasobjs = (lsame, xerbla); @halfblasobjs = (shgemm, shdot, shstobf16, shdtobf16, sbf16tos, dbf16tod); -@cblasobjs = ( +@cblasobjsc = ( cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv, cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k, - cblas_cher, cblas_cherk, cblas_chpmv, cblas_chpr2, cblas_chpr, cblas_cscal, - cblas_csscal, cblas_cswap, cblas_csymm, cblas_csyr2k, cblas_csyrk, cblas_ctbmv, - cblas_ctbsv, cblas_ctpmv, cblas_ctpsv, cblas_ctrmm, cblas_ctrmv, cblas_ctrsm, cblas_ctrsv, + cblas_cher, cblas_cherk, cblas_chpmv, cblas_chpr2, cblas_chpr, cblas_cscal, cblas_caxpby, + cblas_csscal, cblas_cswap, cblas_csymm, cblas_csyr2k, cblas_csyrk, cblas_ctbmv, cblas_cgeadd, + cblas_ctbsv, cblas_ctpmv, cblas_ctpsv, cblas_ctrmm, cblas_ctrmv, cblas_ctrsm, cblas_ctrsv, + cblas_scnrm2, cblas_scasum, + cblas_icamax, cblas_icamin, cblas_icmin, cblas_icmax, cblas_scsum,cblas_cimatcopy,cblas_comatcopy + ); +@cblasobjsd = ( cblas_dasum, cblas_daxpy, cblas_dcopy, cblas_ddot, cblas_dgbmv, cblas_dgemm, cblas_dgemv, cblas_dger, cblas_dnrm2, cblas_drot, cblas_drotg, cblas_drotm, cblas_drotmg, cblas_dsbmv, cblas_dscal, cblas_dsdot, cblas_dspmv, cblas_dspr2, cblas_dspr, cblas_dswap, cblas_dsymm, cblas_dsymv, cblas_dsyr2, cblas_dsyr2k, cblas_dsyr, cblas_dsyrk, cblas_dtbmv, cblas_dtbsv, cblas_dtpmv, cblas_dtpsv, - cblas_dtrmm, cblas_dtrmv, cblas_dtrsm, cblas_dtrsv, cblas_dzasum, - cblas_dznrm2, cblas_icamax, cblas_idamax, - cblas_isamax, cblas_izamax, + cblas_dtrmm, cblas_dtrmv, cblas_dtrsm, cblas_dtrsv, cblas_daxpby, cblas_dgeadd, + cblas_idamax, cblas_idamin, cblas_idmin, cblas_idmax, cblas_dsum,cblas_dimatcopy,cblas_domatcopy + ); + +@cblasobjss = ( cblas_sasum, cblas_saxpy, - cblas_scasum, cblas_scnrm2, cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm, + cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm, cblas_sgemv, cblas_sger, cblas_snrm2, cblas_srot, cblas_srotg, cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr, cblas_sswap, cblas_ssymm, cblas_ssymv, cblas_ssyr2, cblas_ssyr2k, cblas_ssyr, cblas_ssyrk, cblas_stbmv, cblas_stbsv, cblas_stpmv, cblas_stpsv, cblas_strmm, cblas_strmv, cblas_strsm, - cblas_strsv, cblas_zaxpy, cblas_zcopy, cblas_zdotc, cblas_zdotu, cblas_zdscal, + cblas_strsv, cblas_sgeadd, + cblas_isamax, cblas_isamin, cblas_ismin, cblas_ismax, cblas_ssum,cblas_simatcopy,cblas_somatcopy + ); +@cblasobjsz = ( + cblas_dzasum, cblas_dznrm2, cblas_zaxpy, cblas_zcopy, cblas_zdotc, cblas_zdotu, cblas_zdscal, cblas_zgbmv, cblas_zgemm, cblas_zgemv, cblas_zgerc, cblas_zgeru, cblas_zhbmv, cblas_zhemm, cblas_zhemv, cblas_zher2, cblas_zher2k, cblas_zher, cblas_zherk, cblas_zhpmv, cblas_zhpr2, cblas_zhpr, cblas_zscal, cblas_zswap, cblas_zsymm, cblas_zsyr2k, cblas_zsyrk, cblas_ztbmv, cblas_ztbsv, cblas_ztpmv, cblas_ztpsv, cblas_ztrmm, cblas_ztrmv, cblas_ztrsm, cblas_ztrsv, cblas_cdotc_sub, cblas_cdotu_sub, cblas_zdotc_sub, cblas_zdotu_sub, - cblas_saxpby,cblas_daxpby,cblas_caxpby,cblas_zaxpby, - cblas_somatcopy, cblas_domatcopy, cblas_comatcopy, cblas_zomatcopy, - cblas_simatcopy, cblas_dimatcopy, cblas_cimatcopy, cblas_zimatcopy, - cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd, - cblas_isamin, cblas_idamin, cblas_icamin, cblas_izamin, - cblas_ismin, cblas_idmin, cblas_icmin, cblas_izmin, - cblas_ismax, cblas_idmax, cblas_icmax, cblas_izmax, - cblas_ssum, cblas_dsum, cblas_scsum, cblas_dzsum, - cblas_xerbla + cblas_zaxpby, cblas_zgeadd, + cblas_izamax, cblas_izamin, cblas_izmin, cblas_izmax, cblas_dzsum,cblas_zimatcopy,cblas_zomatcopy ); +@cblasobjs = ( cblas_xerbla ); + @halfcblasobjs = (cblas_shgemm, cblas_shdot, cblas_shstobf16, cblas_shdtobf16, cblas_sbf16tos, cblas_dbf16tod); @exblasobjs = ( @@ -103,12 +113,22 @@ # xdrot,xrotg, ); -@gemm3mobjs = ( - cgemm3m,zgemm3m + @gemm3mobjs=(); + + @cblasgemm3mobjs=(); + +@gemm3mobjsc = ( + cgemm3m, +); +@gemm3mobjsz = ( + zgemm3m ); -@cblasgemm3mobjs = ( - cblas_cgemm3m,cblas_zgemm3m +@cblasgemm3mobjsc = ( + cblas_cgemm3m +); +@cblasgemm3mobjsz = ( + cblas_zgemm3m ); @@ -131,22 +151,68 @@ @misc_underscore_objs = ( ); -@lapackobjs = ( +@lapackobjss = ( # These routines are provided by OpenBLAS. - sgesv, dgesv, cgesv, zgesv, - sgetf2, dgetf2, cgetf2, zgetf2, - sgetrf, dgetrf, cgetrf, zgetrf, - slaswp, dlaswp, claswp, zlaswp, - sgetrs, dgetrs, cgetrs, zgetrs, - slauu2, dlauu2, clauu2, zlauu2, - slauum, dlauum, clauum, zlauum, - spotf2, dpotf2, cpotf2, zpotf2, - spotrf, dpotrf, cpotrf, zpotrf, - strti2, dtrti2, ctrti2, ztrti2, - strtri, dtrtri, ctrtri, ztrtri, - spotri, dpotri, cpotri, zpotri, + sgesv, + sgetf2, + sgetrf, + slaswp, + sgetrs, + slauu2, + slauum, + spotf2, + spotrf, + strti2, + strtri, + spotri, ); +@lapackobjsd = ( + dgesv, + dgetf2, + dgetrf, + dlaswp, + dgetrs, + dlauu2, + dlauum, + dpotf2, + dpotrf, + dtrti2, + dtrtri, + dpotri, +); + +@lapackobjsc = ( +cgesv, +cgetf2, +cgetrf, +claswp, +cgetrs, +clauu2, +clauum, +cpotf2, +cpotrf, +ctrti2, +ctrtri, +cpotri, +); + +@lapackobjsz = ( +zgesv, +zgetf2, +zgetrf, +zlaswp, +zgetrs, +zlauu2, +zlauum, +zpotf2, +zpotrf, +ztrti2, +ztrtri, +zpotri, +); + + @lapackobjs2 = ( # These routines are provided by LAPACK (reference implementation). # @@ -162,7 +228,9 @@ ilaenv, ieeeck, lsamen, iparmq, ilaprec, ilatrans, ilauplo, iladiag, ilaver, slamch, slamc3, - +); + +@lapackobjs2sc = ( # SCLAUX -- Auxiliary routines called from both REAL and COMPLEX. # excluded: second_$(TIMER) sbdsdc, @@ -180,7 +248,9 @@ slasr, slasrt, slassq, slasv2, spttrf, sstebz, sstedc, ssteqr, ssterf, slaisnan, sisnan, slartgp, slartgs, +); +@lapackobjs2dz = ( # DZLAUX -- Auxiliary routines called from both DOUBLE and COMPLEX*16. # excluded: dsecnd_$(TIMER) dbdsdc, @@ -199,7 +269,9 @@ dsteqr, dsterf, dlaisnan, disnan, dlartgp, dlartgs, dlamch, dlamc3, +); +@lapackobjs2s = ( # SLASRC -- Single precision real LAPACK routines # already provided by @lapackobjs: # sgesv, sgetf2, slaswp, slauu2, slauum, spotf2, spotri, strti2, strtri @@ -262,7 +334,9 @@ sorbdb5, sorbdb6, sorcsd, sorcsd2by1, sgeqrt, sgeqrt2, sgeqrt3, sgemqrt, stpqrt, stpqrt2, stpmqrt, stprfb, +); +@lapackobjs2ds = ( # DSLASRC -- Double-single mixed precision real routines called from # single, single-extra and double precision real LAPACK # routines (i.e. from SLASRC, SXLASRC, DLASRC). @@ -270,7 +344,9 @@ # already provided by @lapackobjs: # sgetrs, spotrf, sgetrf spotrs, +); +@lapackobjs2c = ( # CLASRC -- Single precision complex LAPACK routines # already provided by @blasobjs: # already provided by @lapackobjs: @@ -338,7 +414,8 @@ cunbdb5, cunbdb6, cuncsd, cuncsd2by1, cgeqrt, cgeqrt2, cgeqrt3, cgemqrt, ctpqrt, ctpqrt2, ctpmqrt, ctprfb, - +); +@lapack2objszc = ( # ZCLASRC -- Double-single mixed precision complex routines called from # single, single-extra and double precision complex LAPACK # routines (i.e. from CLASRC, CXLASRC, ZLASRC). @@ -346,7 +423,9 @@ # already provided by @lapackobjs: # cgetrs, cpotrf, cgetrf cpotrs, +); +@lapack2objsd = ( # DLASRC -- Double precision real LAPACK routines # already provided by @lapackobjs: # dgesv, dgetf2, dgetrs, dlaswp, dlauu2, dlauum, dpotf2, dpotrf, dpotri, @@ -411,7 +490,8 @@ dorbdb5, dorbdb6, dorcsd, dorcsd2by1, dgeqrt, dgeqrt2, dgeqrt3, dgemqrt, dtpqrt, dtpqrt2, dtpmqrt, dtprfb, - +); +@lapackobjs2z = ( # ZLASRC -- Double precision complex LAPACK routines # already provided by @blasobjs: # already provided by @lapackobjs: @@ -485,8 +565,10 @@ zunbdb5, zunbdb6, zuncsd, zuncsd2by1, zgeqrt, zgeqrt2, zgeqrt3, zgemqrt, ztpqrt, ztpqrt2, ztpmqrt, ztprfb, +); # functions added for lapack-3.6.0 +@lapack2objsc = ( @lapack2objsc, cgejsv, cgesvdx, cgesvj, @@ -521,6 +603,8 @@ cspr2, csyr2, cunm22, +); +@lapackobjs2d = (@lapack2objsd, dbdsvdx, dgesvdx, dgetrf2, @@ -552,6 +636,8 @@ dorm22, dpotrf2, dsecnd, + ); + @lapack2objss = (@lapack2objss, sbdsvdx, second, sgesvdx, @@ -583,6 +669,8 @@ slatmt, sorm22, spotrf2, + ); + @lapack2objsz = (@lapack2objsz, zgejsv, zgesvdx, zgesvj, @@ -617,9 +705,9 @@ zspr2, zsyr2, zunm22, - +); # functions added for lapack-3.7.0 - +@lapack2objss = (@lapack2objss, slarfy, strevc3, sgelqt, @@ -637,6 +725,8 @@ stplqt, stplqt2, stpmlqt, + ); + @lapack2objsd = (@lapack2objsd, dlarfy, dsyconvf, dtrevc3, @@ -655,6 +745,8 @@ dtplqt, dtplqt2, dtpmlqt, + ); + @lapack2objsc = (@lapack2objsc, clarfy, csyconvf, ctrevc3, @@ -673,6 +765,8 @@ ctplqt, ctplqt2, ctpmlqt, + ); + @lapack2objsz = (@lapack2objsz, zlarfy, zsyconvf, ztrevc3, @@ -691,6 +785,8 @@ zlaswlq, zlamswlq, zgemlq, + ); + @lapack2objs = (@lapack2objs, sladiv1, dladiv1, iparam2stage, @@ -698,16 +794,23 @@ # functions added for lapack-3.8.0 ilaenv2stage, - + ); # functions added for lapack-3.9.0 +@lapack2objsc = (@lapack2objsc, cgesvdq, cungtsqr, dcombssq, + ); +@lapack2objsd = (@lapack2objsd, dgesvdq, dorgtsqr, + ); +@lapack2objss = (@lapack2objss, scombssq, sgesvdq, sorgtsqr, + ); +@lapack2objsz = (@lapack2objsz, zgesvdq, zungtsqr ); @@ -717,36 +820,54 @@ dlagsy, dsysvxx, sporfsx, slatms, zlatms, zherfsx, csysvxx, ); -@lapack_deprecated_objs = ( - cgegs, cggsvd, ctzrqf, dgeqpf, dlatzm, sgelsx, slahrd, zgegv, zggsvp, - cgegv, cggsvp, dgegs, dggsvd, dtzrqf, sgeqpf, slatzm, zgelsx, zlahrd, - cgelsx, clahrd, dgegv, dggsvp, sgegs, sggsvd, stzrqf, zgeqpf, zlatzm, - cgeqpf, clatzm, dgelsx, dlahrd, sgegv, sggsvp, zgegs, zggsvd, ztzrqf, -); - -@lapacke_deprecated_objs = ( +@lapack_deprecated_objsc = ( + cgegs, cggsvd, + cgegv, cggsvp, + cgelsx, clahrd, + cgeqpf, clatzm, + ctzrqf, + ); +@lapack_deprecated_objsd = ( + dgegs, dgeqpf, + dgegv, dggsvd, + dgelsx, dggsvp, + dlahrd, + dlatzm, dtzrqf); + +@lapack_deprecated_objss = ( + sgegs, + sgegv, + ); + +@lapacke_deprecated_objsc = ( LAPACKE_cggsvp, LAPACKE_cggsvp_work, - LAPACKE_dggsvp, - LAPACKE_dggsvp_work, - LAPACKE_sggsvp, - LAPACKE_sggsvp_work, - LAPACKE_zggsvp, - LAPACKE_zggsvp_work, LAPACKE_cggsvd, LAPACKE_cggsvd_work, - LAPACKE_dggsvd, - LAPACKE_dggsvd_work, - LAPACKE_sggsvd, - LAPACKE_sggsvd_work, - LAPACKE_zggsvd, - LAPACKE_zggsvd_work, LAPACKE_cgeqpf, LAPACKE_cgeqpf_work, +); +@lapacke_deprecated_objsd = ( + LAPACKE_dggsvp, + LAPACKE_dggsvp_work, + LAPACKE_dggsvd, + LAPACKE_dggsvd_work, LAPACKE_dgeqpf, LAPACKE_dgeqpf_work, +); +@lapacke_deprecated_objss = ( + LAPACKE_sggsvp, + LAPACKE_sggsvp_work, + LAPACKE_sggsvd, + LAPACKE_sggsvd_work, LAPACKE_sgeqpf, LAPACKE_sgeqpf_work, +); +@lapacke_deprecated_objsz = ( + LAPACKE_zggsvp, + LAPACKE_zggsvp_work, + LAPACKE_zggsvd, + LAPACKE_zggsvd_work, LAPACKE_zgeqpf, LAPACKE_zgeqpf_work, ); @@ -763,6 +884,15 @@ # exported since the respective LAPACK routines are not built by default. # @(OBJ) from `lapack-3.4.1/lapacke/utils/Makefile` + LAPACKE_lsame, + LAPACKE_ilaver, + LAPACKE_xerbla, + lapack_make_complex_float, + lapack_make_complex_double, + LAPACKE_get_nancheck, + LAPACKE_set_nancheck, +); +@lapackeobjsc = ( LAPACKE_cgb_nancheck, LAPACKE_cgb_trans, LAPACKE_cge_nancheck, @@ -801,118 +931,6 @@ LAPACKE_ctp_trans, LAPACKE_ctr_nancheck, LAPACKE_ctr_trans, - LAPACKE_dgb_nancheck, - LAPACKE_dgb_trans, - LAPACKE_dge_nancheck, - LAPACKE_dge_trans, - LAPACKE_dgg_nancheck, - LAPACKE_dgg_trans, - LAPACKE_dgt_nancheck, - LAPACKE_dhs_nancheck, - LAPACKE_dhs_trans, - LAPACKE_d_nancheck, - LAPACKE_dpb_nancheck, - LAPACKE_dpb_trans, - LAPACKE_dpf_nancheck, - LAPACKE_dpf_trans, - LAPACKE_dpo_nancheck, - LAPACKE_dpo_trans, - LAPACKE_dpp_nancheck, - LAPACKE_dpp_trans, - LAPACKE_dpt_nancheck, - LAPACKE_dsb_nancheck, - LAPACKE_dsb_trans, - LAPACKE_dsp_nancheck, - LAPACKE_dsp_trans, - LAPACKE_dst_nancheck, - LAPACKE_dsy_nancheck, - LAPACKE_dsy_trans, - LAPACKE_dtb_nancheck, - LAPACKE_dtb_trans, - LAPACKE_dtf_nancheck, - LAPACKE_dtf_trans, - LAPACKE_dtp_nancheck, - LAPACKE_dtp_trans, - LAPACKE_dtr_nancheck, - LAPACKE_dtr_trans, - LAPACKE_lsame, - LAPACKE_sgb_nancheck, - LAPACKE_sgb_trans, - LAPACKE_sge_nancheck, - LAPACKE_sge_trans, - LAPACKE_sgg_nancheck, - LAPACKE_sgg_trans, - LAPACKE_sgt_nancheck, - LAPACKE_shs_nancheck, - LAPACKE_shs_trans, - LAPACKE_s_nancheck, - LAPACKE_spb_nancheck, - LAPACKE_spb_trans, - LAPACKE_spf_nancheck, - LAPACKE_spf_trans, - LAPACKE_spo_nancheck, - LAPACKE_spo_trans, - LAPACKE_spp_nancheck, - LAPACKE_spp_trans, - LAPACKE_spt_nancheck, - LAPACKE_ssb_nancheck, - LAPACKE_ssb_trans, - LAPACKE_ssp_nancheck, - LAPACKE_ssp_trans, - LAPACKE_sst_nancheck, - LAPACKE_ssy_nancheck, - LAPACKE_ssy_trans, - LAPACKE_stb_nancheck, - LAPACKE_stb_trans, - LAPACKE_stf_nancheck, - LAPACKE_stf_trans, - LAPACKE_stp_nancheck, - LAPACKE_stp_trans, - LAPACKE_str_nancheck, - LAPACKE_str_trans, - LAPACKE_xerbla, - LAPACKE_zgb_nancheck, - LAPACKE_zgb_trans, - LAPACKE_zge_nancheck, - LAPACKE_zge_trans, - LAPACKE_zgg_nancheck, - LAPACKE_zgg_trans, - LAPACKE_zgt_nancheck, - LAPACKE_zhb_nancheck, - LAPACKE_zhb_trans, - LAPACKE_zhe_nancheck, - LAPACKE_zhe_trans, - LAPACKE_zhp_nancheck, - LAPACKE_zhp_trans, - LAPACKE_zhs_nancheck, - LAPACKE_zhs_trans, - LAPACKE_z_nancheck, - LAPACKE_zpb_nancheck, - LAPACKE_zpb_trans, - LAPACKE_zpf_nancheck, - LAPACKE_zpf_trans, - LAPACKE_zpo_nancheck, - LAPACKE_zpo_trans, - LAPACKE_zpp_nancheck, - LAPACKE_zpp_trans, - LAPACKE_zpt_nancheck, - LAPACKE_zsp_nancheck, - LAPACKE_zsp_trans, - LAPACKE_zst_nancheck, - LAPACKE_zsy_nancheck, - LAPACKE_zsy_trans, - LAPACKE_ztb_nancheck, - LAPACKE_ztb_trans, - LAPACKE_ztf_nancheck, - LAPACKE_ztf_trans, - LAPACKE_ztp_nancheck, - LAPACKE_ztp_trans, - LAPACKE_ztr_nancheck, - LAPACKE_ztr_trans, - lapack_make_complex_float, - lapack_make_complex_double, - - # @(SRC_OBJ) from `lapack-3.5.0/lapacke/src/Makefile` LAPACKE_cbbcsd, LAPACKE_cbbcsd_work, LAPACKE_cbdsqr, @@ -1405,6 +1423,162 @@ LAPACKE_cupgtr_work, LAPACKE_cupmtr, LAPACKE_cupmtr_work, + LAPACKE_csyr, + LAPACKE_csyr_work, + LAPACKE_clatms, + LAPACKE_clatms_work, + LAPACKE_clagge, + LAPACKE_clagge_work, + LAPACKE_claghe, + LAPACKE_claghe_work, + LAPACKE_clagsy, + LAPACKE_clagsy_work, + LAPACKE_cgejsv, + LAPACKE_cgejsv_work, + LAPACKE_cgesvdx, + LAPACKE_cgesvdx_work, + LAPACKE_cgesvj, + LAPACKE_cgesvj_work, + LAPACKE_cgetrf2, + LAPACKE_cgetrf2_work, + LAPACKE_cgges3, + LAPACKE_cgges3_work, + LAPACKE_cggev3, + LAPACKE_cggev3_work, + LAPACKE_cgghd3, + LAPACKE_cgghd3_work, + LAPACKE_cggsvd3, + LAPACKE_cggsvd3_work, + LAPACKE_cggsvp3, + LAPACKE_cggsvp3_work, + LAPACKE_chetrf_rook, + LAPACKE_chetrf_rook_work, + LAPACKE_chetrs_rook, + LAPACKE_chetrs_rook_work, + LAPACKE_clapmt, + LAPACKE_clapmt_work, + LAPACKE_clascl, + LAPACKE_clascl_work, + LAPACKE_cpotrf2, + LAPACKE_cpotrf2_work, + LAPACKE_csytrf_rook, + LAPACKE_csytrf_rook_work, + LAPACKE_csytrs_rook, + LAPACKE_csytrs_rook_work, + LAPACKE_cuncsd2by1, + LAPACKE_cuncsd2by1_work, + LAPACKE_cgelq, + LAPACKE_cgelq_work, + LAPACKE_cgemlq, + LAPACKE_cgemlq_work, + LAPACKE_cgemqr, + LAPACKE_cgemqr_work, + LAPACKE_cgeqr, + LAPACKE_cgeqr_work, + LAPACKE_cgetsls, + LAPACKE_cgetsls_work, + LAPACKE_chbev_2stage, + LAPACKE_chbev_2stage_work, + LAPACKE_chbevd_2stage, + LAPACKE_chbevd_2stage_work, + LAPACKE_chbevx_2stage, + LAPACKE_chbevx_2stage_work, + LAPACKE_checon_3, + LAPACKE_checon_3_work, + LAPACKE_cheev_2stage, + LAPACKE_cheev_2stage_work, + LAPACKE_cheevd_2stage, + LAPACKE_cheevd_2stage_work, + LAPACKE_cheevr_2stage, + LAPACKE_cheevr_2stage_work, + LAPACKE_cheevx_2stage, + LAPACKE_cheevx_2stage_work, + LAPACKE_chegv_2stage, + LAPACKE_chegv_2stage_work, + LAPACKE_chesv_aa, + LAPACKE_chesv_aa_work, + LAPACKE_chesv_rk, + LAPACKE_chesv_rk_work, + LAPACKE_chetrf_aa, + LAPACKE_chetrf_aa_work, + LAPACKE_chetrf_rk, + LAPACKE_chetrf_rk_work, + LAPACKE_chetri_3, + LAPACKE_chetri_3_work, + LAPACKE_chetrs_aa, + LAPACKE_chetrs_aa_work, + LAPACKE_chetrs_3, + LAPACKE_chetrs_3_work, + LAPACKE_csycon_3, + LAPACKE_csycon_3_work, + LAPACKE_csysv_aa, + LAPACKE_csysv_aa_work, + LAPACKE_csysv_rk, + LAPACKE_csysv_rk_work, + LAPACKE_csytrf_aa, + LAPACKE_csytrf_aa_work, + LAPACKE_csytrf_rk, + LAPACKE_csytrf_rk_work, + LAPACKE_csytri_3, + LAPACKE_csytri_3_work, + LAPACKE_csytrs_aa, + LAPACKE_csytrs_aa_work, + LAPACKE_csytrs_3, + LAPACKE_csytrs_3_work, + LAPACKE_chesv_aa_2stage, + LAPACKE_chesv_aa_2stage_work, + LAPACKE_chetrf_aa_2stage, + LAPACKE_chetrf_aa_2stage_work, + LAPACKE_chetrs_aa_2stage, + LAPACKE_chetrs_aa_2stage_work, + LAPACKE_clacrm, + LAPACKE_clacrm_work, + LAPACKE_clarcm, + LAPACKE_clarcm_work, + LAPACKE_classq, + LAPACKE_classq_work, + LAPACKE_csysv_aa_2stage, + LAPACKE_csysv_aa_2stage_work, + LAPACKE_csytrf_aa_2stage, + LAPACKE_csytrf_aa_2stage_work, + LAPACKE_csytrs_aa_2stage, + LAPACKE_csytrs_aa_2stage_work, +); +@lapackeobjsd = ( + LAPACKE_dgb_nancheck, + LAPACKE_dgb_trans, + LAPACKE_dge_nancheck, + LAPACKE_dge_trans, + LAPACKE_dgg_nancheck, + LAPACKE_dgg_trans, + LAPACKE_dgt_nancheck, + LAPACKE_dhs_nancheck, + LAPACKE_dhs_trans, + LAPACKE_d_nancheck, + LAPACKE_dpb_nancheck, + LAPACKE_dpb_trans, + LAPACKE_dpf_nancheck, + LAPACKE_dpf_trans, + LAPACKE_dpo_nancheck, + LAPACKE_dpo_trans, + LAPACKE_dpp_nancheck, + LAPACKE_dpp_trans, + LAPACKE_dpt_nancheck, + LAPACKE_dsb_nancheck, + LAPACKE_dsb_trans, + LAPACKE_dsp_nancheck, + LAPACKE_dsp_trans, + LAPACKE_dst_nancheck, + LAPACKE_dsy_nancheck, + LAPACKE_dsy_trans, + LAPACKE_dtb_nancheck, + LAPACKE_dtb_trans, + LAPACKE_dtf_nancheck, + LAPACKE_dtf_trans, + LAPACKE_dtp_nancheck, + LAPACKE_dtp_trans, + LAPACKE_dtr_nancheck, + LAPACKE_dtr_trans, LAPACKE_dbbcsd, LAPACKE_dbbcsd_work, LAPACKE_dbdsdc, @@ -1889,6 +2063,130 @@ LAPACKE_dtrttp_work, LAPACKE_dtzrzf, LAPACKE_dtzrzf_work, + LAPACKE_dlatms, + LAPACKE_dlatms_work, + LAPACKE_dlagge, + LAPACKE_dlagge_work, + LAPACKE_dlagsy, + LAPACKE_dlagsy_work, + LAPACKE_dbdsvdx, + LAPACKE_dbdsvdx_work, + LAPACKE_dgesvdx, + LAPACKE_dgesvdx_work, + LAPACKE_dgetrf2, + LAPACKE_dgetrf2_work, + LAPACKE_dgges3, + LAPACKE_dgges3_work, + LAPACKE_dggev3, + LAPACKE_dggev3_work, + LAPACKE_dgghd3, + LAPACKE_dgghd3_work, + LAPACKE_dggsvd3, + LAPACKE_dggsvd3_work, + LAPACKE_dggsvp3, + LAPACKE_dggsvp3_work, + LAPACKE_dlapmt, + LAPACKE_dlapmt_work, + LAPACKE_dlascl, + LAPACKE_dlascl_work, + LAPACKE_dorcsd2by1, + LAPACKE_dorcsd2by1_work, + LAPACKE_dpotrf2, + LAPACKE_dpotrf2_work, + LAPACKE_dsytrf_rook, + LAPACKE_dsytrf_rook_work, + LAPACKE_dsytrs_rook, + LAPACKE_dsytrs_rook_work, + LAPACKE_dgelq, + LAPACKE_dgelq_work, + LAPACKE_dgemlq, + LAPACKE_dgemlq_work, + LAPACKE_dgemqr, + LAPACKE_dgemqr_work, + LAPACKE_dgeqr, + LAPACKE_dgeqr_work, + LAPACKE_dgetsls, + LAPACKE_dgetsls_work, + LAPACKE_dsbev_2stage, + LAPACKE_dsbev_2stage_work, + LAPACKE_dsbevd_2stage, + LAPACKE_dsbevd_2stage_work, + LAPACKE_dsbevx_2stage, + LAPACKE_dsbevx_2stage_work, + LAPACKE_dsycon_3, + LAPACKE_dsycon_3_work, + LAPACKE_dsyev_2stage, + LAPACKE_dsyev_2stage_work, + LAPACKE_dsyevd_2stage, + LAPACKE_dsyevd_2stage_work, + LAPACKE_dsyevr_2stage, + LAPACKE_dsyevr_2stage_work, + LAPACKE_dsyevx_2stage, + LAPACKE_dsyevx_2stage_work, + LAPACKE_dsygv_2stage, + LAPACKE_dsygv_2stage_work, + LAPACKE_dsysv_aa, + LAPACKE_dsysv_aa_work, + LAPACKE_dsysv_rk, + LAPACKE_dsysv_rk_work, + LAPACKE_dsytrf_aa, + LAPACKE_dsytrf_aa_work, + LAPACKE_dsytrf_rk, + LAPACKE_dsytrf_rk_work, + LAPACKE_dsytri_3, + LAPACKE_dsytri_3_work, + LAPACKE_dsytrs_aa, + LAPACKE_dsytrs_aa_work, + LAPACKE_dsytrs_3, + LAPACKE_dsytrs_3_work, + LAPACKE_dlassq, + LAPACKE_dlassq_work, + LAPACKE_dsysv_aa_2stage, + LAPACKE_dsysv_aa_2stage_work, + LAPACKE_dsytrf_aa_2stage, + LAPACKE_dsytrf_aa_2stage_work, + LAPACKE_dsytrs_aa_2stage, + LAPACKE_dsytrs_aa_2stage_work, + LAPACKE_dgesvdq, + LAPACKE_dgesvdq_work, + LAPACKE_slag2d, + LAPACKE_slag2d_work, +); +@lapackeobjss = ( + LAPACKE_sgb_nancheck, + LAPACKE_sgb_trans, + LAPACKE_sge_nancheck, + LAPACKE_sge_trans, + LAPACKE_sgg_nancheck, + LAPACKE_sgg_trans, + LAPACKE_sgt_nancheck, + LAPACKE_shs_nancheck, + LAPACKE_shs_trans, + LAPACKE_s_nancheck, + LAPACKE_spb_nancheck, + LAPACKE_spb_trans, + LAPACKE_spf_nancheck, + LAPACKE_spf_trans, + LAPACKE_spo_nancheck, + LAPACKE_spo_trans, + LAPACKE_spp_nancheck, + LAPACKE_spp_trans, + LAPACKE_spt_nancheck, + LAPACKE_ssb_nancheck, + LAPACKE_ssb_trans, + LAPACKE_ssp_nancheck, + LAPACKE_ssp_trans, + LAPACKE_sst_nancheck, + LAPACKE_ssy_nancheck, + LAPACKE_ssy_trans, + LAPACKE_stb_nancheck, + LAPACKE_stb_trans, + LAPACKE_stf_nancheck, + LAPACKE_stf_trans, + LAPACKE_stp_nancheck, + LAPACKE_stp_trans, + LAPACKE_str_nancheck, + LAPACKE_str_trans, LAPACKE_sbbcsd, LAPACKE_sbbcsd_work, LAPACKE_sbdsdc, @@ -2035,8 +2333,6 @@ LAPACKE_slacn2_work, LAPACKE_slacpy, LAPACKE_slacpy_work, - LAPACKE_slag2d, - LAPACKE_slag2d_work, LAPACKE_slamch, LAPACKE_slamch_work, LAPACKE_slange, @@ -2367,6 +2663,134 @@ LAPACKE_strttp_work, LAPACKE_stzrzf, LAPACKE_stzrzf_work, + LAPACKE_slatms, + LAPACKE_slatms_work, + LAPACKE_slagge, + LAPACKE_slagge_work, + LAPACKE_slagsy, + LAPACKE_slagsy_work, + LAPACKE_sbdsvdx, + LAPACKE_sbdsvdx_work, + LAPACKE_sgesvdx, + LAPACKE_sgesvdx_work, + LAPACKE_sgetrf2, + LAPACKE_sgetrf2_work, + LAPACKE_sgges3, + LAPACKE_sgges3_work, + LAPACKE_sggev3, + LAPACKE_sggev3_work, + LAPACKE_sgghd3, + LAPACKE_sgghd3_work, + LAPACKE_sggsvd3, + LAPACKE_sggsvd3_work, + LAPACKE_sggsvp3, + LAPACKE_sggsvp3_work, + LAPACKE_slapmt, + LAPACKE_slapmt_work, + LAPACKE_slascl, + LAPACKE_slascl_work, + LAPACKE_sorcsd2by1, + LAPACKE_sorcsd2by1_work, + LAPACKE_spotrf2, + LAPACKE_spotrf2_work, + LAPACKE_ssytrf_rook, + LAPACKE_ssytrf_rook_work, + LAPACKE_ssytrs_rook, + LAPACKE_ssytrs_rook_work, + LAPACKE_stpqrt, + LAPACKE_stpqrt_work, + LAPACKE_sgelq, + LAPACKE_sgelq_work, + LAPACKE_sgemlq, + LAPACKE_sgemlq_work, + LAPACKE_sgemqr, + LAPACKE_sgemqr_work, + LAPACKE_sgeqr, + LAPACKE_sgeqr_work, + LAPACKE_sgetsls, + LAPACKE_sgetsls_work, + LAPACKE_ssbev_2stage, + LAPACKE_ssbev_2stage_work, + LAPACKE_ssbevd_2stage, + LAPACKE_ssbevd_2stage_work, + LAPACKE_ssbevx_2stage, + LAPACKE_ssbevx_2stage_work, + LAPACKE_ssycon_3, + LAPACKE_ssycon_3_work, + LAPACKE_ssyev_2stage, + LAPACKE_ssyev_2stage_work, + LAPACKE_ssyevd_2stage, + LAPACKE_ssyevd_2stage_work, + LAPACKE_ssyevr_2stage, + LAPACKE_ssyevr_2stage_work, + LAPACKE_ssyevx_2stage, + LAPACKE_ssyevx_2stage_work, + LAPACKE_ssygv_2stage, + LAPACKE_ssygv_2stage_work, + LAPACKE_ssysv_aa, + LAPACKE_ssysv_aa_work, + LAPACKE_ssysv_rk, + LAPACKE_ssysv_rk_work, + LAPACKE_ssytrf_aa, + LAPACKE_ssytrf_aa_work, + LAPACKE_ssytrf_rk, + LAPACKE_ssytrf_rk_work, + LAPACKE_ssytri_3, + LAPACKE_ssytri_3_work, + LAPACKE_ssytrs_aa, + LAPACKE_ssytrs_aa_work, + LAPACKE_ssytrs_3, + LAPACKE_ssytrs_3_work, + LAPACKE_slassq, + LAPACKE_slassq_work, + LAPACKE_ssysv_aa_2stage, + LAPACKE_ssysv_aa_2stage_work, + LAPACKE_ssytrf_aa_2stage, + LAPACKE_ssytrf_aa_2stage_work, + LAPACKE_ssytrs_aa_2stage, + LAPACKE_ssytrs_aa_2stage_work, + LAPACKE_sgesvdq, + LAPACKE_sgesvdq_work, +); +@lapackeobjsz = ( + LAPACKE_zgb_nancheck, + LAPACKE_zgb_trans, + LAPACKE_zge_nancheck, + LAPACKE_zge_trans, + LAPACKE_zgg_nancheck, + LAPACKE_zgg_trans, + LAPACKE_zgt_nancheck, + LAPACKE_zhb_nancheck, + LAPACKE_zhb_trans, + LAPACKE_zhe_nancheck, + LAPACKE_zhe_trans, + LAPACKE_zhp_nancheck, + LAPACKE_zhp_trans, + LAPACKE_zhs_nancheck, + LAPACKE_zhs_trans, + LAPACKE_z_nancheck, + LAPACKE_zpb_nancheck, + LAPACKE_zpb_trans, + LAPACKE_zpf_nancheck, + LAPACKE_zpf_trans, + LAPACKE_zpo_nancheck, + LAPACKE_zpo_trans, + LAPACKE_zpp_nancheck, + LAPACKE_zpp_trans, + LAPACKE_zpt_nancheck, + LAPACKE_zsp_nancheck, + LAPACKE_zsp_trans, + LAPACKE_zst_nancheck, + LAPACKE_zsy_nancheck, + LAPACKE_zsy_trans, + LAPACKE_ztb_nancheck, + LAPACKE_ztb_trans, + LAPACKE_ztf_nancheck, + LAPACKE_ztf_trans, + LAPACKE_ztp_nancheck, + LAPACKE_ztp_trans, + LAPACKE_ztr_nancheck, + LAPACKE_ztr_trans, LAPACKE_zbbcsd, LAPACKE_zbbcsd_work, LAPACKE_zbdsqr, @@ -2864,11 +3288,7 @@ LAPACKE_zupmtr, LAPACKE_zupmtr_work, LAPACKE_zsyr, - LAPACKE_csyr, LAPACKE_zsyr_work, - LAPACKE_csyr_work, - LAPACKE_ilaver, - ## @(SRCX_OBJ) from `lapack-3.4.1/lapacke/src/Makefile` ## Not exported: requires LAPACKE_EXTENDED to be set and depends on the ## corresponding LAPACK extended precision routines. @@ -2948,128 +3368,15 @@ ## @(MATGEN_OBJ) from `lapack-3.4.1/lapacke/src/Makefile` ## Not exported: requires LAPACKE_TESTING to be set and depends on libtmg ## (see `lapack-3.4.1/TESTING/MATGEN`). - LAPACKE_clatms, - LAPACKE_clatms_work, - LAPACKE_dlatms, - LAPACKE_dlatms_work, - LAPACKE_slatms, - LAPACKE_slatms_work, LAPACKE_zlatms, LAPACKE_zlatms_work, - LAPACKE_clagge, - LAPACKE_clagge_work, - LAPACKE_dlagge, - LAPACKE_dlagge_work, - LAPACKE_slagge, - LAPACKE_slagge_work, LAPACKE_zlagge, LAPACKE_zlagge_work, - LAPACKE_claghe, - LAPACKE_claghe_work, LAPACKE_zlaghe, LAPACKE_zlaghe_work, - LAPACKE_clagsy, - LAPACKE_clagsy_work, - LAPACKE_dlagsy, - LAPACKE_dlagsy_work, - LAPACKE_slagsy, - LAPACKE_slagsy_work, LAPACKE_zlagsy, LAPACKE_zlagsy_work, ## new function from lapack-3.6.0 - - LAPACKE_cgejsv, - LAPACKE_cgejsv_work, - LAPACKE_cgesvdx, - LAPACKE_cgesvdx_work, - LAPACKE_cgesvj, - LAPACKE_cgesvj_work, - LAPACKE_cgetrf2, - LAPACKE_cgetrf2_work, - LAPACKE_cgges3, - LAPACKE_cgges3_work, - LAPACKE_cggev3, - LAPACKE_cggev3_work, - LAPACKE_cgghd3, - LAPACKE_cgghd3_work, - LAPACKE_cggsvd3, - LAPACKE_cggsvd3_work, - LAPACKE_cggsvp3, - LAPACKE_cggsvp3_work, - LAPACKE_chetrf_rook, - LAPACKE_chetrf_rook_work, - LAPACKE_chetrs_rook, - LAPACKE_chetrs_rook_work, - LAPACKE_clapmt, - LAPACKE_clapmt_work, - LAPACKE_clascl, - LAPACKE_clascl_work, - LAPACKE_cpotrf2, - LAPACKE_cpotrf2_work, - LAPACKE_csytrf_rook, - LAPACKE_csytrf_rook_work, - LAPACKE_csytrs_rook, - LAPACKE_csytrs_rook_work, - LAPACKE_cuncsd2by1, - LAPACKE_cuncsd2by1_work, - LAPACKE_dbdsvdx, - LAPACKE_dbdsvdx_work, - LAPACKE_dgesvdx, - LAPACKE_dgesvdx_work, - LAPACKE_dgetrf2, - LAPACKE_dgetrf2_work, - LAPACKE_dgges3, - LAPACKE_dgges3_work, - LAPACKE_dggev3, - LAPACKE_dggev3_work, - LAPACKE_dgghd3, - LAPACKE_dgghd3_work, - LAPACKE_dggsvd3, - LAPACKE_dggsvd3_work, - LAPACKE_dggsvp3, - LAPACKE_dggsvp3_work, - LAPACKE_dlapmt, - LAPACKE_dlapmt_work, - LAPACKE_dlascl, - LAPACKE_dlascl_work, - LAPACKE_dorcsd2by1, - LAPACKE_dorcsd2by1_work, - LAPACKE_dpotrf2, - LAPACKE_dpotrf2_work, - LAPACKE_dsytrf_rook, - LAPACKE_dsytrf_rook_work, - LAPACKE_dsytrs_rook, - LAPACKE_dsytrs_rook_work, - LAPACKE_sbdsvdx, - LAPACKE_sbdsvdx_work, - LAPACKE_sgesvdx, - LAPACKE_sgesvdx_work, - LAPACKE_sgetrf2, - LAPACKE_sgetrf2_work, - LAPACKE_sgges3, - LAPACKE_sgges3_work, - LAPACKE_sggev3, - LAPACKE_sggev3_work, - LAPACKE_sgghd3, - LAPACKE_sgghd3_work, - LAPACKE_sggsvd3, - LAPACKE_sggsvd3_work, - LAPACKE_sggsvp3, - LAPACKE_sggsvp3_work, - LAPACKE_slapmt, - LAPACKE_slapmt_work, - LAPACKE_slascl, - LAPACKE_slascl_work, - LAPACKE_sorcsd2by1, - LAPACKE_sorcsd2by1_work, - LAPACKE_spotrf2, - LAPACKE_spotrf2_work, - LAPACKE_ssytrf_rook, - LAPACKE_ssytrf_rook_work, - LAPACKE_ssytrs_rook, - LAPACKE_ssytrs_rook_work, - LAPACKE_stpqrt, - LAPACKE_stpqrt_work, LAPACKE_zgejsv, LAPACKE_zgejsv_work, LAPACKE_zgesvdx, @@ -3106,148 +3413,6 @@ LAPACKE_zuncsd2by1_work, ## new function from lapack-3.7.0 - LAPACKE_cgelq, - LAPACKE_cgelq_work, - LAPACKE_cgemlq, - LAPACKE_cgemlq_work, - LAPACKE_cgemqr, - LAPACKE_cgemqr_work, - LAPACKE_cgeqr, - LAPACKE_cgeqr_work, - LAPACKE_cgetsls, - LAPACKE_cgetsls_work, - LAPACKE_chbev_2stage, - LAPACKE_chbev_2stage_work, - LAPACKE_chbevd_2stage, - LAPACKE_chbevd_2stage_work, - LAPACKE_chbevx_2stage, - LAPACKE_chbevx_2stage_work, - LAPACKE_checon_3, - LAPACKE_checon_3_work, - LAPACKE_cheev_2stage, - LAPACKE_cheev_2stage_work, - LAPACKE_cheevd_2stage, - LAPACKE_cheevd_2stage_work, - LAPACKE_cheevr_2stage, - LAPACKE_cheevr_2stage_work, - LAPACKE_cheevx_2stage, - LAPACKE_cheevx_2stage_work, - LAPACKE_chegv_2stage, - LAPACKE_chegv_2stage_work, - LAPACKE_chesv_aa, - LAPACKE_chesv_aa_work, - LAPACKE_chesv_rk, - LAPACKE_chesv_rk_work, - LAPACKE_chetrf_aa, - LAPACKE_chetrf_aa_work, - LAPACKE_chetrf_rk, - LAPACKE_chetrf_rk_work, - LAPACKE_chetri_3, - LAPACKE_chetri_3_work, - LAPACKE_chetrs_aa, - LAPACKE_chetrs_aa_work, - LAPACKE_chetrs_3, - LAPACKE_chetrs_3_work, - LAPACKE_csycon_3, - LAPACKE_csycon_3_work, - LAPACKE_csysv_aa, - LAPACKE_csysv_aa_work, - LAPACKE_csysv_rk, - LAPACKE_csysv_rk_work, - LAPACKE_csytrf_aa, - LAPACKE_csytrf_aa_work, - LAPACKE_csytrf_rk, - LAPACKE_csytrf_rk_work, - LAPACKE_csytri_3, - LAPACKE_csytri_3_work, - LAPACKE_csytrs_aa, - LAPACKE_csytrs_aa_work, - LAPACKE_csytrs_3, - LAPACKE_csytrs_3_work, - LAPACKE_dgelq, - LAPACKE_dgelq_work, - LAPACKE_dgemlq, - LAPACKE_dgemlq_work, - LAPACKE_dgemqr, - LAPACKE_dgemqr_work, - LAPACKE_dgeqr, - LAPACKE_dgeqr_work, - LAPACKE_dgetsls, - LAPACKE_dgetsls_work, - LAPACKE_dsbev_2stage, - LAPACKE_dsbev_2stage_work, - LAPACKE_dsbevd_2stage, - LAPACKE_dsbevd_2stage_work, - LAPACKE_dsbevx_2stage, - LAPACKE_dsbevx_2stage_work, - LAPACKE_dsycon_3, - LAPACKE_dsycon_3_work, - LAPACKE_dsyev_2stage, - LAPACKE_dsyev_2stage_work, - LAPACKE_dsyevd_2stage, - LAPACKE_dsyevd_2stage_work, - LAPACKE_dsyevr_2stage, - LAPACKE_dsyevr_2stage_work, - LAPACKE_dsyevx_2stage, - LAPACKE_dsyevx_2stage_work, - LAPACKE_dsygv_2stage, - LAPACKE_dsygv_2stage_work, - LAPACKE_dsysv_aa, - LAPACKE_dsysv_aa_work, - LAPACKE_dsysv_rk, - LAPACKE_dsysv_rk_work, - LAPACKE_dsytrf_aa, - LAPACKE_dsytrf_aa_work, - LAPACKE_dsytrf_rk, - LAPACKE_dsytrf_rk_work, - LAPACKE_dsytri_3, - LAPACKE_dsytri_3_work, - LAPACKE_dsytrs_aa, - LAPACKE_dsytrs_aa_work, - LAPACKE_dsytrs_3, - LAPACKE_dsytrs_3_work, - LAPACKE_sgelq, - LAPACKE_sgelq_work, - LAPACKE_sgemlq, - LAPACKE_sgemlq_work, - LAPACKE_sgemqr, - LAPACKE_sgemqr_work, - LAPACKE_sgeqr, - LAPACKE_sgeqr_work, - LAPACKE_sgetsls, - LAPACKE_sgetsls_work, - LAPACKE_ssbev_2stage, - LAPACKE_ssbev_2stage_work, - LAPACKE_ssbevd_2stage, - LAPACKE_ssbevd_2stage_work, - LAPACKE_ssbevx_2stage, - LAPACKE_ssbevx_2stage_work, - LAPACKE_ssycon_3, - LAPACKE_ssycon_3_work, - LAPACKE_ssyev_2stage, - LAPACKE_ssyev_2stage_work, - LAPACKE_ssyevd_2stage, - LAPACKE_ssyevd_2stage_work, - LAPACKE_ssyevr_2stage, - LAPACKE_ssyevr_2stage_work, - LAPACKE_ssyevx_2stage, - LAPACKE_ssyevx_2stage_work, - LAPACKE_ssygv_2stage, - LAPACKE_ssygv_2stage_work, - LAPACKE_ssysv_aa, - LAPACKE_ssysv_aa_work, - LAPACKE_ssysv_rk, - LAPACKE_ssysv_rk_work, - LAPACKE_ssytrf_aa, - LAPACKE_ssytrf_aa_work, - LAPACKE_ssytrf_rk, - LAPACKE_ssytrf_rk_work, - LAPACKE_ssytri_3, - LAPACKE_ssytri_3_work, - LAPACKE_ssytrs_aa, - LAPACKE_ssytrs_aa_work, - LAPACKE_ssytrs_3, - LAPACKE_ssytrs_3_work, LAPACKE_zgelq, LAPACKE_zgelq_work, LAPACKE_zgemlq, @@ -3308,42 +3473,6 @@ LAPACKE_zsytrs_3_work, ## new function from lapack-3.8.0 - LAPACKE_chesv_aa_2stage, - LAPACKE_chesv_aa_2stage_work, - LAPACKE_chetrf_aa_2stage, - LAPACKE_chetrf_aa_2stage_work, - LAPACKE_chetrs_aa_2stage, - LAPACKE_chetrs_aa_2stage_work, - LAPACKE_clacrm, - LAPACKE_clacrm_work, - LAPACKE_clarcm, - LAPACKE_clarcm_work, - LAPACKE_classq, - LAPACKE_classq_work, - LAPACKE_csysv_aa_2stage, - LAPACKE_csysv_aa_2stage_work, - LAPACKE_csytrf_aa_2stage, - LAPACKE_csytrf_aa_2stage_work, - LAPACKE_csytrs_aa_2stage, - LAPACKE_csytrs_aa_2stage_work, - LAPACKE_dlassq, - LAPACKE_dlassq_work, - LAPACKE_dsysv_aa_2stage, - LAPACKE_dsysv_aa_2stage_work, - LAPACKE_dsytrf_aa_2stage, - LAPACKE_dsytrf_aa_2stage_work, - LAPACKE_dsytrs_aa_2stage, - LAPACKE_dsytrs_aa_2stage_work, - LAPACKE_get_nancheck, - LAPACKE_set_nancheck, - LAPACKE_slassq, - LAPACKE_slassq_work, - LAPACKE_ssysv_aa_2stage, - LAPACKE_ssysv_aa_2stage_work, - LAPACKE_ssytrf_aa_2stage, - LAPACKE_ssytrf_aa_2stage_work, - LAPACKE_ssytrs_aa_2stage, - LAPACKE_ssytrs_aa_2stage_work, LAPACKE_zhesv_aa_2stage, LAPACKE_zhesv_aa_2stage_work, LAPACKE_zhetrf_aa_2stage, @@ -3362,36 +3491,19 @@ LAPACKE_zsytrf_aa_2stage_work, LAPACKE_zsytrs_aa_2stage, LAPACKE_zsytrs_aa_2stage_work, - # new functions from 3.9.0 - LAPACKE_dgesvdq, - LAPACKE_dgesvdq_work, - LAPACKE_sgesvdq, - LAPACKE_sgesvdq_work, LAPACKE_zgesvdq, LAPACKE_zgesvdq_work - ); #These function may need 2 underscores. @lapack_embeded_underscore_objs=( - xerbla_array, chla_transtype, slasyf_rook, + xerbla_array, chla_transtype, + ); +@lapack_embeded_underscore_objs_s=( + slasyf_rook, ssytf2_rook, ssytrf_rook, ssytrs_rook, ssytri_rook, ssycon_rook, ssysv_rook, - chetf2_rook, chetrf_rook, chetri_rook, - chetrs_rook, checon_rook, chesv_rook, - clahef_rook, clasyf_rook, - csytf2_rook, csytrf_rook, csytrs_rook, - csytri_rook, csycon_rook, csysv_rook, - dlasyf_rook, - dsytf2_rook, dsytrf_rook, dsytrs_rook, - dsytri_rook, dsycon_rook, dsysv_rook, - zhetf2_rook, zhetrf_rook, zhetri_rook, - zhetrs_rook, zhecon_rook, zhesv_rook, - zlahef_rook, zlasyf_rook, - zsytf2_rook, zsytrf_rook, zsytrs_rook, - zsytri_rook, zsycon_rook, zsysv_rook, -# 3.7.0 slasyf_rk, ssyconvf_rook, ssytf2_rk, ssytrf_rk, ssytrs_3, ssytri_3, ssytri_3x, ssycon_3, ssysv_rk, @@ -3400,15 +3512,18 @@ ssytrd_sb2st, ssb2st_kernels, ssyevd_2stage, ssyev_2stage, ssyevx_2stage, ssyevr_2stage, ssbev_2stage, ssbevx_2stage, ssbevd_2stage, - ssygv_2stage, dlasyf_rk, dsyconvf_rook, - dsytf2_rk, dsytrf_rk, dsytrs_3, - dsytri_3, dsytri_3x, dsycon_3, - dsysv_rk, dlasyf_aa, dsysv_aa, - dsytrf_aa, dsytrs_aa, dsytrd_2stage, - dsytrd_sy2sb, dsytrd_sb2st, dsb2st_kernels, - dsyevd_2stage, dsyev_2stage, dsyevx_2stage, - dsyevr_2stage, dsbev_2stage, dsbevx_2stage, - dsbevd_2stage, dsygv_2stage, chetf2_rk, + ssygv_2stage, + ssysv_aa_2stage, ssytrf_aa_2stage, + ssytrs_aa_2stage, + slaorhr_col_getrfnp, slaorhr_col_getrfnp2, sorhr_col, +); +@lapack_embeded_underscore_objs_c=( + chetf2_rook, chetrf_rook, chetri_rook, + chetrs_rook, checon_rook, chesv_rook, + clahef_rook, clasyf_rook, + csytf2_rook, csytrf_rook, csytrs_rook, + csytri_rook, csycon_rook, csysv_rook, + chetf2_rk, chetrf_rk, chetri_3, chetri_3x, chetrs_3, checon_3, chesv_rk, chesv_aa, chetrf_aa, chetrs_aa, @@ -3421,6 +3536,35 @@ chb2st_kernels, cheevd_2stage, cheev_2stage, cheevx_2stage, cheevr_2stage, chbev_2stage, chbevx_2stage, chbevd_2stage, chegv_2stage, + chesv_aa_2stage, + chetrf_aa_2stage, chetrs_aa_2stage, + csysv_aa_2stage, csytrf_aa_2stage, + csytrs_aa_2stage, + claunhr_col_getrfnp, claunhr_col_getrfnp2, cunhr_col, +); +@lapack_embeded_underscore_objs_d=( + dlasyf_rook, + dsytf2_rook, dsytrf_rook, dsytrs_rook, + dsytri_rook, dsycon_rook, dsysv_rook, + dlasyf_rk, dsyconvf_rook, + dsytf2_rk, dsytrf_rk, dsytrs_3, + dsytri_3, dsytri_3x, dsycon_3, + dsysv_rk, dlasyf_aa, dsysv_aa, + dsytrf_aa, dsytrs_aa, dsytrd_2stage, + dsytrd_sy2sb, dsytrd_sb2st, dsb2st_kernels, + dsyevd_2stage, dsyev_2stage, dsyevx_2stage, + dsyevr_2stage, dsbev_2stage, dsbevx_2stage, + dsbevd_2stage, dsygv_2stage, + dsysv_aa_2stage, + dsytrf_aa_2stage, dsytrs_aa_2stage, + dlaorhr_col_getrfnp, dlaorhr_col_getrfnp2, dorhr_col, +); +@lapack_embeded_underscore_objs_z=( + zhetf2_rook, zhetrf_rook, zhetri_rook, + zhetrs_rook, zhecon_rook, zhesv_rook, + zlahef_rook, zlasyf_rook, + zsytf2_rook, zsytrf_rook, zsytrs_rook, + zsytri_rook, zsycon_rook, zsysv_rook, zhetf2_rk, zhetrf_rk, zhetri_3, zhetri_3x, zhetrs_3, zhecon_3, zhesv_rk, zhesv_aa, zhetrf_aa, @@ -3434,22 +3578,10 @@ zheev_2stage, zheevx_2stage, zheevr_2stage, zhbev_2stage, zhbevx_2stage, zhbevd_2stage, zhegv_2stage, -# 3.8.0 - ssysv_aa_2stage, ssytrf_aa_2stage, - ssytrs_aa_2stage, chesv_aa_2stage, - chetrf_aa_2stage, chetrs_aa_2stage, - csysv_aa_2stage, csytrf_aa_2stage, - csytrs_aa_2stage, dsysv_aa_2stage, - dsytrf_aa_2stage, dsytrs_aa_2stage, zhesv_aa_2stage, zhetrf_aa_2stage, zhetrs_aa_2stage, zsysv_aa_2stage, zsytrf_aa_2stage, zsytrs_aa_2stage, -# 3.9.0 - claunhr_col_getrfnp, claunhr_col_getrfnp2, cunhr_col, - dlaorhr_col_getrfnp, dlaorhr_col_getrfnp2, dorhr_col, - slaorhr_col_getrfnp, slaorhr_col_getrfnp2, sorhr_col, zlaunhr_col_getrfnp, zlaunhr_col_getrfnp2, zunhr_col - ); @@ -3461,6 +3593,42 @@ if ($ARGV[12] == 1) { @blasobjs = (@blasobjs, @halfblasobjs); @cblasobjs = (@cblasobjs, @halfcblasobjs); } +if ($ARGV[13] == 1) { + @blasobjs = (@blasobjs, @blasobjss); + @cblasobjs = (@cblasobjs, @cblasobjss); + @lapackobjs = (@lapackobjs, @lapackobjss); + @lapack2objs = (@lapack2objs, @lapack2objss); + @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s); + @lapackeobjs = (@lapackeobjs, @lapackeobjss); +} +if ($ARGV[14] == 1) { + @blasobjs = (@blasobjs, @blasobjsd); + @cblasobjs = (@cblasobjs, @cblasobjsd); + @lapackobjs = (@lapackobjs, @lapackobjsd); + @lapack2objs = (@lapack2objs, @lapack2objsd); + @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d); + @lapackeobjs = (@lapackeobjs, @lapackeobjsd); +} +if ($ARGV[15] == 1) { + @blasobjs = (@blasobjs, @blasobjsc); + @cblasobjs = (@cblasobjs, @cblasobjsc); + @gemm3mobjs = (@gemm3mobjs, @gemm3mobjsc); + @cblasgemm3mobjs = (@cblasgemm3mobjs, @sblasgemm3mobjsc); + @lapackobjs = (@lapackobjs, @lapackobjsc); + @lapack2objs = (@lapack2objs, @lapack2objsc, @lapac2objszc); + @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c); + @lapackeobjs = (@lapackeobjs, @lapackeobjsc); +} +if ($ARGV[16] == 1) { + @blasobjs = (@blasobjs, @blasobjsz); + @cblasobjs = (@cblasobjs, @cblasobjsz); + @gemm3mobjs = (@gemm3mobjs, @gemm3mobjsz); + @cblasgemm3mobjs = (@cblasgemm3mobjs, @sblasgemm3mobjsz); + @lapackobjs = (@lapackobjs, @lapackobjsz); + @lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc); + @lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z); + @lapackeobjs = (@lapackeobjs, @lapackeobjsz); +} if ($ARGV[8] == 1) { #ONLY_CBLAS=1 @underscore_objs = (@misc_underscore_objs); diff --git a/interface/CMakeLists.txt b/interface/CMakeLists.txt index 7a8fc6698..5346ecadd 100644 --- a/interface/CMakeLists.txt +++ b/interface/CMakeLists.txt @@ -83,8 +83,12 @@ foreach (CBLAS_FLAG ${CBLAS_FLAGS}) GenerateNamedObjects("${BLAS3_MANGLED_SOURCES}" "" "" ${CBLAS_FLAG} "" "" false ${MANGLE_COMPLEX}) #sdsdot, dsdot + if (BUILD_SINGLE OR BUILD_DOUBLE) GenerateNamedObjects("sdsdot.c" "" "sdsdot" ${CBLAS_FLAG} "" "" true "SINGLE") +endif () +if (BUILD_DOUBLE) GenerateNamedObjects("dsdot.c" "" "dsdot" ${CBLAS_FLAG} "" "" true "SINGLE") +endif () # trmm is trsm with a compiler flag set GenerateNamedObjects("trsm.c" "TRMM" "trmm" ${CBLAS_FLAG}) @@ -167,4 +171,31 @@ if (NOT DEFINED NO_LAPACK) GenerateNamedObjects("${LAPACK_MANGLED_SOURCES}" "" "" 0 "" "" 0 3) endif () +if ( BUILD_COMPLEX AND NOT BUILD_SINGLE) + GenerateNamedObjects("scal.c" "" "scal" 0 "" "" false "SINGLE") + GenerateNamedObjects("copy.c" "" "copy" 0 "" "" false "SINGLE") + GenerateNamedObjects("dot.c" "" "dot" 0 "" "" false "SINGLE") + GenerateNamedObjects("rot.c" "" "rot" 0 "" "" false "SINGLE") + GenerateNamedObjects("nrm2.c" "" "nrm2" 0 "" "" false "SINGLE") + GenerateNamedObjects("gemv.c" "" "gemv" 0 "" "" false "SINGLE") + GenerateNamedObjects("gemm.c" "" "gemm" 0 "" "" false "SINGLE") + GenerateNamedObjects("asum.c" "" "asum" 0 "" "" false "SINGLE") + GenerateNamedObjects("swap.c" "" "swap" 0 "" "" false "SINGLE") + GenerateNamedObjects("axpy.c" "" "axpy" 0 "" "" false "SINGLE") + GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" 0 "" "" false "SINGLE") +endif () +if ( BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + GenerateNamedObjects("scal.c" "" "scal" 0 "" "" false "DOUBLE") + GenerateNamedObjects("copy.c" "" "copy" 0 "" "" false "DOUBLE") + GenerateNamedObjects("dot.c" "" "dot" 0 "" "" false "DOUBLE") + GenerateNamedObjects("rot.c" "" "rot" 0 "" "" false "DOUBLE") + GenerateNamedObjects("nrm2.c" "" "nrm2" 0 "" "" false "DOUBLE") + GenerateNamedObjects("gemv.c" "" "gemv" 0 "" "" false "DOUBLE") + GenerateNamedObjects("gemm.c" "" "gemm" 0 "" "" false "DOUBLE") + GenerateNamedObjects("asum.c" "" "asum" 0 "" "" false "DOUBLE") + GenerateNamedObjects("swap.c" "" "swap" 0 "" "" false "DOUBLE") + GenerateNamedObjects("axpy.c" "" "axpy" 0 "" "" false "DOUBLE") + GenerateNamedObjects("imax.c" "USE_ABS" "i*amax" 0 "" "" false "DOUBLE") +endif () + add_library(interface OBJECT ${OPENBLAS_SRC}) diff --git a/interface/Makefile b/interface/Makefile index fde6227bc..71393aaba 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -329,7 +329,10 @@ CCBLAS3OBJS = \ cblas_csyrk.$(SUFFIX) cblas_csyr2k.$(SUFFIX) \ cblas_chemm.$(SUFFIX) cblas_cherk.$(SUFFIX) cblas_cher2k.$(SUFFIX) \ cblas_comatcopy.$(SUFFIX) cblas_cimatcopy.$(SUFFIX)\ - cblas_cgeadd.$(SUFFIX) cblas_xerbla.$(SUFFIX) + cblas_cgeadd.$(SUFFIX) + +CXERBLAOBJ = \ + cblas_xerbla.$(SUFFIX) @@ -391,6 +394,8 @@ ZBLAS2OBJS += $(CZBLAS2OBJS) ZBLAS3OBJS += $(CZBLAS3OBJS) SHEXTOBJS += $(CSHEXTOBJS) + +CBAUXOBJS += $(CXERBLAOBJ) endif SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) @@ -434,13 +439,11 @@ QLAPACKOBJS = \ # cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ # clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) - CLAPACKOBJS = \ cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) ctrtrs.$(SUFFIX) - #ZLAPACKOBJS = \ # zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ # zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ @@ -469,8 +472,42 @@ ZBLASOBJS += $(ZLAPACKOBJS) endif -FUNCOBJS = $(SHEXTOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) +ifneq ($(BUILD_SINGLE),1) + SBLASOBJS= +ifeq ($(BUILD_DOUBLE),1) + SBLASOBJS = dsdot.$(SUFFIX) cblas_dsdot.$(SUFFIX) strsm.$(SUFFIX) \ + sgetrs.$(SUFFIX) sgetrf.$(SUFFIX) spotf2.$(SUFFIX) spotrf.$(SUFFIX) \ + ssyrk.$(SUFFIX) sgemv.$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX),1) + SBLASOBJS = \ + sdot.$(SUFFIX) srot.$(SUFFIX) snrm2.$(SUFFIX) sswap.$(SUFFIX) \ + isamax.$(SUFFIX) saxpy.$(SUFFIX) sscal.$(SUFFIX) scopy.$(SUFFIX) \ + sgemv.$(SUFFIX) sgemm.$(SUFFIX) +endif +endif +ifneq ($(BUILD_DOUBLE),1) + DBLASOBJS= +ifeq ($(BUILD_COMPLEX16),1) + DBLASOBJS = \ + ddot.$(SUFFIX) drot.$(SUFFIX) dnrm2.$(SUFFIX) dswap.$(SUFFIX) \ + idamax.$(SUFFIX) daxpy.$(SUFFIX) dscal.$(SUFFIX) dcopy.$(SUFFIX) \ + dgemv.$(SUFFIX) dgemm.$(SUFFIX) +endif +endif +ifneq ($(BUILD_COMPLEX),1) + CBLASOBJS= +ifeq ($(BUILD_COMPLEX16),1) + CBLASOBJS = cgetrs.$(SUFFIX) cblas_cdotu_sub.$(SUFFIX) cgetrf.$(SUFFIX) \ + cpotrf.$(SUFFIX) ctrsm.$(SUFFIX) cblas_cdotc_sub.$(SUFFIX) +endif +endif +ifneq ($(BUILD_COMPLEX16),1) + ZBLASOBJS= +endif +FUNCOBJS = $(SHEXTOBJS) $(CXERBLAOBJS) $(SHBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) +$(info FUNCOBJS = {[$(FUNCOBJS)]} ) ifdef EXPRECISION FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) endif @@ -481,6 +518,7 @@ endif FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) + include $(TOPDIR)/Makefile.tail all :: libs @@ -503,11 +541,14 @@ level1 : $(BEXTOBJS) $(SHBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $( level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ -level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) +level3 : $(SHBLAS3OBJS) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) + $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ + +aux : $(CBAUXOBJS) $(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ $(CSHBLASOBJS) $(CSHBLASOBJS_P) $(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ -$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS +$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) $(CBAUXOBJS_P) : override CFLAGS += -DCBLAS srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c $(CC) $(CFLAGS) -c $< -o $(@F) @@ -2268,3 +2309,4 @@ cblas_zgeadd.$(SUFFIX) cblas_zgeadd.$(PSUFFIX) : zgeadd.c cblas_xerbla.$(SUFFIX) cblas_xerbla.$(PSUFFIX) : xerbla.c $(CC) -c $(CFLAGS) -DCBLAS $< -o $(@F) + diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 84dd949a4..988b83338 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -91,6 +91,59 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "d*dot_k" false "" "" false "SINGLE") GenerateNamedObjects("${KERNELDIR}/${DSDOTKERNEL}" "DSDOT" "dsdot_k" false "" "" false "SINGLE") + if ((BUILD_COMPLEX OR BUILD_DOUBLE) AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SASUMKERNEL}" "" "asum_k" false "" "" false "SINGLE") + if (DEFINED SMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${SMAXKERNEL}" "" "max_k" false "" "" false "SINGLE") + endif () + if (DEFINED SMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${SMINKERNEL}" "USE_MIN" "min_k" false "" "" false "SINGLE") + endif () + if (DEFINED ISMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${ISMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "SINGLE") + endif () + if (DEFINED ISMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${ISMAXKERNEL}" "" "i*max_k" false "" "" false "SINGLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${ISAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${ISAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SSCALKERNEL}" "" "scal_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SSWAPKERNEL}" "" "swap_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SAXPYKERNEL}" "" "axpy_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE") + endif () + if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DASUMKERNEL}" "" "asum_k" false "" "" false "DOUBLE") + if (DEFINED DMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${DMAXKERNEL}" "" "max_k" false "" "" false "DOUBLE") + endif () + if (DEFINED DMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${DMINKERNEL}" "USE_MIN" "min_k" false "" "" false "DOUBLE") + endif () + if (DEFINED IDMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${IDMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "DOUBLE") + endif () + if (DEFINED IDMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${IDMAXKERNEL}" "" "i*max_k" false "" "" false "DOUBLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${IDAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${IDAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DSCALKERNEL}" "" "scal_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") + endif () + # Makefile.L2 GenerateCombinationObjects("generic/symv_k.c" "LOWER" "U" "" 1 "" "" 3) GenerateNamedObjects("generic/ger.c" "" "ger_k" false "" "" "" 3) @@ -124,7 +177,14 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false ${float_type}) endif () endforeach () - + if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + GenerateNamedObjects("${KERNELDIR}/${DGEMVNKERNEL}" "" "gemv_n" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "DOUBLE") + endif () + if (BUILD_COMPLEX AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") + endif () # Makefile.L3 set(USE_TRMM false) if (ARM OR ARM64 OR (TARGET_CORE MATCHES LONGSOON3B) OR (TARGET_CORE MATCHES GENERIC) OR (TARGET_CORE MATCHES HASWELL) OR (TARGET_CORE MATCHES ZEN) OR (TARGET_CORE MATCHES SKYLAKEX) OR (TARGET_CORE MATCHES COOPERLAKE)) @@ -159,6 +219,38 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) endif () GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type}) endforeach() + if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + GenerateNamedObjects("${KERNELDIR}/${DGEMMKERNEL}" "" "gemm_kernel" false "" "" false "DOUBLE") + if (DGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMINCOPY}" "DOUBLE" "${DGEMMINCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMITCOPY}" "DOUBLE" "${DGEMMITCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMONCOPY}" "DOUBLE" "${DGEMMONCOPYOBJ}" false "" "" true "DOUBLE") + endif () + if (DGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${DGEMMOTCOPY}" "DOUBLE" "${DGEMMOTCOPYOBJ}" false "" "" true "DOUBLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${DGEMM_BETA}" "" "gemm_beta" false "" "" false "DOUBLE") + endif () + if ((BUILD_DOUBLE OR BUILD_COMPLEX) AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMMKERNEL}" "" "gemm_kernel" false "" "" false "SINGLE") + if (SGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${SGEMM_BETA}" "" "gemm_beta" false "" "" false "SINGLE") + endif () foreach (float_type ${FLOAT_TYPES}) string(SUBSTRING ${float_type} 0 1 float_char) @@ -499,7 +591,31 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) #geadd GenerateNamedObjects("${KERNELDIR}/${${float_char}GEADD_KERNEL}" "" "geadd_k" false "" "" false ${float_type}) endforeach () + if (BUILD_DOUBLE AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" "" false "SINGLE") + + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" "" false "SINGLE") + + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" "" false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" "" false "SINGLE") + endif () # Makefile.LA if(NOT NO_LAPACK) @@ -526,6 +642,28 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("${KERNELDIR}/${${float_char}NEG_TCOPY}_${${float_char}GEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false ${float_type}) GenerateNamedObjects("${KERNELDIR}/${${float_char}LASWP_NCOPY}_${${float_char}GEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false ${float_type}) endforeach() + if (BUILD_COMPLEX AND NOT BUILD_SINGLE) + if (NOT DEFINED SNEG_TCOPY) + set(SNEG_TCOPY ../generic/neg_tcopy.c) + endif () + + if (NOT DEFINED SLASWP_NCOPY) + set(SLASWP_NCOPY ../generic/laswp_ncopy.c) + endif () + GenerateNamedObjects("${KERNELDIR}/${SNEG_TCOPY}_${SGEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SLASWP_NCOPY}_${SGEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false "SINGLE") + endif() + if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + if (NOT DEFINED DNEG_TCOPY) + set(DNEG_TCOPY ../generic/neg_tcopy.c) + endif () + + if (NOT DEFINED DLASWP_NCOPY) + set(DLASWP_NCOPY ../generic/laswp_ncopy.c) + endif () + GenerateNamedObjects("${KERNELDIR}/${DNEG_TCOPY}_${DGEMM_UNROLL_M}" "" "neg_tcopy" false "" "" false "DOUBLE") + GenerateNamedObjects("${KERNELDIR}/${DLASWP_NCOPY}_${DGEMM_UNROLL_N}" "" "laswp_ncopy" false "" "" false "DOUBLE") + endif() endif() if (${DYNAMIC_ARCH}) @@ -557,8 +695,147 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) GenerateNamedObjects("generic/laswp_ncopy_${${float_char}GEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false ${float_type}) endforeach () - + if (BUILD_COMPLEX AND NOT BUILD_SINGLE) + GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") + GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE") endif () + if (BUILD_DOUBLE AND NOT BUILD_SINGLE) + GenerateNamedObjects("generic/neg_tcopy_${SGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/laswp_ncopy_${SGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${STRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_uncopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "SINGLE") + + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_lncopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "SINGLE") + + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_utcopy_${SGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "SINGLE") + + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "SINGLE") + GenerateNamedObjects("generic/trsm_ltcopy_${SGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "SINGLE") + + if (SGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMINCOPY}" "SINGLE" "${SGEMMINCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMITCOPY}" "SINGLE" "${SGEMMITCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMONCOPY}" "SINGLE" "${SGEMMONCOPYOBJ}" false "" "" true "SINGLE") + endif () + if (SGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${SGEMMOTCOPY}" "SINGLE" "${SGEMMOTCOPYOBJ}" false "" "" true "SINGLE") + endif () + GenerateNamedObjects("${KERNELDIR}/${SGEMVNKERNEL}" "" "gemv_n" false "" "" false "SINGLE") + GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE") + endif () + + if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) + GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "DOUBLE") + GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "DOUBLE") + endif () + if (BUILD_COMPLEX16 AND NOT BUILD_COMPLEX) + GenerateNamedObjects("${KERNELDIR}/${CAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CAMINKERNEL}" "USE_ABS;USE_MIN" "amin_k" false "" "" false "COMPLEX") + if (DEFINED CMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${CMAXKERNEL}" "" "max_k" false "" "" false "COMPLEX") + endif () + if (DEFINED CMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${CMINKERNEL}" "USE_MIN" "min_k" false "" "" false "COMPLEX") + endif () + GenerateNamedObjects("${KERNELDIR}/${ICAMAXKERNEL}" "USE_ABS" "i*amax_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${ICAMINKERNEL}" "USE_ABS;USE_MIN" "i*amin_k" false "" "" false "COMPLEX") + if (DEFINED ICMAXKERNEL) + GenerateNamedObjects("${KERNELDIR}/${ICMAXKERNEL}" "" "i*max_k" false "" "" false "COMPLEX") + endif () + if (DEFINED ICMINKERNEL) + GenerateNamedObjects("${KERNELDIR}/${ICMINKERNEL}" "USE_MIN" "i*min_k" false "" "" false "COMPLEX") + endif () + GenerateNamedObjects("${KERNELDIR}/${CASUMKERNEL}" "" "asum_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "" "axpy_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CNRM2KERNEL}" "" "nrm2_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CROTKERNEL}" "" "rot_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CSCALKERNEL}" "" "scal_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CSWAPKERNEL}" "" "swap_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CAXPBYKERNEL}" "" "axpby_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CSUMKERNEL}" "" "sum_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CAXPYKERNEL}" "CONJ" "axpyc_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "" "dotu_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CDOTKERNEL}" "CONJ" "dotc_k" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "" "gemv_n" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "TRANSA" "gemv_t" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "CONJ" "gemv_r" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "CONJ;TRANSA" "gemv_c" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ" "gemv_o" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;TRANSA" "gemv_u" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVNKERNEL}" "XCONJ;CONJ" "gemv_s" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMVTKERNEL}" "XCONJ;CONJ;TRANSA" "gemv_d" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL;CONJ" "trsm_kernel_LR" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL;CONJ" "trsm_kernel_LC" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL;CONJ" "trsm_kernel_RR" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL;CONJ" "trsm_kernel_RC" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LN}" "UPPER;LN;TRSMKERNEL" "trsm_kernel_LN" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_LT}" "LT;TRSMKERNEL" "trsm_kernel_LT" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RN}" "UPPER;RN;TRSMKERNEL" "trsm_kernel_RN" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CTRSMKERNEL_RT}" "RT;TRSMKERNEL" "trsm_kernel_RT" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NN" "gemm_kernel_n" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CN" "gemm_kernel_l" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "NC" "gemm_kernel_r" false "" "" false "COMPLEX") + GenerateNamedObjects("${KERNELDIR}/${CGEMMKERNEL}" "CC" "gemm_kernel_b" false "" "" false "COMPLEX") + if (CGEMMINCOPY) + GenerateNamedObjects("${KERNELDIR}/${CGEMMINCOPY}" "COMPLEX" "${CGEMMINCOPYOBJ}" false "" "" true "COMPLEX") + endif () + + if (CGEMMITCOPY) + GenerateNamedObjects("${KERNELDIR}/${CGEMMITCOPY}" "COMPLEX" "${CGEMMITCOPYOBJ}" false "" "" true "COMPLEX") + endif () + + if (CGEMMONCOPY) + GenerateNamedObjects("${KERNELDIR}/${CGEMMONCOPY}" "COMPLEX" "${CGEMMONCOPYOBJ}" false "" "" true "COMPLEX") + endif () + + if (CGEMMOTCOPY) + GenerateNamedObjects("${KERNELDIR}/${CGEMMOTCOPY}" "COMPLEX" "${CGEMMOTCOPYOBJ}" false "" "" true "COMPLEX") + endif () + GenerateNamedObjects("${KERNELDIR}/${CGEMM_BETA}" "" "gemm_beta" false "" "" false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iunucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_M}.c" "" "trsm_iunncopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_ounucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_uncopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_ounncopy" false "" ${TSUFFIX} false "COMPLEX") + + GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_ilnucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_ilnncopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_olnucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_lncopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_olnncopy" false "" ${TSUFFIX} false "COMPLEX") + + GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "UNIT" "trsm_iutucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_M}.c" "" "trsm_iutncopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER;UNIT" "trsm_outucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_utcopy_${CGEMM_UNROLL_N}.c" "OUTER" "trsm_outncopy" false "" ${TSUFFIX} false "COMPLEX") + + GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER;UNIT" "trsm_iltucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_M}.c" "LOWER" "trsm_iltncopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER;UNIT" "trsm_oltucopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/ztrsm_ltcopy_${CGEMM_UNROLL_N}.c" "OUTER;LOWER" "trsm_oltncopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/neg_tcopy_${DGEMM_UNROLL_M}.c" "" "neg_tcopy" false "" ${TSUFFIX} false "COMPLEX") + GenerateNamedObjects("generic/laswp_ncopy_${DGEMM_UNROLL_N}.c" "" "laswp_ncopy" false "" ${TSUFFIX} false "COMPLEX") + endif () + endif () add_library(kernel${TSUFFIX} OBJECT ${OPENBLAS_SRC}) set_target_properties(kernel${TSUFFIX} PROPERTIES COMPILE_FLAGS "${KERNEL_DEFINITIONS}") @@ -573,7 +850,7 @@ if (${DYNAMIC_ARCH}) set(BUILD_KERNEL 1) set(KDIR "") set(TSUFFIX "_${TARGET_CORE}") - set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}") + set(KERNEL_DEFINITIONS "-DBUILD_KERNEL -DTABLE_NAME=gotoblas_${TARGET_CORE} -DTS=${TSUFFIX}") build_core("${TARGET_CORE}" "${KDIR}" "${TSUFFIX}" "${KERNEL_DEFINITIONS}") set(ADD_COMMONOBJS 0) endforeach() diff --git a/kernel/Makefile b/kernel/Makefile index 0f0fa5a5e..290fb2afe 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -41,6 +41,9 @@ ifdef NO_AVX2 endif ifdef TARGET_CORE + ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO NEHALEM BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3)) + override CFLAGS += -msse3 +endif ifeq ($(TARGET_CORE), COOPERLAKE) override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE) ifeq ($(GCCVERSIONGTEQ10), 1) diff --git a/kernel/Makefile.L2 b/kernel/Makefile.L2 index 2aeb8f041..79399c342 100644 --- a/kernel/Makefile.L2 +++ b/kernel/Makefile.L2 @@ -186,31 +186,46 @@ ifndef XHEMV_M_KERNEL XHEMV_M_KERNEL = ../generic/zhemv_k.c endif +ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" "" SBLASOBJS += \ - sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \ + sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) +endif +ifeq ($(BUILD_SINGLE),1) +SBLASOBJS += \ + ssymv_U$(TSUFFIX).$(SUFFIX) ssymv_L$(TSUFFIX).$(SUFFIX) \ sger_k$(TSUFFIX).$(SUFFIX) - +endif +ifeq ($(BUILD_DOUBLE),1) DBLASOBJS += \ dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) dsymv_U$(TSUFFIX).$(SUFFIX) dsymv_L$(TSUFFIX).$(SUFFIX) \ dger_k$(TSUFFIX).$(SUFFIX) - +endif QBLASOBJS += \ qgemv_n$(TSUFFIX).$(SUFFIX) qgemv_t$(TSUFFIX).$(SUFFIX) qsymv_U$(TSUFFIX).$(SUFFIX) qsymv_L$(TSUFFIX).$(SUFFIX) \ qger_k$(TSUFFIX).$(SUFFIX) - +ifeq ($(BUILD_COMPLEX),1) +SBLASOBJS += \ + sgemv_n$(TSUFFIX).$(SUFFIX) sgemv_t$(TSUFFIX).$(SUFFIX) CBLASOBJS += \ cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \ cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX) \ csymv_U$(TSUFFIX).$(SUFFIX) csymv_L$(TSUFFIX).$(SUFFIX) \ chemv_U$(TSUFFIX).$(SUFFIX) chemv_L$(TSUFFIX).$(SUFFIX) chemv_V$(TSUFFIX).$(SUFFIX) chemv_M$(TSUFFIX).$(SUFFIX) \ cgeru_k$(TSUFFIX).$(SUFFIX) cgerc_k$(TSUFFIX).$(SUFFIX) cgerv_k$(TSUFFIX).$(SUFFIX) cgerd_k$(TSUFFIX).$(SUFFIX) - +endif +ifeq ($(BUILD_COMPLEX16),1) +CBLASOBJS += \ + cgemv_n$(TSUFFIX).$(SUFFIX) cgemv_t$(TSUFFIX).$(SUFFIX) cgemv_r$(TSUFFIX).$(SUFFIX) cgemv_c$(TSUFFIX).$(SUFFIX) \ + cgemv_o$(TSUFFIX).$(SUFFIX) cgemv_u$(TSUFFIX).$(SUFFIX) cgemv_s$(TSUFFIX).$(SUFFIX) cgemv_d$(TSUFFIX).$(SUFFIX) +DBLASOBJS += \ + dgemv_n$(TSUFFIX).$(SUFFIX) dgemv_t$(TSUFFIX).$(SUFFIX) ZBLASOBJS += \ zgemv_n$(TSUFFIX).$(SUFFIX) zgemv_t$(TSUFFIX).$(SUFFIX) zgemv_r$(TSUFFIX).$(SUFFIX) zgemv_c$(TSUFFIX).$(SUFFIX) \ zgemv_o$(TSUFFIX).$(SUFFIX) zgemv_u$(TSUFFIX).$(SUFFIX) zgemv_s$(TSUFFIX).$(SUFFIX) zgemv_d$(TSUFFIX).$(SUFFIX) \ zsymv_U$(TSUFFIX).$(SUFFIX) zsymv_L$(TSUFFIX).$(SUFFIX) \ zhemv_U$(TSUFFIX).$(SUFFIX) zhemv_L$(TSUFFIX).$(SUFFIX) zhemv_V$(TSUFFIX).$(SUFFIX) zhemv_M$(TSUFFIX).$(SUFFIX) \ zgeru_k$(TSUFFIX).$(SUFFIX) zgerc_k$(TSUFFIX).$(SUFFIX) zgerv_k$(TSUFFIX).$(SUFFIX) zgerd_k$(TSUFFIX).$(SUFFIX) +endif XBLASOBJS += \ xgemv_n$(TSUFFIX).$(SUFFIX) xgemv_t$(TSUFFIX).$(SUFFIX) xgemv_r$(TSUFFIX).$(SUFFIX) xgemv_c$(TSUFFIX).$(SUFFIX) \ @@ -219,17 +234,21 @@ XBLASOBJS += \ xhemv_U$(TSUFFIX).$(SUFFIX) xhemv_L$(TSUFFIX).$(SUFFIX) xhemv_V$(TSUFFIX).$(SUFFIX) xhemv_M$(TSUFFIX).$(SUFFIX) \ xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX) +ifneq "$(or $(BUILD_SINGLE), $(BUILD_DOUBLE), $(BUILD_COMPLEX))" "" $(KDIR)sgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -UTRANS $< -o $@ $(KDIR)sgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(CC) -c $(CFLAGS) -UDOUBLE -UCOMPLEX -DTRANS $< -o $@ +endif +ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" $(KDIR)dgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -UTRANS $< -o $@ $(KDIR)dgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)dgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(CC) -c $(CFLAGS) -DDOUBLE -UCOMPLEX -DTRANS $< -o $@ +endif $(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVNKERNEL) $(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -UTRANS $< -o $@ @@ -237,6 +256,8 @@ $(KDIR)qgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_n$(TSUFFIX).$(PSUFFIX) : $(KER $(KDIR)qgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)qgemv_t$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGEMVTKERNEL) $(CC) -c $(CFLAGS) -DXDOUBLE -UCOMPLEX -DTRANS $< -o $@ + +ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" $(KDIR)cgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ @@ -260,6 +281,10 @@ $(KDIR)cgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE $(KDIR)cgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)cgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(CC) -c $(CFLAGS) -UDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ +endif + + +ifeq ($(BUILD_COMPLEX16),1) $(KDIR)zgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVNKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ @@ -284,6 +309,7 @@ $(KDIR)zgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE $(KDIR)zgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)zgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGEMVTKERNEL) $(TOPDIR)/common.h $(GEMVDEP) $(CC) -c $(CFLAGS) -DDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ +endif $(KDIR)xgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_n$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVNKERNEL) $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -UTRANS -UCONJ -UXCONJ $< -o $@ @@ -309,17 +335,25 @@ $(KDIR)xgemv_s$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_s$(TSUFFIX).$(PSUFFIX) : $(KERNE $(KDIR)xgemv_d$(TSUFFIX).$(SUFFIX) $(KDIR)xgemv_d$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMVTKERNEL) $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANS -DCONJ -DXCONJ $< -o $@ + +ifeq ($(BUILD_SINGLE),1) + $(KDIR)ssymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_U_KERNEL) $(SSYMV_U_PARAM) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -ULOWER $< -o $@ $(KDIR)ssymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)ssymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SSYMV_L_KERNEL) $(SSYMV_L_PARAM) $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DLOWER $< -o $@ +endif + + +ifeq ($(BUILD_DOUBLE),1) $(KDIR)dsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_U_KERNEL) $(DSYMV_U_PARAM) $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -ULOWER $< -o $@ $(KDIR)dsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)dsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DSYMV_L_KERNEL) $(DSYMV_L_PARAM) $(CC) -c $(CFLAGS) -UCOMPLEX -DDOUBLE -DLOWER $< -o $@ +endif $(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_U_KERNEL) $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -ULOWER $< -o $@ @@ -327,17 +361,23 @@ $(KDIR)qsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_U$(TSUFFIX).$(PSUFFIX) : $(KER $(KDIR)qsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)qsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QSYMV_L_KERNEL) $(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE -DLOWER $< -o $@ +ifeq ($(BUILD_COMPLEX),1) + $(KDIR)csymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_U_KERNEL) $(CSYMV_U_PARAM) $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER $< -o $@ $(KDIR)csymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)csymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CSYMV_L_KERNEL) $(CSYMV_L_PARAM) $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER $< -o $@ +endif + +ifeq ($(BUILD_COMPLEX16),1) $(KDIR)zsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_U_KERNEL) $(ZSYMV_U_PARAM) $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER $< -o $@ $(KDIR)zsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)zsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZSYMV_L_KERNEL) $(ZSYMV_L_PARAM) $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER $< -o $@ +endif $(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_U_KERNEL) $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER $< -o $@ @@ -345,15 +385,23 @@ $(KDIR)xsymv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_U$(TSUFFIX).$(PSUFFIX) : $(KER $(KDIR)xsymv_L$(TSUFFIX).$(SUFFIX) $(KDIR)xsymv_L$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XSYMV_L_KERNEL) $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER $< -o $@ +ifeq ($(BUILD_SINGLE),1) + $(KDIR)sger_k$(TSUFFIX).$(SUFFIX) $(KDIR)sger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGERKERNEL) $(SGERPARAM) $(CC) -c $(CFLAGS) -UDOUBLE $< -o $@ +endif + +ifeq ($(BUILD_DOUBLE),1) $(KDIR)dger_k$(TSUFFIX).$(SUFFIX) $(KDIR)dger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGERKERNEL) $(DGERPARAM) $(CC) -c $(CFLAGS) -DDOUBLE $< -o $@ +endif $(KDIR)qger_k$(TSUFFIX).$(SUFFIX) $(KDIR)qger_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QGERKERNEL) $(QGERPARAM) $(CC) -c $(CFLAGS) -DXDOUBLE $< -o $@ +ifeq ($(BUILD_COMPLEX),1) + $(KDIR)cgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERUKERNEL) $(CGERPARAM) $(CC) -c $(CFLAGS) -UDOUBLE -UCONJ $< -o $@ @@ -365,6 +413,9 @@ $(KDIR)cgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER $(KDIR)cgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)cgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CGERCKERNEL) $(CGERPARAM) $(CC) -c $(CFLAGS) -UDOUBLE -DCONJ -DXCONJ $< -o $@ +endif + +ifeq ($(BUILD_COMPLEX16),1) $(KDIR)zgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERUKERNEL) $(ZGERPARAM) $(CC) -c $(CFLAGS) -DDOUBLE -UCONJ $< -o $@ @@ -377,6 +428,7 @@ $(KDIR)zgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER $(KDIR)zgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)zgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZGERCKERNEL) $(ZGERPARAM) $(CC) -c $(CFLAGS) -DDOUBLE -DCONJ -DXCONJ $< -o $@ +endif $(KDIR)xgeru_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgeru_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERUKERNEL) $(XGERPARAM) $(CC) -c $(CFLAGS) -DXDOUBLE -UCONJ $< -o $@ @@ -390,6 +442,8 @@ $(KDIR)xgerv_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerv_k$(TSUFFIX).$(PSUFFIX) : $(KER $(KDIR)xgerd_k$(TSUFFIX).$(SUFFIX) $(KDIR)xgerd_k$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGERCKERNEL) $(XGERPARAM) $(CC) -c $(CFLAGS) -DXDOUBLE -DCONJ -DXCONJ $< -o $@ +ifeq ($(BUILD_COMPLEX),1) + $(KDIR)chemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_U_KERNEL) $(CHEMV_U_PARAM) $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -ULOWER -DHEMV $< -o $@ @@ -401,6 +455,9 @@ $(KDIR)chemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_V$(TSUFFIX).$(PSUFFIX) : $(KER $(KDIR)chemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)chemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CHEMV_M_KERNEL) $(CHEMV_L_PARAM) ../symcopy.h $(CC) -c $(CFLAGS) -DCOMPLEX -UDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ +endif + +ifeq ($(BUILD_COMPLEX16),1) $(KDIR)zhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_U_KERNEL) $(ZHEMV_U_PARAM) $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -ULOWER -DHEMV $< -o $@ @@ -413,7 +470,7 @@ $(KDIR)zhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER $(KDIR)zhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)zhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(ZHEMV_M_KERNEL) $(ZHEMV_L_PARAM) ../symcopy.h $(CC) -c $(CFLAGS) -DCOMPLEX -DDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ - +endif $(KDIR)xhemv_U$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_U$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_U_KERNEL) $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -ULOWER -DHEMV $< -o $@ @@ -426,3 +483,4 @@ $(KDIR)xhemv_V$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_V$(TSUFFIX).$(PSUFFIX) : $(KER $(KDIR)xhemv_M$(TSUFFIX).$(SUFFIX) $(KDIR)xhemv_M$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XHEMV_M_KERNEL) ../symcopy.h $(CC) -c $(CFLAGS) -DCOMPLEX -DXDOUBLE -DLOWER -DHEMV -DHEMVREV $< -o $@ + diff --git a/kernel/Makefile.L3 b/kernel/Makefile.L3 index 24e17d9b4..e03ed0fad 100644 --- a/kernel/Makefile.L3 +++ b/kernel/Makefile.L3 @@ -100,8 +100,10 @@ SHKERNELOBJS += \ $(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ) endif +ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" "" SKERNELOBJS += \ sgemm_kernel$(TSUFFIX).$(SUFFIX) \ + sgemm_beta$(TSUFFIX).$(SUFFIX) \ $(SGEMMINCOPYOBJ) $(SGEMMITCOPYOBJ) \ $(SGEMMONCOPYOBJ) $(SGEMMOTCOPYOBJ) @@ -110,28 +112,36 @@ SKERNELOBJS += \ sgemm_direct$(TSUFFIX).$(SUFFIX) \ sgemm_direct_performant$(TSUFFIX).$(SUFFIX) endif +endif +ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" DKERNELOBJS += \ + dgemm_beta$(TSUFFIX).$(SUFFIX) \ dgemm_kernel$(TSUFFIX).$(SUFFIX) \ $(DGEMMINCOPYOBJ) $(DGEMMITCOPYOBJ) \ $(DGEMMONCOPYOBJ) $(DGEMMOTCOPYOBJ) +endif QKERNELOBJS += \ qgemm_kernel$(TSUFFIX).$(SUFFIX) \ $(QGEMMINCOPYOBJ) $(QGEMMITCOPYOBJ) \ $(QGEMMONCOPYOBJ) $(QGEMMOTCOPYOBJ) +ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" CKERNELOBJS += \ cgemm_kernel_n$(TSUFFIX).$(SUFFIX) cgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ cgemm_kernel_l$(TSUFFIX).$(SUFFIX) cgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ $(CGEMMINCOPYOBJ) $(CGEMMITCOPYOBJ) \ $(CGEMMONCOPYOBJ) $(CGEMMOTCOPYOBJ) +endif +ifeq ($(BUILD_COMPLEX16),1) ZKERNELOBJS += \ zgemm_kernel_n$(TSUFFIX).$(SUFFIX) zgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ zgemm_kernel_l$(TSUFFIX).$(SUFFIX) zgemm_kernel_b$(TSUFFIX).$(SUFFIX) \ $(ZGEMMINCOPYOBJ) $(ZGEMMITCOPYOBJ) \ $(ZGEMMONCOPYOBJ) $(ZGEMMOTCOPYOBJ) +endif XKERNELOBJS += \ xgemm_kernel_n$(TSUFFIX).$(SUFFIX) xgemm_kernel_r$(TSUFFIX).$(SUFFIX) \ @@ -153,38 +163,48 @@ ifeq ($(BUILD_HALF),1) SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX) endif +ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" SBLASOBJS += \ sgemm_beta$(TSUFFIX).$(SUFFIX) \ strmm_kernel_LN$(TSUFFIX).$(SUFFIX) strmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ strmm_kernel_RN$(TSUFFIX).$(SUFFIX) strmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ strsm_kernel_LN$(TSUFFIX).$(SUFFIX) strsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ + strsm_kernel_RN$(TSUFFIX).$(SUFFIX) strsm_kernel_RT$(TSUFFIX).$(SUFFIX) +endif +ifeq ($(BUILD_DOUBLE),1) DBLASOBJS += \ dgemm_beta$(TSUFFIX).$(SUFFIX) \ dtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ dtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ dtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ + dtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) dtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) +endif QBLASOBJS += \ qgemm_beta$(TSUFFIX).$(SUFFIX) \ qtrmm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ qtrmm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ qtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ - qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ + qtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) qtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) +ifeq ($(BUILD_COMPLEX),1) CBLASOBJS += \ - cgemm_beta$(TSUFFIX).$(SUFFIX) \ ctrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ ctrmm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_LC$(TSUFFIX).$(SUFFIX) \ ctrmm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) \ + ctrmm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrmm_kernel_RC$(TSUFFIX).$(SUFFIX) +endif +ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" +CBLASOBJS += \ + cgemm_beta$(TSUFFIX).$(SUFFIX) \ ctrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ ctrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ ctrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ + ctrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ctrsm_kernel_RC$(TSUFFIX).$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX16),1) ZBLASOBJS += \ zgemm_beta$(TSUFFIX).$(SUFFIX) \ ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) \ @@ -194,7 +214,8 @@ ZBLASOBJS += \ ztrsm_kernel_LN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ ztrsm_kernel_LR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ ztrsm_kernel_RN$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ + ztrsm_kernel_RR$(TSUFFIX).$(SUFFIX) ztrsm_kernel_RC$(TSUFFIX).$(SUFFIX) +endif XBLASOBJS += \ xgemm_beta$(TSUFFIX).$(SUFFIX) \ @@ -205,7 +226,7 @@ XBLASOBJS += \ xtrsm_kernel_LN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LT$(TSUFFIX).$(SUFFIX) \ xtrsm_kernel_LR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_LC$(TSUFFIX).$(SUFFIX) \ xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ - xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ + xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) ifeq ($(USE_GEMM3M), 1) @@ -215,6 +236,7 @@ XBLASOBJS += xgemm3m_kernel$(TSUFFIX).$(SUFFIX) endif +ifeq ($(BUILD_SINGLE),1) SBLASOBJS += \ strmm_iunucopy$(TSUFFIX).$(SUFFIX) strmm_iunncopy$(TSUFFIX).$(SUFFIX) \ strmm_ilnucopy$(TSUFFIX).$(SUFFIX) strmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ @@ -223,7 +245,10 @@ SBLASOBJS += \ strmm_ounucopy$(TSUFFIX).$(SUFFIX) strmm_ounncopy$(TSUFFIX).$(SUFFIX) \ strmm_olnucopy$(TSUFFIX).$(SUFFIX) strmm_olnncopy$(TSUFFIX).$(SUFFIX) \ strmm_outucopy$(TSUFFIX).$(SUFFIX) strmm_outncopy$(TSUFFIX).$(SUFFIX) \ - strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) \ + strmm_oltucopy$(TSUFFIX).$(SUFFIX) strmm_oltncopy$(TSUFFIX).$(SUFFIX) +endif +ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" +SBLASOBJS += \ strsm_iunucopy$(TSUFFIX).$(SUFFIX) strsm_iunncopy$(TSUFFIX).$(SUFFIX) \ strsm_ilnucopy$(TSUFFIX).$(SUFFIX) strsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ strsm_iutucopy$(TSUFFIX).$(SUFFIX) strsm_iutncopy$(TSUFFIX).$(SUFFIX) \ @@ -231,10 +256,15 @@ SBLASOBJS += \ strsm_ounucopy$(TSUFFIX).$(SUFFIX) strsm_ounncopy$(TSUFFIX).$(SUFFIX) \ strsm_olnucopy$(TSUFFIX).$(SUFFIX) strsm_olnncopy$(TSUFFIX).$(SUFFIX) \ strsm_outucopy$(TSUFFIX).$(SUFFIX) strsm_outncopy$(TSUFFIX).$(SUFFIX) \ - strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) \ + strsm_oltucopy$(TSUFFIX).$(SUFFIX) strsm_oltncopy$(TSUFFIX).$(SUFFIX) +endif +ifeq ($(BUILD_SINGLE),1) +SBLASOBJS += \ ssymm_iutcopy$(TSUFFIX).$(SUFFIX) ssymm_iltcopy$(TSUFFIX).$(SUFFIX) \ ssymm_outcopy$(TSUFFIX).$(SUFFIX) ssymm_oltcopy$(TSUFFIX).$(SUFFIX) +endif +ifeq ($(BUILD_DOUBLE),1) DBLASOBJS += \ dtrmm_iunucopy$(TSUFFIX).$(SUFFIX) dtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ dtrmm_ilnucopy$(TSUFFIX).$(SUFFIX) dtrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ @@ -254,6 +284,7 @@ DBLASOBJS += \ dtrsm_oltucopy$(TSUFFIX).$(SUFFIX) dtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ dsymm_iutcopy$(TSUFFIX).$(SUFFIX) dsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ dsymm_outcopy$(TSUFFIX).$(SUFFIX) dsymm_oltcopy$(TSUFFIX).$(SUFFIX) +endif QBLASOBJS += \ qtrmm_iunucopy$(TSUFFIX).$(SUFFIX) qtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ @@ -273,8 +304,9 @@ QBLASOBJS += \ qtrsm_outucopy$(TSUFFIX).$(SUFFIX) qtrsm_outncopy$(TSUFFIX).$(SUFFIX) \ qtrsm_oltucopy$(TSUFFIX).$(SUFFIX) qtrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ qsymm_iutcopy$(TSUFFIX).$(SUFFIX) qsymm_iltcopy$(TSUFFIX).$(SUFFIX) \ - qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX) \ + qsymm_outcopy$(TSUFFIX).$(SUFFIX) qsymm_oltcopy$(TSUFFIX).$(SUFFIX) +ifeq ($(BUILD_COMPLEX),1) CBLASOBJS += \ ctrmm_iunucopy$(TSUFFIX).$(SUFFIX) ctrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ ctrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ @@ -284,6 +316,13 @@ CBLASOBJS += \ ctrmm_olnucopy$(TSUFFIX).$(SUFFIX) ctrmm_olnncopy$(TSUFFIX).$(SUFFIX) \ ctrmm_outucopy$(TSUFFIX).$(SUFFIX) ctrmm_outncopy$(TSUFFIX).$(SUFFIX) \ ctrmm_oltucopy$(TSUFFIX).$(SUFFIX) ctrmm_oltncopy$(TSUFFIX).$(SUFFIX) \ + csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \ + csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \ + chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \ + chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX) +endif +ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" +CBLASOBJS += \ ctrsm_iunucopy$(TSUFFIX).$(SUFFIX) ctrsm_iunncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_ilnucopy$(TSUFFIX).$(SUFFIX) ctrsm_ilnncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_iutucopy$(TSUFFIX).$(SUFFIX) ctrsm_iutncopy$(TSUFFIX).$(SUFFIX) \ @@ -291,12 +330,10 @@ CBLASOBJS += \ ctrsm_ounucopy$(TSUFFIX).$(SUFFIX) ctrsm_ounncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_olnucopy$(TSUFFIX).$(SUFFIX) ctrsm_olnncopy$(TSUFFIX).$(SUFFIX) \ ctrsm_outucopy$(TSUFFIX).$(SUFFIX) ctrsm_outncopy$(TSUFFIX).$(SUFFIX) \ - ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX) \ - csymm_iutcopy$(TSUFFIX).$(SUFFIX) csymm_iltcopy$(TSUFFIX).$(SUFFIX) \ - csymm_outcopy$(TSUFFIX).$(SUFFIX) csymm_oltcopy$(TSUFFIX).$(SUFFIX) \ - chemm_iutcopy$(TSUFFIX).$(SUFFIX) chemm_iltcopy$(TSUFFIX).$(SUFFIX) \ - chemm_outcopy$(TSUFFIX).$(SUFFIX) chemm_oltcopy$(TSUFFIX).$(SUFFIX) + ctrsm_oltucopy$(TSUFFIX).$(SUFFIX) ctrsm_oltncopy$(TSUFFIX).$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX16),1) ZBLASOBJS += \ ztrmm_iunucopy$(TSUFFIX).$(SUFFIX) ztrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ ztrmm_ilnucopy$(TSUFFIX).$(SUFFIX) ztrmm_ilnncopy$(TSUFFIX).$(SUFFIX) \ @@ -318,6 +355,7 @@ ZBLASOBJS += \ zsymm_outcopy$(TSUFFIX).$(SUFFIX) zsymm_oltcopy$(TSUFFIX).$(SUFFIX) \ zhemm_iutcopy$(TSUFFIX).$(SUFFIX) zhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ zhemm_outcopy$(TSUFFIX).$(SUFFIX) zhemm_oltcopy$(TSUFFIX).$(SUFFIX) +endif XBLASOBJS += \ xtrmm_iunucopy$(TSUFFIX).$(SUFFIX) xtrmm_iunncopy$(TSUFFIX).$(SUFFIX) \ @@ -343,6 +381,7 @@ XBLASOBJS += \ ifeq ($(USE_GEMM3M), 1) +ifeq ($(BUILD_COMPLEX),1) CBLASOBJS += \ cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ cgemm3m_incopyr$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ @@ -362,7 +401,9 @@ CBLASOBJS += \ chemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) chemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ chemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) chemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ chemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) chemm3m_olcopyi$(TSUFFIX).$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX16),1) ZBLASOBJS += \ zgemm3m_incopyb$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ zgemm3m_incopyr$(TSUFFIX).$(SUFFIX) zgemm3m_itcopyr$(TSUFFIX).$(SUFFIX) \ @@ -382,6 +423,7 @@ ZBLASOBJS += \ zhemm3m_ilcopyb$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyb$(TSUFFIX).$(SUFFIX) \ zhemm3m_ilcopyr$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyr$(TSUFFIX).$(SUFFIX) \ zhemm3m_ilcopyi$(TSUFFIX).$(SUFFIX) zhemm3m_olcopyi$(TSUFFIX).$(SUFFIX) +endif XBLASOBJS += \ xgemm3m_incopyb$(TSUFFIX).$(SUFFIX) xgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ @@ -406,20 +448,25 @@ XBLASOBJS += \ endif ###### BLAS extensions ##### + +ifeq ($(BUILD_SINGLE),1) SBLASOBJS += \ somatcopy_k_cn$(TSUFFIX).$(SUFFIX) somatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ somatcopy_k_ct$(TSUFFIX).$(SUFFIX) somatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ simatcopy_k_cn$(TSUFFIX).$(SUFFIX) simatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ simatcopy_k_ct$(TSUFFIX).$(SUFFIX) simatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ sgeadd_k$(TSUFFIX).$(SUFFIX) - +endif +ifeq ($(BUILD_DOUBLE),1) DBLASOBJS += \ domatcopy_k_cn$(TSUFFIX).$(SUFFIX) domatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ domatcopy_k_ct$(TSUFFIX).$(SUFFIX) domatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ dimatcopy_k_cn$(TSUFFIX).$(SUFFIX) dimatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ dimatcopy_k_ct$(TSUFFIX).$(SUFFIX) dimatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ dgeadd_k$(TSUFFIX).$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX),1) CBLASOBJS += \ comatcopy_k_cn$(TSUFFIX).$(SUFFIX) comatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ comatcopy_k_ct$(TSUFFIX).$(SUFFIX) comatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ @@ -430,7 +477,9 @@ CBLASOBJS += \ cimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ cimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) cimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ cgeadd_k$(TSUFFIX).$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX16),1) ZBLASOBJS += \ zomatcopy_k_cn$(TSUFFIX).$(SUFFIX) zomatcopy_k_rn$(TSUFFIX).$(SUFFIX) \ zomatcopy_k_ct$(TSUFFIX).$(SUFFIX) zomatcopy_k_rt$(TSUFFIX).$(SUFFIX) \ @@ -441,6 +490,7 @@ ZBLASOBJS += \ zimatcopy_k_cnc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rnc$(TSUFFIX).$(SUFFIX) \ zimatcopy_k_ctc$(TSUFFIX).$(SUFFIX) zimatcopy_k_rtc$(TSUFFIX).$(SUFFIX) \ zgeadd_k$(TSUFFIX).$(SUFFIX) +endif ifeq ($(BUILD_HALF), 1) SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX)) diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c index c43520310..dd49d8e4e 100644 --- a/kernel/setparam-ref.c +++ b/kernel/setparam-ref.c @@ -114,6 +114,7 @@ gotoblas_t TABLE_NAME = { #endif #endif +#if ( BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) 0, 0, 0, SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, #ifdef SGEMM_DEFAULT_UNROLL_MN @@ -121,7 +122,7 @@ gotoblas_t TABLE_NAME = { #else MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), #endif - +#endif #ifdef HAVE_EXCLUSIVE_CACHE 1, @@ -129,19 +130,38 @@ gotoblas_t TABLE_NAME = { 0, #endif +#if (BUILD_SINGLE==1 ) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) samax_kTS, samin_kTS, smax_kTS, smin_kTS, isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, - snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sdot_kTS, - dsdot_kTS, - srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, - sgemv_nTS, sgemv_tTS, sger_kTS, - ssymv_LTS, ssymv_UTS, + snrm2_kTS, sasum_kTS, +#endif +#if BUILD_SINGLE == 1 + ssum_kTS, +#endif +#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) + scopy_kTS, sdot_kTS, +// dsdot_kTS, + srot_kTS, saxpy_kTS, +#endif +#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) + sscal_kTS, +#endif +#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) + sswap_kTS, + sgemv_nTS, sgemv_tTS, +#endif +#if BUILD_SINGLE == 1 + sger_kTS, + ssymv_LTS, ssymv_UTS, +#endif + +#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) #ifdef ARCH_X86_64 sgemm_directTS, sgemm_direct_performantTS, #endif - + sgemm_kernelTS, sgemm_betaTS, #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N sgemm_incopyTS, sgemm_itcopyTS, @@ -149,6 +169,9 @@ gotoblas_t TABLE_NAME = { sgemm_oncopyTS, sgemm_otcopyTS, #endif sgemm_oncopyTS, sgemm_otcopyTS, +#endif + +#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) strsm_kernel_LNTS, strsm_kernel_LTTS, strsm_kernel_RNTS, strsm_kernel_RTTS, #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N strsm_iunucopyTS, strsm_iunncopyTS, strsm_iutucopyTS, strsm_iutncopyTS, @@ -159,6 +182,8 @@ gotoblas_t TABLE_NAME = { #endif strsm_ounucopyTS, strsm_ounncopyTS, strsm_outucopyTS, strsm_outncopyTS, strsm_olnucopyTS, strsm_olnncopyTS, strsm_oltucopyTS, strsm_oltncopyTS, +#endif +#if BUILD_SINGLE == 1 strmm_kernel_RNTS, strmm_kernel_RTTS, strmm_kernel_LNTS, strmm_kernel_LTTS, #if SGEMM_DEFAULT_UNROLL_M != SGEMM_DEFAULT_UNROLL_N strmm_iunucopyTS, strmm_iunncopyTS, strmm_iutucopyTS, strmm_iutncopyTS, @@ -175,13 +200,16 @@ gotoblas_t TABLE_NAME = { ssymm_outcopyTS, ssymm_oltcopyTS, #endif ssymm_outcopyTS, ssymm_oltcopyTS, - +#endif +#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) #ifndef NO_LAPACK sneg_tcopyTS, slaswp_ncopyTS, #else NULL,NULL, #endif +#endif +#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) 0, 0, 0, DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, #ifdef DGEMM_DEFAULT_UNROLL_MN @@ -189,14 +217,36 @@ gotoblas_t TABLE_NAME = { #else MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), #endif +#endif + +#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) damax_kTS, damin_kTS, dmax_kTS, dmin_kTS, idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS, - dnrm2_kTS, dasum_kTS, dsum_kTS, dcopy_kTS, ddot_kTS, - drot_kTS, daxpy_kTS, dscal_kTS, dswap_kTS, - dgemv_nTS, dgemv_tTS, dger_kTS, + dnrm2_kTS, dasum_kTS, +#endif +#if (BUILD_DOUBLE==1) + dsum_kTS, +#endif +#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) + dcopy_kTS, ddot_kTS, +#endif +#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) + dsdot_kTS, +#endif +#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) + drot_kTS, + daxpy_kTS, + dscal_kTS, + dswap_kTS, + dgemv_nTS, dgemv_tTS, +#endif +#if (BUILD_DOUBLE==1) + dger_kTS, dsymv_LTS, dsymv_UTS, +#endif +#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) dgemm_kernelTS, dgemm_betaTS, #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N dgemm_incopyTS, dgemm_itcopyTS, @@ -204,6 +254,9 @@ gotoblas_t TABLE_NAME = { dgemm_oncopyTS, dgemm_otcopyTS, #endif dgemm_oncopyTS, dgemm_otcopyTS, +#endif + +#if (BUILD_DOUBLE==1) dtrsm_kernel_LNTS, dtrsm_kernel_LTTS, dtrsm_kernel_RNTS, dtrsm_kernel_RTTS, #if DGEMM_DEFAULT_UNROLL_M != DGEMM_DEFAULT_UNROLL_N dtrsm_iunucopyTS, dtrsm_iunncopyTS, dtrsm_iutucopyTS, dtrsm_iutncopyTS, @@ -237,6 +290,8 @@ gotoblas_t TABLE_NAME = { NULL, NULL, #endif +#endif + #ifdef EXPRECISION 0, 0, 0, @@ -291,6 +346,7 @@ gotoblas_t TABLE_NAME = { #endif +#if (BUILD_COMPLEX || BUILD_COMPLEX16) 0, 0, 0, CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, #ifdef CGEMM_DEFAULT_UNROLL_MN @@ -298,21 +354,34 @@ gotoblas_t TABLE_NAME = { #else MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), #endif - camax_kTS, camin_kTS, icamax_kTS, icamin_kTS, - cnrm2_kTS, casum_kTS, csum_kTS, ccopy_kTS, - cdotu_kTS, cdotc_kTS, csrot_kTS, - caxpy_kTS, caxpyc_kTS, cscal_kTS, cswap_kTS, +#endif +#if (BUILD_COMPLEX) + cnrm2_kTS, casum_kTS, csum_kTS, +#endif +#if (BUILD_COMPLEX || BUILD_COMPLEX16) + ccopy_kTS, cdotu_kTS, cdotc_kTS, +#endif +#if (BUILD_COMPLEX) + csrot_kTS, +#endif +#if (BUILD_COMPLEX || BUILD_COMPLEX16) + caxpy_kTS, + caxpyc_kTS, + cscal_kTS, + cswap_kTS, cgemv_nTS, cgemv_tTS, cgemv_rTS, cgemv_cTS, cgemv_oTS, cgemv_uTS, cgemv_sTS, cgemv_dTS, +#endif +#if (BUILD_COMPLEX) cgeru_kTS, cgerc_kTS, cgerv_kTS, cgerd_kTS, csymv_LTS, csymv_UTS, chemv_LTS, chemv_UTS, chemv_MTS, chemv_VTS, - +#endif +#if (BUILD_COMPLEX || BUILD_COMPLEX16) cgemm_kernel_nTS, cgemm_kernel_lTS, cgemm_kernel_rTS, cgemm_kernel_bTS, cgemm_betaTS, - #if CGEMM_DEFAULT_UNROLL_M != CGEMM_DEFAULT_UNROLL_N cgemm_incopyTS, cgemm_itcopyTS, #else @@ -332,6 +401,8 @@ gotoblas_t TABLE_NAME = { #endif ctrsm_ounucopyTS, ctrsm_ounncopyTS, ctrsm_outucopyTS, ctrsm_outncopyTS, ctrsm_olnucopyTS, ctrsm_olnncopyTS, ctrsm_oltucopyTS, ctrsm_oltncopyTS, +#endif +#if (BUILD_COMPLEX) ctrmm_kernel_RNTS, ctrmm_kernel_RTTS, ctrmm_kernel_RRTS, ctrmm_kernel_RCTS, ctrmm_kernel_LNTS, ctrmm_kernel_LTTS, ctrmm_kernel_LRTS, ctrmm_kernel_LCTS, @@ -361,7 +432,7 @@ gotoblas_t TABLE_NAME = { 0, 0, 0, -#if defined(USE_GEMM3M) +#if (USE_GEMM3M) #ifdef CGEMM3M_DEFAULT_UNROLL_M CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N, MAX(CGEMM3M_DEFAULT_UNROLL_M, CGEMM3M_DEFAULT_UNROLL_N), #else @@ -419,13 +490,20 @@ gotoblas_t TABLE_NAME = { NULL, NULL, NULL, NULL, #endif +#endif +#if (BUILD_COMPLEX || BUILD_COMPLEX16) #ifndef NO_LAPACK - cneg_tcopyTS, claswp_ncopyTS, + cneg_tcopyTS, + + claswp_ncopyTS, #else NULL, NULL, #endif +#endif + +#if BUILD_COMPLEX16 == 1 0, 0, 0, ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, #ifdef ZGEMM_DEFAULT_UNROLL_MN @@ -495,7 +573,7 @@ gotoblas_t TABLE_NAME = { zhemm_outcopyTS, zhemm_oltcopyTS, 0, 0, 0, -#if defined(USE_GEMM3M) +#if (USE_GEMM3M) #ifdef ZGEMM3M_DEFAULT_UNROLL_M ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N, MAX(ZGEMM3M_DEFAULT_UNROLL_M, ZGEMM3M_DEFAULT_UNROLL_N), #else @@ -560,6 +638,8 @@ gotoblas_t TABLE_NAME = { NULL, NULL, #endif +#endif + #ifdef EXPRECISION 0, 0, 0, @@ -626,7 +706,7 @@ gotoblas_t TABLE_NAME = { xhemm_outcopyTS, xhemm_oltcopyTS, 0, 0, 0, -#if defined(USE_GEMM3M) +#if (USE_GEMM3M) QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N, MAX(QGEMM_DEFAULT_UNROLL_M, QGEMM_DEFAULT_UNROLL_N), xgemm3m_kernelTS, @@ -691,52 +771,112 @@ gotoblas_t TABLE_NAME = { init_parameter, SNUMOPT, DNUMOPT, QNUMOPT, +#if BUILD_SINGLE == 1 + saxpby_kTS, +#endif +#if BUILD_DOUBLE == 1 + daxpby_kTS, +#endif +#if BUILD_COMPLEX == 1 + caxpby_kTS, +#endif +#if BUILD_COMPLEX16== 1 + zaxpby_kTS, +#endif - saxpby_kTS, daxpby_kTS, caxpby_kTS, zaxpby_kTS, - +#if BUILD_SINGLE == 1 somatcopy_k_cnTS, somatcopy_k_ctTS, somatcopy_k_rnTS, somatcopy_k_rtTS, +#endif +#if BUILD_DOUBLE== 1 domatcopy_k_cnTS, domatcopy_k_ctTS, domatcopy_k_rnTS, domatcopy_k_rtTS, +#endif +#if BUILD_COMPLEX == 1 comatcopy_k_cnTS, comatcopy_k_ctTS, comatcopy_k_rnTS, comatcopy_k_rtTS, comatcopy_k_cncTS, comatcopy_k_ctcTS, comatcopy_k_rncTS, comatcopy_k_rtcTS, +#endif +#if BUILD_COMPLEX16 == 1 zomatcopy_k_cnTS, zomatcopy_k_ctTS, zomatcopy_k_rnTS, zomatcopy_k_rtTS, zomatcopy_k_cncTS, zomatcopy_k_ctcTS, zomatcopy_k_rncTS, zomatcopy_k_rtcTS, +#endif +#if BUILD_SINGLE == 1 simatcopy_k_cnTS, simatcopy_k_ctTS, simatcopy_k_rnTS, simatcopy_k_rtTS, +#endif +#if BUILD_DOUBLE== 1 dimatcopy_k_cnTS, dimatcopy_k_ctTS, dimatcopy_k_rnTS, dimatcopy_k_rtTS, +#endif +#if BUILD_COMPLEX== 1 cimatcopy_k_cnTS, cimatcopy_k_ctTS, cimatcopy_k_rnTS, cimatcopy_k_rtTS, cimatcopy_k_cncTS, cimatcopy_k_ctcTS, cimatcopy_k_rncTS, cimatcopy_k_rtcTS, +#endif +#if BUILD_COMPLEX16==1 zimatcopy_k_cnTS, zimatcopy_k_ctTS, zimatcopy_k_rnTS, zimatcopy_k_rtTS, zimatcopy_k_cncTS, zimatcopy_k_ctcTS, zimatcopy_k_rncTS, zimatcopy_k_rtcTS, +#endif - sgeadd_kTS, dgeadd_kTS, cgeadd_kTS, zgeadd_kTS - +#if BUILD_SINGLE == 1 + sgeadd_kTS, +#endif +#if BUILD_DOUBLE==1 + dgeadd_kTS, +#endif +#if BUILD_COMPLEX==1 + cgeadd_kTS, +#endif +#if BUILD_COMPLEX16==1 + zgeadd_kTS +#endif }; -#if defined(ARCH_ARM64) +#if (ARCH_ARM64) static void init_parameter(void) { -#if defined(BUILD_HALF) +#if (BUILD_HALF) TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE == 1 TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX==1 TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif -#if defined(BUILD_HALF) +#if (BUILD_HALF) TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; #endif +#if BUILD_SINGLE == 1 TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; +#endif +#if BUILD_DOUBLE== 1 TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; +#endif +#if BUILD_COMPLEX== 1 TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; +#endif -#if defined(BUILD_HALF) +#if (BUILD_HALF) TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; #endif +#if BUILD_SINGLE == 1 TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R; +#endif +#if BUILD_DOUBLE==1 TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R; +#endif +#if BUILD_COMPLEX==1 TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R; +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; @@ -747,7 +887,7 @@ static void init_parameter(void) { TABLE_NAME.xgemm_r = XGEMM_DEFAULT_R; #endif -#if defined(USE_GEMM3M) +#if (USE_GEMM3M) #ifdef CGEMM3M_DEFAULT_P TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; #else @@ -792,8 +932,8 @@ static void init_parameter(void) { #endif } -#else // defined(ARCH_ARM64) -#if defined(ARCH_POWER) +#else // (ARCH_ARM64) +#if (ARCH_POWER) static void init_parameter(void) { #ifdef BUILD_HALF @@ -823,7 +963,7 @@ static void init_parameter(void) { } #else //POWER -#if defined(ARCH_ZARCH) +#if (ARCH_ZARCH) static void init_parameter(void) { #ifdef BUILD_HALF TABLE_NAME.shgemm_p = SHGEMM_DEFAULT_P; @@ -989,22 +1129,34 @@ static void init_parameter(void) { TABLE_NAME.shgemm_r = SHGEMM_DEFAULT_R; TABLE_NAME.shgemm_q = SHGEMM_DEFAULT_Q; #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q; +#endif +#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q; +#endif +#if BUILD_COMPLEX == 1 TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q; +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q; +#endif +#if BUILD_COMPLEX == 1 #ifdef CGEMM3M_DEFAULT_Q TABLE_NAME.cgemm3m_q = CGEMM3M_DEFAULT_Q; #else TABLE_NAME.cgemm3m_q = SGEMM_DEFAULT_Q; #endif +#endif +#if BUILD_COMPLEX16 == 1 #ifdef ZGEMM3M_DEFAULT_Q TABLE_NAME.zgemm3m_q = ZGEMM3M_DEFAULT_Q; #else TABLE_NAME.zgemm3m_q = DGEMM_DEFAULT_Q; #endif +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_q = QGEMM_DEFAULT_Q; @@ -1012,16 +1164,24 @@ static void init_parameter(void) { TABLE_NAME.xgemm3m_q = QGEMM_DEFAULT_Q; #endif -#if defined(CORE_KATMAI) || defined(CORE_COPPERMINE) || defined(CORE_BANIAS) || defined(CORE_YONAH) || defined(CORE_ATHLON) +#if (CORE_KATMAI) || (CORE_COPPERMINE) || (CORE_BANIAS) || (CORE_YONAH) || (CORE_ATHLON) #ifdef DEBUG fprintf(stderr, "Katmai, Coppermine, Banias, Athlon\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = 64 * (l2 >> 7); +#endif +#if BUILD_DOUBLE == 1 TABLE_NAME.dgemm_p = 32 * (l2 >> 7); +#endif +#if BUILD_COMPLEX==1 TABLE_NAME.cgemm_p = 32 * (l2 >> 7); +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_p = 16 * (l2 >> 7); +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 16 * (l2 >> 7); TABLE_NAME.xgemm_p = 8 * (l2 >> 7); @@ -1034,10 +1194,18 @@ static void init_parameter(void) { fprintf(stderr, "Northwood\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = 96 * (l2 >> 7); +#endif +#if BUILD_DOUBLE == 1 TABLE_NAME.dgemm_p = 48 * (l2 >> 7); +#endif +#if BUILD_COMPLEX==1 TABLE_NAME.cgemm_p = 48 * (l2 >> 7); +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_p = 24 * (l2 >> 7); +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 24 * (l2 >> 7); TABLE_NAME.xgemm_p = 12 * (l2 >> 7); @@ -1050,10 +1218,18 @@ static void init_parameter(void) { fprintf(stderr, "Atom\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = 256; +#endif +#if BUILD_DOUBLE ==1 TABLE_NAME.dgemm_p = 128; +#endif +#if BUILD_COMPLEX==1 TABLE_NAME.cgemm_p = 128; +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_p = 64; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 64; TABLE_NAME.xgemm_p = 32; @@ -1066,10 +1242,18 @@ static void init_parameter(void) { fprintf(stderr, "Prescott\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = 56 * (l2 >> 7); +#endif +#if BUILD_DOUBLE ==1 TABLE_NAME.dgemm_p = 28 * (l2 >> 7); +#endif +#if BUILD_COMPLEX==1 TABLE_NAME.cgemm_p = 28 * (l2 >> 7); +#endif +#if BUILD_COMPLEX16 == 1 TABLE_NAME.zgemm_p = 14 * (l2 >> 7); +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 14 * (l2 >> 7); TABLE_NAME.xgemm_p = 7 * (l2 >> 7); @@ -1082,10 +1266,18 @@ static void init_parameter(void) { fprintf(stderr, "Core2\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8; +#endif +#if BUILD_DOUBLE==1 TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8; +#endif +#if BUILD_COMPLEX==1 TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4; +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8; TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4; @@ -1098,10 +1290,18 @@ static void init_parameter(void) { fprintf(stderr, "Penryn\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; +#endif +#if BUILD_DOUBLE == 1 TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; +#endif +#if BUILD_COMPLEX==1 TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; @@ -1114,10 +1314,18 @@ static void init_parameter(void) { fprintf(stderr, "Dunnington\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; +#endif +#if BUILD_DOUBLE ==1 TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; +#endif +#if BUILD_COMPLEX==1 TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; @@ -1131,10 +1339,18 @@ static void init_parameter(void) { fprintf(stderr, "Nehalem\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1147,10 +1363,18 @@ static void init_parameter(void) { fprintf(stderr, "Sandybridge\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1163,26 +1387,42 @@ static void init_parameter(void) { fprintf(stderr, "Haswell\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16) TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; #endif #endif -#if defined (SKYLAKEX) || defined (COOPERLAKE) +#if defined(SKYLAKEX) || defined(COOPERLAKE) #ifdef DEBUG fprintf(stderr, "SkylakeX\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1196,10 +1436,18 @@ static void init_parameter(void) { fprintf(stderr, "Opteron\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = 224 + 56 * (l2 >> 7); +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = 112 + 28 * (l2 >> 7); +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = 112 + 28 * (l2 >> 7); +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = 56 + 14 * (l2 >> 7); +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = 56 + 14 * (l2 >> 7); TABLE_NAME.xgemm_p = 28 + 7 * (l2 >> 7); @@ -1212,10 +1460,18 @@ static void init_parameter(void) { fprintf(stderr, "Barcelona\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1228,10 +1484,18 @@ static void init_parameter(void) { fprintf(stderr, "Bobcate\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1244,10 +1508,18 @@ static void init_parameter(void) { fprintf(stderr, "Bulldozer\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1260,10 +1532,18 @@ static void init_parameter(void) { fprintf(stderr, "Excavator\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1277,10 +1557,18 @@ static void init_parameter(void) { fprintf(stderr, "Piledriver\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1293,10 +1581,18 @@ static void init_parameter(void) { fprintf(stderr, "Steamroller\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1309,10 +1605,18 @@ static void init_parameter(void) { fprintf(stderr, "Zen\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if BUILD_DOUBLE TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if BUILD_COMPLEX16 TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_p = QGEMM_DEFAULT_P; TABLE_NAME.xgemm_p = XGEMM_DEFAULT_P; @@ -1326,11 +1630,18 @@ static void init_parameter(void) { fprintf(stderr, "NANO\n"); #endif +#if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1) TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P; +#endif +#if (BUILD_DOUBLE==1) TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P; +#endif +#if (BUILD_COMPLEX==1) TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P; +#endif +#if (BUILD_COMPLEX16==1) TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P; - +#endif #ifdef EXPRECISION @@ -1340,41 +1651,55 @@ static void init_parameter(void) { #endif - +#if BUILD_COMPLEX==1 #ifdef CGEMM3M_DEFAULT_P TABLE_NAME.cgemm3m_p = CGEMM3M_DEFAULT_P; #else TABLE_NAME.cgemm3m_p = TABLE_NAME.sgemm_p; #endif +#endif +#if BUILD_COMPLEX16==1 #ifdef ZGEMM3M_DEFAULT_P TABLE_NAME.zgemm3m_p = ZGEMM3M_DEFAULT_P; #else TABLE_NAME.zgemm3m_p = TABLE_NAME.dgemm_p; #endif +#endif #ifdef EXPRECISION TABLE_NAME.xgemm3m_p = TABLE_NAME.qgemm_p; #endif - +#if BUILD_SINGLE == 1 TABLE_NAME.sgemm_p = ((TABLE_NAME.sgemm_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M; +#endif +#if BUILD_DOUBLE== 1 TABLE_NAME.dgemm_p = ((TABLE_NAME.dgemm_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M; +#endif +#if BUILD_COMPLEX==1 TABLE_NAME.cgemm_p = ((TABLE_NAME.cgemm_p + CGEMM_DEFAULT_UNROLL_M - 1)/CGEMM_DEFAULT_UNROLL_M) * CGEMM_DEFAULT_UNROLL_M; +#endif +#if BUILD_COMPLEX16==1 TABLE_NAME.zgemm_p = ((TABLE_NAME.zgemm_p + ZGEMM_DEFAULT_UNROLL_M - 1)/ZGEMM_DEFAULT_UNROLL_M) * ZGEMM_DEFAULT_UNROLL_M; +#endif +#if BUILD_COMPLEX==1 #ifdef CGEMM3M_DEFAULT_UNROLL_M TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + CGEMM3M_DEFAULT_UNROLL_M - 1)/CGEMM3M_DEFAULT_UNROLL_M) * CGEMM3M_DEFAULT_UNROLL_M; #else TABLE_NAME.cgemm3m_p = ((TABLE_NAME.cgemm3m_p + SGEMM_DEFAULT_UNROLL_M - 1)/SGEMM_DEFAULT_UNROLL_M) * SGEMM_DEFAULT_UNROLL_M; #endif +#endif +#if BUILD_COMPLEX16==1 #ifdef ZGEMM3M_DEFAULT_UNROLL_M TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + ZGEMM3M_DEFAULT_UNROLL_M - 1)/ZGEMM3M_DEFAULT_UNROLL_M) * ZGEMM3M_DEFAULT_UNROLL_M; #else TABLE_NAME.zgemm3m_p = ((TABLE_NAME.zgemm3m_p + DGEMM_DEFAULT_UNROLL_M - 1)/DGEMM_DEFAULT_UNROLL_M) * DGEMM_DEFAULT_UNROLL_M; #endif +#endif #ifdef QUAD_PRECISION TABLE_NAME.qgemm_p = ((TABLE_NAME.qgemm_p + QGEMM_DEFAULT_UNROLL_M - 1)/QGEMM_DEFAULT_UNROLL_M) * QGEMM_DEFAULT_UNROLL_M; @@ -1386,15 +1711,19 @@ static void init_parameter(void) { fprintf(stderr, "L2 = %8d DGEMM_P .. %d\n", l2, TABLE_NAME.dgemm_p); #endif +#if BUILD_SINGLE==1 TABLE_NAME.sgemm_r = (((BUFFER_SIZE - ((TABLE_NAME.sgemm_p * TABLE_NAME.sgemm_q * 4 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.sgemm_q * 4) - 15) & ~15); +#endif +#if BUILD_DOUBLE==1 TABLE_NAME.dgemm_r = (((BUFFER_SIZE - ((TABLE_NAME.dgemm_p * TABLE_NAME.dgemm_q * 8 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.dgemm_q * 8) - 15) & ~15); +#endif #ifdef EXPRECISION TABLE_NAME.qgemm_r = (((BUFFER_SIZE - @@ -1403,26 +1732,33 @@ static void init_parameter(void) { ) / (TABLE_NAME.qgemm_q * 16) - 15) & ~15); #endif +#if BUILD_COMPLEX ==1 TABLE_NAME.cgemm_r = (((BUFFER_SIZE - ((TABLE_NAME.cgemm_p * TABLE_NAME.cgemm_q * 8 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.cgemm_q * 8) - 15) & ~15); +#endif +#if BUILD_COMPLEX16 ==1 TABLE_NAME.zgemm_r = (((BUFFER_SIZE - ((TABLE_NAME.zgemm_p * TABLE_NAME.zgemm_q * 16 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.zgemm_q * 16) - 15) & ~15); +#endif +#if BUILD_COMPLEX == 1 TABLE_NAME.cgemm3m_r = (((BUFFER_SIZE - ((TABLE_NAME.cgemm3m_p * TABLE_NAME.cgemm3m_q * 8 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.cgemm3m_q * 8) - 15) & ~15); +#endif +#if BUILD_COMPLEX16 == 1 TABLE_NAME.zgemm3m_r = (((BUFFER_SIZE - ((TABLE_NAME.zgemm3m_p * TABLE_NAME.zgemm3m_q * 16 + TABLE_NAME.offsetA + TABLE_NAME.align) & ~TABLE_NAME.align) ) / (TABLE_NAME.zgemm3m_q * 16) - 15) & ~15); - +#endif @@ -1444,4 +1780,4 @@ static void init_parameter(void) { } #endif //POWER #endif //ZARCH -#endif //defined(ARCH_ARM64) +#endif //(ARCH_ARM64) diff --git a/kernel/x86_64/KERNEL b/kernel/x86_64/KERNEL index 4a2e13bed..d75196974 100644 --- a/kernel/x86_64/KERNEL +++ b/kernel/x86_64/KERNEL @@ -259,8 +259,12 @@ SNRM2KERNEL = nrm2_sse.S endif ifndef DNRM2KERNEL +ifeq ($(OSNAME),WINNT) +DNRM2KERNEL = ../arm/nrm2.c +else DNRM2KERNEL = nrm2.S endif +endif ifndef QNRM2KERNEL QNRM2KERNEL = nrm2.S @@ -271,8 +275,12 @@ CNRM2KERNEL = znrm2_sse.S endif ifndef ZNRM2KERNEL +ifeq ($(OSNAME),WINNT) +ZNRM2KERNEL = ../arm/znrm2.c +else ZNRM2KERNEL = znrm2.S endif +endif ifndef XNRM2KERNEL XNRM2KERNEL = znrm2.S diff --git a/lapack-netlib/LAPACKE/src/Makefile b/lapack-netlib/LAPACKE/src/Makefile index 8060151ae..a602dd7a0 100644 --- a/lapack-netlib/LAPACKE/src/Makefile +++ b/lapack-netlib/LAPACKE/src/Makefile @@ -46,6 +46,7 @@ OBJ = \ lapacke_ilaver.o \ lapacke_nancheck.o +ifeq ($(BUILD_COMPLEX),1) OBJ_C = \ lapacke_cbbcsd.o \ lapacke_cbbcsd_work.o \ @@ -653,7 +654,9 @@ lapacke_cupgtr.o \ lapacke_cupgtr_work.o \ lapacke_cupmtr.o \ lapacke_cupmtr_work.o +endif +ifeq ($(BUILD_DOUBLE),1) OBJ_D = \ lapacke_dbbcsd.o \ lapacke_dbbcsd_work.o \ @@ -1218,8 +1221,12 @@ lapacke_dtrttf_work.o \ lapacke_dtrttp.o \ lapacke_dtrttp_work.o \ lapacke_dtzrzf.o \ -lapacke_dtzrzf_work.o +lapacke_dtzrzf_work.o \ +lapacke_slag2d.o \ +lapacke_slag2d_work.o +endif +ifeq ($(BUILD_SINGLE),1) OBJ_S = \ lapacke_sbbcsd.o \ lapacke_sbbcsd_work.o \ @@ -1395,8 +1402,6 @@ lapacke_slacn2.o \ lapacke_slacn2_work.o \ lapacke_slacpy.o \ lapacke_slacpy_work.o \ -lapacke_slag2d.o \ -lapacke_slag2d_work.o \ lapacke_slamch.o \ lapacke_slamch_work.o \ lapacke_slange.o \ @@ -1781,7 +1786,9 @@ lapacke_strttp.o \ lapacke_strttp_work.o \ lapacke_stzrzf.o \ lapacke_stzrzf_work.o +endif +ifeq ($(BUILD_COMPLEX16),1) OBJ_Z = \ lapacke_zbbcsd.o \ lapacke_zbbcsd_work.o \ @@ -2393,35 +2400,52 @@ lapacke_zupgtr.o \ lapacke_zupgtr_work.o \ lapacke_zupmtr.o \ lapacke_zupmtr_work.o +endif ifdef BUILD_DEPRECATED -DEPRECATED = \ +ifeq ($(BUILD_COMPLEX),1) +DEPRECATEDC = \ lapacke_cggsvp.o \ lapacke_cggsvp_work.o \ -lapacke_dggsvp.o \ -lapacke_dggsvp_work.o \ -lapacke_sggsvp.o \ -lapacke_sggsvp_work.o \ -lapacke_zggsvp.o \ -lapacke_zggsvp_work.o \ lapacke_cggsvd.o \ lapacke_cggsvd_work.o \ +lapacke_cgeqpf.o \ +lapacke_cgeqpf_work.o +endif + +ifeq ($(BUILD_DOUBLE),1) +DEPRECATEDD = \ +lapacke_dggsvp.o \ +lapacke_dggsvp_work.o \ lapacke_dggsvd.o \ lapacke_dggsvd_work.o \ +lapacke_dgeqpf.o \ +lapacke_dgeqpf_work.o +endif + +ifeq ($(BUILD_SINGLE),1) +DEPRECATEDS = \ +lapacke_sggsvp.o \ +lapacke_sggsvp_work.o \ lapacke_sggsvd.o \ lapacke_sggsvd_work.o \ +lapacke_sgeqpf.o \ +lapacke_sgeqpf_work.o +endif + +ifeq ($(BUILD_COMPLEX16),1) +DEPRECATEDZ = \ +lapacke_zggsvp.o \ +lapacke_zggsvp_work.o \ lapacke_zggsvd.o \ lapacke_zggsvd_work.o \ -lapacke_cgeqpf.o \ -lapacke_cgeqpf_work.o \ -lapacke_dgeqpf.o \ -lapacke_dgeqpf_work.o \ -lapacke_sgeqpf.o \ -lapacke_sgeqpf_work.o \ lapacke_zgeqpf.o \ lapacke_zgeqpf_work.o endif +DEPRECATED = $(DEPRECATEDS) $(DEPRECATEDD) $(DEPRECATEDC) $(DEPRECATEDZ) +endif + ifdef USEXBLAS EXTENDED = \ lapacke_cgbrfsx.o lapacke_cporfsx.o lapacke_dgerfsx.o lapacke_sgbrfsx.o lapacke_ssyrfsx.o lapacke_zherfsx.o \ @@ -2440,37 +2464,50 @@ endif ifdef LAPACKE_WITH_TMG # FILE PARTS OF TMGLIB -MATGEN = \ +ifeq ($(BUILD_COMPLEX),1) +MATGENC = \ lapacke_clatms.o \ lapacke_clatms_work.o \ -lapacke_dlatms.o \ -lapacke_dlatms_work.o \ -lapacke_slatms.o \ -lapacke_slatms_work.o \ -lapacke_zlatms.o \ -lapacke_zlatms_work.o \ lapacke_clagge.o \ lapacke_clagge_work.o \ -lapacke_dlagge.o \ -lapacke_dlagge_work.o \ -lapacke_slagge.o \ -lapacke_slagge_work.o \ -lapacke_zlagge.o \ -lapacke_zlagge_work.o \ lapacke_claghe.o \ lapacke_claghe_work.o \ +lapacke_clagsy.o \ +lapacke_clagsy_work.o +endif +ifeq ($(BUILD_DOUBLE),1) +MATGEND = \ +lapacke_dlatms.o \ +lapacke_dlatms_work.o \ +lapacke_dlagge.o \ +lapacke_dlagge_work.o \ +lapacke_dlagsy.o \ +lapacke_dlagsy_work.o +endif +ifeq ($(BUILD_SINGLE),1) +MATGENS = \ +lapacke_slatms.o \ +lapacke_slatms_work.o \ +lapacke_slagge.o \ +lapacke_slagge_work.o \ +lapacke_slagsy.o \ +lapacke_slagsy_work.o +endif +ifeq ($(BUILD_COMPLEX16),1) +MATGENZ = \ +lapacke_zlatms.o \ +lapacke_zlatms_work.o \ +lapacke_zlagge.o \ +lapacke_zlagge_work.o \ lapacke_zlaghe.o \ lapacke_zlaghe_work.o \ -lapacke_clagsy.o \ -lapacke_clagsy_work.o \ -lapacke_dlagsy.o \ -lapacke_dlagsy_work.o \ -lapacke_slagsy.o \ -lapacke_slagsy_work.o \ lapacke_zlagsy.o \ lapacke_zlagsy_work.o endif +MATGEN = $(MATGENS) $(MATGEND) $(MATGENC) $(MATGENZ) +endif + .PHONY: all all: $(LAPACKELIB) diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index 9f79e20e9..83baac875 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -66,7 +66,9 @@ ALLAUX_O = ilaenv.o ilaenv2stage.o ieeeck.o lsamen.o xerbla.o xerbla_array.o \ ilaprec.o ilatrans.o ilauplo.o iladiag.o chla_transtype.o \ ../INSTALL/ilaver.o ../INSTALL/lsame.o ../INSTALL/slamch.o +ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" "" SCLAUX = \ + sbdsvdx.o sstevx.o sstein.o \ sbdsdc.o \ sbdsqr.o sdisna.o slabad.o slacpy.o sladiv.o slae2.o slaebz.o \ slaed0.o slaed1.o slaed2.o slaed3.o slaed4.o slaed5.o slaed6.o \ @@ -81,10 +83,14 @@ SCLAUX = \ slaset.o slasq1.o slasq2.o slasq3.o slasq4.o slasq5.o slasq6.o \ slasr.o slasrt.o slassq.o slasv2.o spttrf.o sstebz.o sstedc.o \ ssteqr.o ssterf.o slaisnan.o sisnan.o \ - slartgp.o slartgs.o \ + slartgp.o slartgs.o scombssq.o \ ../INSTALL/second_$(TIMER).o +endif +ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" DZLAUX = \ + dcombssq.o \ + dbdsvdx.o dstevx.o dstein.o \ dbdsdc.o \ dbdsqr.o ddisna.o dlabad.o dlacpy.o dladiv.o dlae2.o dlaebz.o \ dlaed0.o dlaed1.o dlaed2.o dlaed3.o dlaed4.o dlaed5.o dlaed6.o \ @@ -101,9 +107,12 @@ DZLAUX = \ dsteqr.o dsterf.o dlaisnan.o disnan.o \ dlartgp.o dlartgs.o \ ../INSTALL/dlamch.o ../INSTALL/dsecnd_$(TIMER).o +endif +#ifeq ($(BUILD_SINGLE),1) +ifdef BUILD_SINGLE SLASRC_O = \ - sbdsvdx.o spotrf2.o sgetrf2.o \ + spotrf2.o sgetrf2.o \ sgbbrd.o sgbcon.o sgbequ.o sgbrfs.o sgbsv.o \ sgbsvx.o sgbtf2.o sgbtrf.o sgbtrs.o sgebak.o sgebal.o sgebd2.o \ sgebrd.o sgecon.o sgeequ.o sgees.o sgeesx.o sgeev.o sgeevx.o \ @@ -145,8 +154,7 @@ SLASRC_O = \ ssbev.o ssbevd.o ssbevx.o ssbgst.o ssbgv.o ssbgvd.o ssbgvx.o \ ssbtrd.o sspcon.o sspev.o sspevd.o sspevx.o sspgst.o \ sspgv.o sspgvd.o sspgvx.o ssprfs.o sspsv.o sspsvx.o ssptrd.o \ - ssptrf.o ssptri.o ssptrs.o sstegr.o sstein.o sstev.o sstevd.o sstevr.o \ - sstevx.o \ + ssptrf.o ssptri.o ssptrs.o sstegr.o sstev.o sstevd.o sstevr.o \ ssycon.o ssyev.o ssyevd.o ssyevr.o ssyevx.o ssygs2.o \ ssygst.o ssygv.o ssygvd.o ssygvx.o ssyrfs.o ssysv.o ssysvx.o \ ssytd2.o ssytf2.o ssytrd.o ssytrf.o ssytri.o ssytri2.o ssytri2x.o \ @@ -180,9 +188,13 @@ SLASRC_O = \ ssytrd_2stage.o ssytrd_sy2sb.o ssytrd_sb2st.o ssb2st_kernels.o \ ssyevd_2stage.o ssyev_2stage.o ssyevx_2stage.o ssyevr_2stage.o \ ssbev_2stage.o ssbevx_2stage.o ssbevd_2stage.o ssygv_2stage.o \ - sgesvdq.o scombssq.o + sgesvdq.o + +endif +ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" DSLASRC_O = spotrs.o sgetrs.o spotrf.o sgetrf.o +endif ifdef USEXBLAS SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \ @@ -194,6 +206,7 @@ SXLASRC = sgesvxx.o sgerfsx.o sla_gerfsx_extended.o sla_geamv.o \ slascl2.o sla_wwaddw.o endif +ifeq ($(BUILD_COMPLEX),1) CLASRC_O = \ cpotrf2.o cgetrf2.o \ cbdsqr.o cgbbrd.o cgbcon.o cgbequ.o cgbrfs.o cgbsv.o cgbsvx.o \ @@ -284,6 +297,7 @@ CLASRC_O = \ cheevd_2stage.o cheev_2stage.o cheevx_2stage.o cheevr_2stage.o \ chbev_2stage.o chbevx_2stage.o chbevd_2stage.o chegv_2stage.o \ cgesvdq.o +endif ifdef USEXBLAS CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ @@ -299,11 +313,13 @@ CXLASRC = cgesvxx.o cgerfsx.o cla_gerfsx_extended.o cla_geamv.o \ cla_lin_berr.o clarscl2.o clascl2.o cla_wwaddw.o endif -ZCLASRC_O = cpotrs.o cgetrs.o cpotrf.o cgetrf.o +ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" +ZCLASRC_O = cpotrs.o cgetrs.o cpotrf.o cgetrf.o clag2z.o +endif +ifeq ($(BUILD_DOUBLE),1) DLASRC_O = \ dpotrf2.o dgetrf2.o \ - dbdsvdx.o \ dgbbrd.o dgbcon.o dgbequ.o dgbrfs.o dgbsv.o \ dgbsvx.o dgbtf2.o dgbtrf.o dgbtrs.o dgebak.o dgebal.o dgebd2.o \ dgebrd.o dgecon.o dgeequ.o dgees.o dgeesx.o dgeev.o dgeevx.o \ @@ -345,8 +361,7 @@ DLASRC_O = \ dsbev.o dsbevd.o dsbevx.o dsbgst.o dsbgv.o dsbgvd.o dsbgvx.o \ dsbtrd.o dspcon.o dspev.o dspevd.o dspevx.o dspgst.o \ dspgv.o dspgvd.o dspgvx.o dsprfs.o dspsv.o dspsvx.o dsptrd.o \ - dsptrf.o dsptri.o dsptrs.o dstegr.o dstein.o dstev.o dstevd.o dstevr.o \ - dstevx.o \ + dsptrf.o dsptri.o dsptrs.o dstegr.o dstev.o dstevd.o dstevr.o \ dsycon.o dsyev.o dsyevd.o dsyevr.o \ dsyevx.o dsygs2.o dsygst.o dsygv.o dsygvd.o dsygvx.o dsyrfs.o \ dsysv.o dsysvx.o \ @@ -381,7 +396,8 @@ DLASRC_O = \ dsytrd_2stage.o dsytrd_sy2sb.o dsytrd_sb2st.o dsb2st_kernels.o \ dsyevd_2stage.o dsyev_2stage.o dsyevx_2stage.o dsyevr_2stage.o \ dsbev_2stage.o dsbevx_2stage.o dsbevd_2stage.o dsygv_2stage.o \ - dgesvdq.o dcombssq.o + dgesvdq.o +endif ifdef USEXBLAS DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ @@ -393,6 +409,7 @@ DXLASRC = dgesvxx.o dgerfsx.o dla_gerfsx_extended.o dla_geamv.o \ dlascl2.o dla_wwaddw.o endif +ifeq ($(BUILD_COMPLEX16),1) ZLASRC_O = \ zpotrf2.o zgetrf2.o \ zbdsqr.o zgbbrd.o zgbcon.o zgbequ.o zgbrfs.o zgbsv.o zgbsvx.o \ @@ -471,7 +488,7 @@ ZLASRC_O = \ zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \ zunmtr.o zupgtr.o \ zupmtr.o izmax1.o dzsum1.o zstemr.o \ - zcgesv.o zcposv.o zlag2c.o clag2z.o zlat2c.o \ + zcgesv.o zcposv.o zlag2c.o zlat2c.o \ zhfrk.o ztfttp.o zlanhf.o zpftrf.o zpftri.o zpftrs.o ztfsm.o ztftri.o \ ztfttr.o ztpttf.o ztpttr.o ztrttf.o ztrttp.o \ zgeequb.o zgbequb.o zsyequb.o zpoequb.o zheequb.o \ @@ -488,6 +505,7 @@ ZLASRC_O = \ zheevd_2stage.o zheev_2stage.o zheevx_2stage.o zheevr_2stage.o \ zhbev_2stage.o zhbevx_2stage.o zhbevd_2stage.o zhegv_2stage.o \ zgesvdq.o +endif ifdef USEXBLAS ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ @@ -501,18 +519,30 @@ ZXLASRC = zgesvxx.o zgerfsx.o zla_gerfsx_extended.o zla_geamv.o \ zla_lin_berr.o zlarscl2.o zlascl2.o zla_wwaddw.o endif -DEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \ +ifeq ($(BUILD_COMPLEX),1) +CDEPRECSRC = DEPRECATED/cgegs.o DEPRECATED/cgegv.o DEPRECATED/cgelsx.o \ DEPRECATED/cgeqpf.o DEPRECATED/cggsvd.o DEPRECATED/cggsvp.o \ - DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o \ + DEPRECATED/clahrd.o DEPRECATED/clatzm.o DEPRECATED/ctzrqf.o +endif + +ifeq ($(BUILD_DOUBLE),1) +DDEPRECSRC = \ DEPRECATED/dgegs.o DEPRECATED/dgegv.o DEPRECATED/dgelsx.o \ DEPRECATED/dgeqpf.o DEPRECATED/dggsvd.o DEPRECATED/dggsvp.o \ - DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o \ + DEPRECATED/dlahrd.o DEPRECATED/dlatzm.o DEPRECATED/dtzrqf.o +endif +ifeq ($(BUILD_SINGLE),1) +SDEPRECSRC = \ DEPRECATED/sgegs.o DEPRECATED/sgegv.o DEPRECATED/sgelsx.o \ DEPRECATED/sgeqpf.o DEPRECATED/sggsvd.o DEPRECATED/sggsvp.o \ - DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o \ + DEPRECATED/slahrd.o DEPRECATED/slatzm.o DEPRECATED/stzrqf.o +endif +ifeq ($(BUILD_COMPLEX16),1) +ZDEPRECSRC = \ DEPRECATED/zgegs.o DEPRECATED/zgegv.o DEPRECATED/zgelsx.o \ DEPRECATED/zgeqpf.o DEPRECATED/zggsvd.o DEPRECATED/zggsvp.o \ DEPRECATED/zlahrd.o DEPRECATED/zlatzm.o DEPRECATED/ztzrqf.o +endif # filter out optimized codes from OpenBLAS ALL_AUX_OBJS = xerbla.o ../INSTALL/lsame.o @@ -560,7 +590,7 @@ ALLXOBJ = $(SXLASRC) $(DXLASRC) $(CXLASRC) $(ZXLASRC) endif ifdef BUILD_DEPRECATED -DEPRECATED = $(DEPRECSRC) +DEPRECATED = $(SDEPRECSRC) $(DDEPRECSRC) $(CDEPRECSRC) $(ZDEPRECSRC) endif .PHONY: all diff --git a/lapack-netlib/TESTING/MATGEN/Makefile b/lapack-netlib/TESTING/MATGEN/Makefile index 87432fd04..e21ebd6c3 100644 --- a/lapack-netlib/TESTING/MATGEN/Makefile +++ b/lapack-netlib/TESTING/MATGEN/Makefile @@ -33,25 +33,37 @@ TOPSRCDIR = ../.. include $(TOPSRCDIR)/make.inc +ifneq "$(or $(BUILD_SINGLE),$(BUILD_COMPLEX))" "" SCATGEN = slatm1.o slatm7.o slaran.o slarnd.o +endif +ifeq ($(BUILD_SINGLE),1) SMATGEN = slatms.o slatme.o slatmr.o slatmt.o \ slagge.o slagsy.o slakf2.o slarge.o slaror.o slarot.o slatm2.o \ slatm3.o slatm5.o slatm6.o slahilb.o +endif +ifeq ($(BUILD_COMPLEX),1) CMATGEN = clatms.o clatme.o clatmr.o clatmt.o \ clagge.o claghe.o clagsy.o clakf2.o clarge.o claror.o clarot.o \ clatm1.o clarnd.o clatm2.o clatm3.o clatm5.o clatm6.o clahilb.o +endif +ifneq "$(or $(BUILD_DOUBLE),$(BUILD_COMPLEX16))" "" DZATGEN = dlatm1.o dlatm7.o dlaran.o dlarnd.o +endif +ifeq ($(BUILD_DOUBLE),1) DMATGEN = dlatms.o dlatme.o dlatmr.o dlatmt.o \ dlagge.o dlagsy.o dlakf2.o dlarge.o dlaror.o dlarot.o dlatm2.o \ dlatm3.o dlatm5.o dlatm6.o dlahilb.o +endif +ifeq ($(BUILD_COMPLEX16),1) ZMATGEN = zlatms.o zlatme.o zlatmr.o zlatmt.o \ zlagge.o zlaghe.o zlagsy.o zlakf2.o zlarge.o zlaror.o zlarot.o \ zlatm1.o zlarnd.o zlatm2.o zlatm3.o zlatm5.o zlatm6.o zlahilb.o +endif .PHONY: all all: $(TMGLIB) @@ -97,5 +109,9 @@ cleanobj: cleanlib: rm -f $(TMGLIB) +ifeq ($(filter $(BUILD_SINGLE) $(BUILD_COMPLEX),1),) slaran.o: slaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< +endif +ifeq ($(filter $(BUILD_DOUBLE) $(BUILD_COMPLEX16),1),) dlaran.o: dlaran.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $< +endif diff --git a/lapack/getf2/Makefile b/lapack/getf2/Makefile index 612c6f9cc..a524a3235 100644 --- a/lapack/getf2/Makefile +++ b/lapack/getf2/Makefile @@ -1,11 +1,19 @@ TOPDIR = ../.. include ../../Makefile.system +ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" SBLASOBJS = sgetf2_k.$(SUFFIX) +endif +ifeq ($(BUILD_DOUBLE),1) DBLASOBJS = dgetf2_k.$(SUFFIX) +endif QBLASOBJS = qgetf2_k.$(SUFFIX) +ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" CBLASOBJS = cgetf2_k.$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX16),1) ZBLASOBJS = zgetf2_k.$(SUFFIX) +endif XBLASOBJS = xgetf2_k.$(SUFFIX) sgetf2_k.$(SUFFIX) : getf2_k.c diff --git a/lapack/getrf/Makefile b/lapack/getrf/Makefile index a559dfb0d..976ca3c0b 100644 --- a/lapack/getrf/Makefile +++ b/lapack/getrf/Makefile @@ -17,6 +17,19 @@ ZBLASOBJS += zgetrf_parallel.$(SUFFIX) XBLASOBJS += xgetrf_parallel.$(SUFFIX) endif +ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" +SBLASOBJS= +endif +ifneq ($(BUILD_DOUBLE),1) +DBLASOBJS= +endif +ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" +CBLASOBJS= +endif +ifneq ($(BUILD_COMPLEX16),1) +ZBLASOBJS= +endif + ifeq ($(USE_OPENMP), 1) GETRF_SRC = getrf_parallel_omp.c else diff --git a/lapack/getrs/Makefile b/lapack/getrs/Makefile index 2640ef097..f32569367 100644 --- a/lapack/getrs/Makefile +++ b/lapack/getrs/Makefile @@ -17,6 +17,19 @@ ZBLASOBJS += zgetrs_N_parallel.$(SUFFIX) zgetrs_T_parallel.$(SUFFIX) zgetrs_R_pa XBLASOBJS += xgetrs_N_parallel.$(SUFFIX) xgetrs_T_parallel.$(SUFFIX) xgetrs_R_parallel.$(SUFFIX) xgetrs_C_parallel.$(SUFFIX) endif +ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" +SBLASOBJS= +endif +ifneq ($(BUILD_DOUBLE),1) +DBLASOBJS= +endif +ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" +CBLASOBJS= +endif +ifneq ($(BUILD_COMPLEX16),1) +ZBLASOBJS= +endif + sgetrs_N_single.$(SUFFIX) : getrs_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANS $< -o $(@F) diff --git a/lapack/laswp/Makefile b/lapack/laswp/Makefile index 389800692..2028d994e 100644 --- a/lapack/laswp/Makefile +++ b/lapack/laswp/Makefile @@ -1,11 +1,19 @@ TOPDIR = ../.. include ../../Makefile.system +ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" SBLASOBJS = slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX) +endif +ifeq ($(BUILD_DOUBLE),1) DBLASOBJS = dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX) +endif QBLASOBJS = qlaswp_plus.$(SUFFIX) qlaswp_minus.$(SUFFIX) +ifneq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" CBLASOBJS = claswp_plus.$(SUFFIX) claswp_minus.$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX16),1) ZBLASOBJS = zlaswp_plus.$(SUFFIX) zlaswp_minus.$(SUFFIX) +endif XBLASOBJS = xlaswp_plus.$(SUFFIX) xlaswp_minus.$(SUFFIX) slaswp_plus.$(SUFFIX) slaswp_minus.$(SUFFIX) dlaswp_plus.$(SUFFIX) dlaswp_minus.$(SUFFIX) \ diff --git a/lapack/lauu2/Makefile b/lapack/lauu2/Makefile index dc6a640b4..60d2db4db 100644 --- a/lapack/lauu2/Makefile +++ b/lapack/lauu2/Makefile @@ -1,11 +1,19 @@ TOPDIR = ../.. include ../../Makefile.system +ifeq ($(BUILD_SINGLE),1) SBLASOBJS = slauu2_U.$(SUFFIX) slauu2_L.$(SUFFIX) +endif +ifeq ($(BUILD_DOUBLE),1) DBLASOBJS = dlauu2_U.$(SUFFIX) dlauu2_L.$(SUFFIX) +endif QBLASOBJS = qlauu2_U.$(SUFFIX) qlauu2_L.$(SUFFIX) +ifeq ($(BUILD_COMPLEX),1) CBLASOBJS = clauu2_U.$(SUFFIX) clauu2_L.$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX16),1) ZBLASOBJS = zlauu2_U.$(SUFFIX) zlauu2_L.$(SUFFIX) +endif XBLASOBJS = xlauu2_U.$(SUFFIX) xlauu2_L.$(SUFFIX) slauu2_U.$(SUFFIX) : lauu2_U.c diff --git a/lapack/lauum/Makefile b/lapack/lauum/Makefile index f163479ef..c57f17937 100644 --- a/lapack/lauum/Makefile +++ b/lapack/lauum/Makefile @@ -17,6 +17,19 @@ ZBLASOBJS += zlauum_U_parallel.$(SUFFIX) zlauum_L_parallel.$(SUFFIX) XBLASOBJS += xlauum_U_parallel.$(SUFFIX) xlauum_L_parallel.$(SUFFIX) endif +ifneq ($(BUILD_SINGLE),1) +SBLASOBJS= +endif +ifneq ($(BUILD_DOUBLE),1) +DBLASOBJS= +endif +ifneq ($(BUILD_COMPLEX),1) +CBLASOBJS= +endif +ifneq ($(BUILD_COMPLEX16),1) +ZBLASOBJS= +endif + slauum_U_single.$(SUFFIX) : lauum_U_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) diff --git a/lapack/potf2/Makefile b/lapack/potf2/Makefile index 5946ad9c8..f48570064 100644 --- a/lapack/potf2/Makefile +++ b/lapack/potf2/Makefile @@ -8,6 +8,19 @@ CBLASOBJS = cpotf2_U.$(SUFFIX) cpotf2_L.$(SUFFIX) ZBLASOBJS = zpotf2_U.$(SUFFIX) zpotf2_L.$(SUFFIX) XBLASOBJS = xpotf2_U.$(SUFFIX) xpotf2_L.$(SUFFIX) +ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" + SBLASOBJS= +endif +ifneq ($(BUILD_DOUBLE),1) + DBLASOBJS= +endif +ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" + CBLASOBJS= +endif +ifneq ($(BUILD_COMPLEX16),1) + ZBLASOBJS= +endif + spotf2_U.$(SUFFIX) : potf2_U.c $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) diff --git a/lapack/potrf/Makefile b/lapack/potrf/Makefile index 21efa5540..feefd0483 100644 --- a/lapack/potrf/Makefile +++ b/lapack/potrf/Makefile @@ -17,6 +17,20 @@ ZBLASOBJS += zpotrf_U_parallel.$(SUFFIX) zpotrf_L_parallel.$(SUFFIX) XBLASOBJS += xpotrf_U_parallel.$(SUFFIX) xpotrf_L_parallel.$(SUFFIX) endif +ifeq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" "" +SBLASOBJS= +endif +ifneq ($(BUILD_DOUBLE),1) +DBLASOBJS= +endif +ifeq "$(or $(BUILD_COMPLEX),$(BUILD_COMPLEX16))" "" +CBLASOBJS= +endif +ifneq ($(BUILD_COMPLEX16),1) +ZBLASOBJS= +endif + + spotrf_U_single.$(SUFFIX) : potrf_U_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $(@F) diff --git a/lapack/trti2/Makefile b/lapack/trti2/Makefile index 45251fb1e..005e80d73 100644 --- a/lapack/trti2/Makefile +++ b/lapack/trti2/Makefile @@ -1,11 +1,19 @@ TOPDIR = ../.. include ../../Makefile.system +ifeq ($(BUILD_SINGLE),1) SBLASOBJS = strti2_UU.$(SUFFIX) strti2_UN.$(SUFFIX) strti2_LU.$(SUFFIX) strti2_LN.$(SUFFIX) +endif +ifeq ($(BUILD_DOUBLE),1) DBLASOBJS = dtrti2_UU.$(SUFFIX) dtrti2_UN.$(SUFFIX) dtrti2_LU.$(SUFFIX) dtrti2_LN.$(SUFFIX) +endif QBLASOBJS = qtrti2_UU.$(SUFFIX) qtrti2_UN.$(SUFFIX) qtrti2_LU.$(SUFFIX) qtrti2_LN.$(SUFFIX) +ifeq ($(BUILD_COMPLEX),1) CBLASOBJS = ctrti2_UU.$(SUFFIX) ctrti2_UN.$(SUFFIX) ctrti2_LU.$(SUFFIX) ctrti2_LN.$(SUFFIX) +endif +ifeq ($(BUILD_COMPLEX16),1) ZBLASOBJS = ztrti2_UU.$(SUFFIX) ztrti2_UN.$(SUFFIX) ztrti2_LU.$(SUFFIX) ztrti2_LN.$(SUFFIX) +endif XBLASOBJS = xtrti2_UU.$(SUFFIX) xtrti2_UN.$(SUFFIX) xtrti2_LU.$(SUFFIX) xtrti2_LN.$(SUFFIX) strti2_UU.$(SUFFIX) : trti2_U.c diff --git a/lapack/trtri/Makefile b/lapack/trtri/Makefile index 626c47bbf..72167ff56 100644 --- a/lapack/trtri/Makefile +++ b/lapack/trtri/Makefile @@ -23,6 +23,19 @@ ZBLASOBJS += ztrtri_UU_parallel.$(SUFFIX) ztrtri_UN_parallel.$(SUFFIX) ztrtri_LU XBLASOBJS += xtrtri_UU_parallel.$(SUFFIX) xtrtri_UN_parallel.$(SUFFIX) xtrtri_LU_parallel.$(SUFFIX) xtrtri_LN_parallel.$(SUFFIX) endif +ifneq ($(BUILD_SINGLE),1) +SBLASOBJS= +endif +ifneq ($(BUILD_DOUBLE),1) +DBLASOBJS= +endif +ifneq ($(BUILD_COMPLEX),1) +CBLASOBJS= +endif +ifneq ($(BUILD_COMPLEX16),1) +ZBLASOBJS= +endif + strtri_UU_single.$(SUFFIX) : trtri_U_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DUNIT $< -o $(@F) diff --git a/lapack/trtrs/Makefile b/lapack/trtrs/Makefile index a3b8f4322..8ba63c21a 100644 --- a/lapack/trtrs/Makefile +++ b/lapack/trtrs/Makefile @@ -17,6 +17,19 @@ ZBLASOBJS += ztrtrs_UNU_parallel.$(SUFFIX) ztrtrs_UNN_parallel.$(SUFFIX) ztrtrs_ XBLASOBJS += xtrtrs_UNU_parallel.$(SUFFIX) xtrtrs_UNN_parallel.$(SUFFIX) xtrtrs_UTU_parallel.$(SUFFIX) xtrtrs_UTN_parallel.$(SUFFIX) xtrtrs_URU_parallel.$(SUFFIX) xtrtrs_URN_parallel.$(SUFFIX) xtrtrs_UCU_parallel.$(SUFFIX) xtrtrs_UCN_parallel.$(SUFFIX) xtrtrs_LNU_parallel.$(SUFFIX) xtrtrs_LNN_parallel.$(SUFFIX) xtrtrs_LTU_parallel.$(SUFFIX) xtrtrs_LTN_parallel.$(SUFFIX) xtrtrs_LRU_parallel.$(SUFFIX) xtrtrs_LRN_parallel.$(SUFFIX) xtrtrs_LCU_parallel.$(SUFFIX) xtrtrs_LCN_parallel.$(SUFFIX) endif +ifneq ($(BUILD_SINGLE),1) +SBLASOBJS= +endif +ifneq ($(BUILD_DOUBLE),1) +DBLASOBJS= +endif +ifneq ($(BUILD_COMPLEX),1) +CBLASOBJS= +endif +ifneq ($(BUILD_COMPLEX16),1) +ZBLASOBJS= +endif + strtrs_UNU_single.$(SUFFIX) : trtrs_single.c $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UUPLO -UTRANS -UDIAG $< -o $(@F) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f1f773cba..360ff2151 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories(${PROJECT_BINARY_DIR}) enable_language(Fortran) if (BUILD_SINGLE) - list( APPEND OpenBLAS_Tests sblat1 sblat2 sblat3) + list( APPEND OpenBLAS_Tests sblat1 sblat2 sblat3) endif() if (BUILD_DOUBLE) list (APPEND OpenBLAS_Tests dblat1 dblat2 dblat3) @@ -17,7 +17,7 @@ if (BUILD_COMPLEX16) endif() foreach(test_bin ${OpenBLAS_Tests}) -add_executable(${test_bin} ${test_bin}.f) + add_executable(${test_bin} ${test_bin}.f) target_link_libraries(${test_bin} ${OpenBLAS_LIBNAME}) endforeach() @@ -34,7 +34,19 @@ FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_helper.sh "fi\n" ) -set(float_types s d c z) +#set(float_types s d c z) +if (BUILD_SINGLE) + list (APPEND float_types s) +endif() +if (BUILD_DOUBLE) + list (APPEND float_types d) +endif() +if (BUILD_COMPLEX) + list (APPEND float_types c) +endif() +if (BUILD_COMPLEX16) + list (APPEND float_types z) +endif() foreach(float_type ${float_types}) string(TOUPPER ${float_type} float_type_upper) add_test(NAME "${float_type}blas1" diff --git a/test/Makefile b/test/Makefile index 45f9821ec..069d7880a 100644 --- a/test/Makefile +++ b/test/Makefile @@ -7,82 +7,241 @@ all :: else all :: level1 level2 level3 endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1) +level1: sblat1 dblat1 cblat1 zblat1 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1) +level1: dblat1 cblat1 zblat1 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1) +level1: sblat1 cblat1 zblat1 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1) +level1: cblat1 zblat1 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x) +level1: cblat1 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1) +level1: zblat1 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1) +level1: sblat1 zblat1 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1) +level1: sblat1 dblat1 zblat1 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx) +level1: sblat1 dblat1 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx) +level1: sblat1 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx) +level1: dblat1 +endif -level1 : sblat1 dblat1 cblat1 zblat1 ifndef CROSS +ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat1 +endif +ifeq ($(BUILD_DOUBLE),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat1 +endif +ifeq ($(BUILD_COMPLEX),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat1 +endif +ifeq ($(BUILD_COMPLEX16),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat1 +endif ifdef SMP ifeq ($(USE_OPENMP), 1) +ifeq ($(BUILD_SINGLE),1) OMP_NUM_THREADS=2 ./sblat1 +endif +ifeq ($(BUILD_DOUBLE),1) OMP_NUM_THREADS=2 ./dblat1 +endif +ifeq ($(BUILD_COMPLEX),1) OMP_NUM_THREADS=2 ./cblat1 +endif +ifeq ($(BUILD_COMPLEX16),1) OMP_NUM_THREADS=2 ./zblat1 +endif else +ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=2 ./sblat1 +endif +ifeq ($(BUILD_DOUBLE),1) OPENBLAS_NUM_THREADS=2 ./dblat1 +endif +ifeq ($(BUILD_COMPLEX),1) OPENBLAS_NUM_THREADS=2 ./cblat1 +endif +ifeq ($(BUILD_COMPLEX16),1) OPENBLAS_NUM_THREADS=2 ./zblat1 endif endif endif +endif + +#level2: sblat2 dblat2 cblat2 zblat2 +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1) +level2: sblat2 dblat2 cblat2 zblat2 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1) +level2: dblat2 cblat2 zblat2 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1) +level2: sblat2 cblat2 zblat2 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1) +level2: cblat2 zblat2 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x) +level2: cblat2 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1) +level2: zblat2 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1) +level2: sblat2 zblat2 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1) +level2: sblat2 dblat2 zblat2 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx) +level2: sblat2 dblat2 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx) +level2: sblat2 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx) +level2: dblat2 +endif -level2 : sblat2 dblat2 cblat2 zblat2 ifndef CROSS rm -f ?BLAT2.SUMM +ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 +endif +ifeq ($(BUILD_DOUBLE),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat2 < ./dblat2.dat @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat2 < ./cblat2.dat @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX16),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat2 < ./zblat2.dat @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 +endif ifdef SMP rm -f ?BLAT2.SUMM ifeq ($(USE_OPENMP), 1) +ifeq ($(BUILD_SINGLE),1) OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 +endif +ifeq ($(BUILD_DOUBLE),1) OMP_NUM_THREADS=2 ./dblat2 < ./dblat2.dat @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX),1) OMP_NUM_THREADS=2 ./cblat2 < ./cblat2.dat @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX16),1) OMP_NUM_THREADS=2 ./zblat2 < ./zblat2.dat @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 +endif else +ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat @$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0 +endif +ifeq ($(BUILD_DOUBLE),1) OPENBLAS_NUM_THREADS=2 ./dblat2 < ./dblat2.dat @$(GREP) -q FATAL DBLAT2.SUMM && cat DBLAT2.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX),1) OPENBLAS_NUM_THREADS=2 ./cblat2 < ./cblat2.dat @$(GREP) -q FATAL CBLAT2.SUMM && cat CBLAT2.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX16),1) OPENBLAS_NUM_THREADS=2 ./zblat2 < ./zblat2.dat @$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0 endif endif endif - -ifeq ($(BUILD_HALF),1) -level3 : test_shgemm sblat3 dblat3 cblat3 zblat3 -else -level3 : sblat3 dblat3 cblat3 zblat3 endif + +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1x1x1) +level3: sblat3 dblat3 cblat3 zblat3 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1x1x1) +level3: dblat3 cblat3 zblat3 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xx1x1) +level3: sblat3 cblat3 zblat3 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x1) +level3: cblat3 zblat3 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xx1x) +level3: cblat3 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),xxx1) +level3: zblat3 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx1) +level3: sblat3 zblat3 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx1) +level3: sblat3 dblat3 zblat3 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1x1xx) +level3: sblat3 dblat3 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),1xxx) +level3: sblat3 +endif +ifeq ($(BUILD_SINGLE)x$(BUILD_DOUBLE)x$(BUILD_COMPLEX)x$(BUILD_COMPLEX16),x1xx) +level3: dblat3 +endif + + + +#ifeq ($(BUILD_HALF),1) +#level3 : test_shgemm sblat3 dblat3 cblat3 zblat3 +#else +#level3 : sblat3 dblat3 cblat3 zblat3 +#endif + ifndef CROSS rm -f ?BLAT3.SUMM ifeq ($(BUILD_HALF),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_shgemm > SHBLAT3.SUMM @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 endif +ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat3 < ./sblat3.dat @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 +endif +ifeq ($(BUILD_DOUBLE),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./dblat3 < ./dblat3.dat @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./cblat3 < ./cblat3.dat @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX16),1) OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./zblat3 < ./zblat3.dat @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 +endif ifdef SMP rm -f ?BLAT3.SUMM ifeq ($(USE_OPENMP), 1) @@ -90,30 +249,46 @@ ifeq ($(BUILD_HALF),1) OMP_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 endif +ifeq ($(BUILD_SINGLE),1) OMP_NUM_THREADS=2 ./sblat3 < ./sblat3.dat @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 +endif +ifeq ($(BUILD_DOUBLE),1) OMP_NUM_THREADS=2 ./dblat3 < ./dblat3.dat @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX),1) OMP_NUM_THREADS=2 ./cblat3 < ./cblat3.dat @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX16),1) OMP_NUM_THREADS=2 ./zblat3 < ./zblat3.dat @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 +endif else ifeq ($(BUILD_HALF),1) OPENBLAS_NUM_THREADS=2 ./test_shgemm > SHBLAT3.SUMM @$(GREP) -q FATAL SHBLAT3.SUMM && cat SHBLAT3.SUMM || exit 0 endif +ifeq ($(BUILD_SINGLE),1) OPENBLAS_NUM_THREADS=2 ./sblat3 < ./sblat3.dat @$(GREP) -q FATAL SBLAT3.SUMM && cat SBLAT3.SUMM || exit 0 +endif +ifeq ($(BUILD_DOUBLE),1) OPENBLAS_NUM_THREADS=2 ./dblat3 < ./dblat3.dat @$(GREP) -q FATAL DBLAT3.SUMM && cat DBLAT3.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX),1) OPENBLAS_NUM_THREADS=2 ./cblat3 < ./cblat3.dat @$(GREP) -q FATAL CBLAT3.SUMM && cat CBLAT3.SUMM || exit 0 +endif +ifeq ($(BUILD_COMPLEX16),1) OPENBLAS_NUM_THREADS=2 ./zblat3 < ./zblat3.dat @$(GREP) -q FATAL ZBLAT3.SUMM && cat ZBLAT3.SUMM || exit 0 endif endif endif +endif level3_3m : zblat3_3m cblat3_3m @@ -151,56 +326,71 @@ endif endif endif +ifeq ($(BUILD_SINGLE),1) sblat1 : sblat1.$(SUFFIX) ../$(LIBNAME) $(FC) $(FLDFLAGS) -o sblat1 sblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) -dblat1 : dblat1.$(SUFFIX) ../$(LIBNAME) - $(FC) $(FLDFLAGS) -o dblat1 dblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) - -qblat1 : qblat1.$(SUFFIX) ../$(LIBNAME) - $(FC) $(FLDFLAGS) -o qblat1 qblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) - -cblat1 : cblat1.$(SUFFIX) ../$(LIBNAME) - $(FC) $(FLDFLAGS) -o cblat1 cblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) - -zblat1 : zblat1.$(SUFFIX) ../$(LIBNAME) - $(FC) $(FLDFLAGS) -o zblat1 zblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) - sblat2 : sblat2.$(SUFFIX) ../$(LIBNAME) $(FC) $(FLDFLAGS) -o sblat2 sblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) +sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME) + $(FC) $(FLDFLAGS) -o sblat3 sblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) +endif + +ifeq ($(BUILD_DOUBLE),1) +dblat1 : dblat1.$(SUFFIX) ../$(LIBNAME) + $(FC) $(FLDFLAGS) -o dblat1 dblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) + dblat2 : dblat2.$(SUFFIX) ../$(LIBNAME) $(FC) $(FLDFLAGS) -o dblat2 dblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) +dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME) + $(FC) $(FLDFLAGS) -o dblat3 dblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) +else +dblat2: +dblat3: +endif + + +qblat1 : qblat1.$(SUFFIX) ../$(LIBNAME) + $(FC) $(FLDFLAGS) -o qblat1 qblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) + +ifeq ($(BUILD_COMPLEX),1) +cblat1 : cblat1.$(SUFFIX) ../$(LIBNAME) + $(FC) $(FLDFLAGS) -o cblat1 cblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) + cblat2 : cblat2.$(SUFFIX) ../$(LIBNAME) $(FC) $(FLDFLAGS) -o cblat2 cblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) +cblat3 : cblat3.$(SUFFIX) ../$(LIBNAME) + $(FC) $(FLDFLAGS) -o cblat3 cblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) +endif + +ifeq ($(BUILD_COMPLEX16),1) +zblat1 : zblat1.$(SUFFIX) ../$(LIBNAME) + $(FC) $(FLDFLAGS) -o zblat1 zblat1.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) + zblat2 : zblat2.$(SUFFIX) ../$(LIBNAME) $(FC) $(FLDFLAGS) -o zblat2 zblat2.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) -sblat3 : sblat3.$(SUFFIX) ../$(LIBNAME) - $(FC) $(FLDFLAGS) -o sblat3 sblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) +zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME) + $(FC) $(FLDFLAGS) -o zblat3 zblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) +endif ifeq ($(BUILD_HALF),1) test_shgemm : compare_sgemm_shgemm.c ../$(LIBNAME) $(FC) $(FLDFLAGS) -o test_shgemm compare_sgemm_shgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) endif -dblat3 : dblat3.$(SUFFIX) ../$(LIBNAME) - $(FC) $(FLDFLAGS) -o dblat3 dblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) - -cblat3 : cblat3.$(SUFFIX) ../$(LIBNAME) - $(FC) $(FLDFLAGS) -o cblat3 cblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) - -zblat3 : zblat3.$(SUFFIX) ../$(LIBNAME) - $(FC) $(FLDFLAGS) -o zblat3 zblat3.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) - +ifeq ($(BUILD_COMPLEX),1) cblat3_3m : cblat3_3m.$(SUFFIX) ../$(LIBNAME) $(FC) $(FLDFLAGS) -o cblat3_3m cblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) +endif +ifeq ($(BUILD_COMPLEX16),1) zblat3_3m : zblat3_3m.$(SUFFIX) ../$(LIBNAME) $(FC) $(FLDFLAGS) -o zblat3_3m zblat3_3m.$(SUFFIX) ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB) - +endif diff --git a/utest/test_dsdot.c b/utest/test_dsdot.c index 57da7101e..adef4e91c 100644 --- a/utest/test_dsdot.c +++ b/utest/test_dsdot.c @@ -32,7 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. **********************************************************************************/ #include "openblas_utest.h" - +#if defined(BUILD_SINGLE) && defined(BUILD_DOUBLE) CTEST(dsdot,dsdot_n_1) { float x= 0.172555164F; @@ -47,17 +47,4 @@ CTEST(dsdot,dsdot_n_1) ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS); } - -CTEST(dsdot,dsdot_n_2) -{ - float x[] = {0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F}; - float y[] = {0.1F, 0.2F, 0.3F, 0.4F, 0.5F, 0.6F, 0.7F, 0.8F}; - blasint incx=1; - blasint incy=1; - blasint n=8; - - double res1=0.0f, res2= 2.0400000444054616; - - res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy); - ASSERT_DBL_NEAR_TOL(res2, res1, DOUBLE_EPS); -} \ No newline at end of file +#endif diff --git a/utest/test_fork.c b/utest/test_fork.c index 0b90407b1..5c976f920 100644 --- a/utest/test_fork.c +++ b/utest/test_fork.c @@ -48,6 +48,7 @@ void* xmalloc(size_t n) } } +#ifdef BUILD_DOUBLE void check_dgemm(double *a, double *b, double *result, double *expected, blasint n) { char trans1 = 'T'; @@ -59,9 +60,13 @@ void check_dgemm(double *a, double *b, double *result, double *expected, blasint ASSERT_DBL_NEAR_TOL(expected[i], result[i], DOUBLE_EPS); } } +#endif CTEST(fork, safety) { +#ifndef BUILD_DOUBLE +exit(0); +#else blasint n = 1000; int i; @@ -124,4 +129,5 @@ CTEST(fork, safety) ASSERT_EQUAL(wait_pid, fork_pid); ASSERT_EQUAL(0, WEXITSTATUS (child_status)); } +#endif } diff --git a/utest/test_potrs.c b/utest/test_potrs.c index 2681615f4..f39287d6f 100644 --- a/utest/test_potrs.c +++ b/utest/test_potrs.c @@ -529,16 +529,20 @@ CTEST(potrf, smoketest_trivial){ for (j = 0; j < n; ++j) { double err; +#ifdef BUILD_SINGLE err = fabs(A1s[i+n*j] - Bs[i+n*j]); if (err > 1e-5) { CTEST_ERR("%s:%d %c s(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); } - +#endif +#ifdef BUILD_DOUBLE err = fabs(A1d[i+n*j] - Bd[i+n*j]); if (err > 1e-12) { CTEST_ERR("%s:%d %c d(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); } +#endif +#ifdef BUILD_COMPLEX #ifdef OPENBLAS_COMPLEX_C99 err = cabsf(A1c[i+n*j] - Bc[i+n*j]); #else @@ -548,7 +552,9 @@ CTEST(potrf, smoketest_trivial){ if (err > 1e-5) { CTEST_ERR("%s:%d %c c(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); } +#endif +#ifdef BUILD_COMPLEX16 #ifdef OPENBLAS_COMPLEX_C99 err = cabs(A1z[i+n*j] - Bz[i+n*j]); #else @@ -558,6 +564,7 @@ CTEST(potrf, smoketest_trivial){ if (err > 1e-12) { CTEST_ERR("%s:%d %c z(%d,%d) difference: %g", __FILE__, __LINE__, uplo, i, j, err); } +#endif } } }