diff --git a/Makefile b/Makefile index 343bd72f4..01bedaf4d 100644 --- a/Makefile +++ b/Makefile @@ -207,6 +207,7 @@ else netlib : lapack_prebuild ifndef NOFORTRAN @$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib + @$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib endif ifndef NO_LAPACKE @$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib @@ -230,11 +231,18 @@ ifndef NOFORTRAN -@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc + -@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc +ifeq ($(F_COMPILER), GFORTRAN) + -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc +else + -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc +endif -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc endif @@ -256,13 +264,12 @@ lapack-timing : large.tgz timing.tgz ifndef NOFORTRAN (cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING) (cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz ) - make -C $(NETLIB_LAPACK_DIR) tmglib make -C $(NETLIB_LAPACK_DIR)/TIMING endif lapack-test : - make -j 1 -C $(NETLIB_LAPACK_DIR) tmglib + (cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) @@ -291,4 +298,5 @@ endif @$(MAKE) -C $(NETLIB_LAPACK_DIR) clean @rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h @rm -f *.grd Makefile.conf_last config_last.h + @(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt) @echo Done. diff --git a/exports/gensymbol b/exports/gensymbol index 7e339c99e..58a309f9e 100644 --- a/exports/gensymbol +++ b/exports/gensymbol @@ -2667,34 +2667,34 @@ ## @(MATGEN_OBJ) from `lapack-3.4.1/lapacke/src/Makefile` ## Not exported: requires LAPACKE_TESTING to be set and depends on libtmg ## (see `lapack-3.4.1/TESTING/MATGEN`). - #LAPACKE_clatms, - #LAPACKE_clatms_work, - #LAPACKE_dlatms, - #LAPACKE_dlatms_work, - #LAPACKE_slatms, - #LAPACKE_slatms_work, - #LAPACKE_zlatms, - #LAPACKE_zlatms_work, - #LAPACKE_clagge, - #LAPACKE_clagge_work, - #LAPACKE_dlagge, - #LAPACKE_dlagge_work, - #LAPACKE_slagge, - #LAPACKE_slagge_work, - #LAPACKE_zlagge, - #LAPACKE_zlagge_work, - #LAPACKE_claghe, - #LAPACKE_claghe_work, - #LAPACKE_zlaghe, - #LAPACKE_zlaghe_work, - #LAPACKE_clagsy, - #LAPACKE_clagsy_work, - #LAPACKE_dlagsy, - #LAPACKE_dlagsy_work, - #LAPACKE_slagsy, - #LAPACKE_slagsy_work, - #LAPACKE_zlagsy, - #LAPACKE_zlagsy_work, + LAPACKE_clatms, + LAPACKE_clatms_work, + LAPACKE_dlatms, + LAPACKE_dlatms_work, + LAPACKE_slatms, + LAPACKE_slatms_work, + LAPACKE_zlatms, + LAPACKE_zlatms_work, + LAPACKE_clagge, + LAPACKE_clagge_work, + LAPACKE_dlagge, + LAPACKE_dlagge_work, + LAPACKE_slagge, + LAPACKE_slagge_work, + LAPACKE_zlagge, + LAPACKE_zlagge_work, + LAPACKE_claghe, + LAPACKE_claghe_work, + LAPACKE_zlaghe, + LAPACKE_zlaghe_work, + LAPACKE_clagsy, + LAPACKE_clagsy_work, + LAPACKE_dlagsy, + LAPACKE_dlagsy_work, + LAPACKE_slagsy, + LAPACKE_slagsy_work, + LAPACKE_zlagsy, + LAPACKE_zlagsy_work, ); #These function may need 2 underscores. diff --git a/interface/Makefile b/interface/Makefile index 16d59a6e6..9774f37b2 100644 --- a/interface/Makefile +++ b/interface/Makefile @@ -349,7 +349,8 @@ XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) SLAPACKOBJS = \ sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ - spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) + spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \ + slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX) #DLAPACKOBJS = \ @@ -359,7 +360,8 @@ SLAPACKOBJS = \ DLAPACKOBJS = \ dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \ - dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) + dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \ + dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX) QLAPACKOBJS = \ @@ -374,7 +376,8 @@ QLAPACKOBJS = \ CLAPACKOBJS = \ cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \ - cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) + cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \ + clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX) #ZLAPACKOBJS = \ @@ -384,7 +387,9 @@ CLAPACKOBJS = \ ZLAPACKOBJS = \ zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \ - zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) + zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \ + zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX) + @@ -1788,37 +1793,37 @@ zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : lapack/zgetrf.c xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c $(CC) -c $(CFLAGS) $< -o $(@F) -slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lauu2.c +slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lapack/lauu2.c $(CC) -c $(CFLAGS) $< -o $(@F) -dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lauu2.c +dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lapack/lauu2.c $(CC) -c $(CFLAGS) $< -o $(@F) qlauu2.$(SUFFIX) qlauu2.$(PSUFFIX) : lauu2.c $(CC) -c $(CFLAGS) $< -o $(@F) -clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : zlauu2.c +clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : lapack/zlauu2.c $(CC) -c $(CFLAGS) $< -o $(@F) -zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : zlauu2.c +zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : lapack/zlauu2.c $(CC) -c $(CFLAGS) $< -o $(@F) xlauu2.$(SUFFIX) xlauu2.$(PSUFFIX) : zlauu2.c $(CC) -c $(CFLAGS) $< -o $(@F) -slauum.$(SUFFIX) slauum.$(PSUFFIX) : lauum.c +slauum.$(SUFFIX) slauum.$(PSUFFIX) : lapack/lauum.c $(CC) -c $(CFLAGS) $< -o $(@F) -dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lauum.c +dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lapack/lauum.c $(CC) -c $(CFLAGS) $< -o $(@F) qlauum.$(SUFFIX) qlauum.$(PSUFFIX) : lauum.c $(CC) -c $(CFLAGS) $< -o $(@F) -clauum.$(SUFFIX) clauum.$(PSUFFIX) : zlauum.c +clauum.$(SUFFIX) clauum.$(PSUFFIX) : lapack/zlauum.c $(CC) -c $(CFLAGS) $< -o $(@F) -zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : zlauum.c +zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : lapack/zlauum.c $(CC) -c $(CFLAGS) $< -o $(@F) xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c @@ -1860,37 +1865,37 @@ zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : lapack/zpotrf.c xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c $(CC) -c $(CFLAGS) $< -o $(@F) -strti2.$(SUFFIX) strti2.$(PSUFFIX) : trti2.c +strti2.$(SUFFIX) strti2.$(PSUFFIX) : lapack/trti2.c $(CC) -c $(CFLAGS) $< -o $(@F) -dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : trti2.c +dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : lapack/trti2.c $(CC) -c $(CFLAGS) $< -o $(@F) qtrti2.$(SUFFIX) qtrti2.$(PSUFFIX) : trti2.c $(CC) -c $(CFLAGS) $< -o $(@F) -ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : ztrti2.c +ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : lapack/ztrti2.c $(CC) -c $(CFLAGS) $< -o $(@F) -ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : ztrti2.c +ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : lapack/ztrti2.c $(CC) -c $(CFLAGS) $< -o $(@F) xtrti2.$(SUFFIX) xtrti2.$(PSUFFIX) : ztrti2.c $(CC) -c $(CFLAGS) $< -o $(@F) -strtri.$(SUFFIX) strtri.$(PSUFFIX) : trtri.c +strtri.$(SUFFIX) strtri.$(PSUFFIX) : lapack/trtri.c $(CC) -c $(CFLAGS) $< -o $(@F) -dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : trtri.c +dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : lapack/trtri.c $(CC) -c $(CFLAGS) $< -o $(@F) qtrtri.$(SUFFIX) qtrtri.$(PSUFFIX) : trtri.c $(CC) -c $(CFLAGS) $< -o $(@F) -ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : ztrtri.c +ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : lapack/ztrtri.c $(CC) -c $(CFLAGS) $< -o $(@F) -ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : ztrtri.c +ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : lapack/ztrtri.c $(CC) -c $(CFLAGS) $< -o $(@F) xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c @@ -1950,19 +1955,19 @@ zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : lapack/gesv.c xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c $(CC) -c $(CFLAGS) $< -o $(@F) -spotri.$(SUFFIX) spotri.$(PSUFFIX) : potri.c +spotri.$(SUFFIX) spotri.$(PSUFFIX) : lapack/potri.c $(CC) -c $(CFLAGS) $< -o $(@F) -dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : potri.c +dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : lapack/potri.c $(CC) -c $(CFLAGS) $< -o $(@F) qpotri.$(SUFFIX) qpotri.$(PSUFFIX) : potri.c $(CC) -c $(CFLAGS) $< -o $(@F) -cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : zpotri.c +cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : lapack/zpotri.c $(CC) -c $(CFLAGS) $< -o $(@F) -zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : zpotri.c +zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : lapack/zpotri.c $(CC) -c $(CFLAGS) $< -o $(@F) xpotri.$(SUFFIX) xpotri.$(PSUFFIX) : zpotri.c diff --git a/interface/lapack/lauu2.c.bad b/interface/lapack/lauu2.c similarity index 100% rename from interface/lapack/lauu2.c.bad rename to interface/lapack/lauu2.c diff --git a/interface/lapack/lauum.c.bad b/interface/lapack/lauum.c similarity index 100% rename from interface/lapack/lauum.c.bad rename to interface/lapack/lauum.c diff --git a/interface/lapack/potri.c.bad b/interface/lapack/potri.c similarity index 100% rename from interface/lapack/potri.c.bad rename to interface/lapack/potri.c diff --git a/interface/lapack/trti2.c.bad b/interface/lapack/trti2.c similarity index 100% rename from interface/lapack/trti2.c.bad rename to interface/lapack/trti2.c diff --git a/interface/lapack/trtri.c.bad b/interface/lapack/trtri.c similarity index 100% rename from interface/lapack/trtri.c.bad rename to interface/lapack/trtri.c diff --git a/interface/lapack/zlauu2.c.bad b/interface/lapack/zlauu2.c similarity index 100% rename from interface/lapack/zlauu2.c.bad rename to interface/lapack/zlauu2.c diff --git a/interface/lapack/zlauum.c.bad b/interface/lapack/zlauum.c similarity index 100% rename from interface/lapack/zlauum.c.bad rename to interface/lapack/zlauum.c diff --git a/interface/lapack/zpotri.c.bad b/interface/lapack/zpotri.c similarity index 95% rename from interface/lapack/zpotri.c.bad rename to interface/lapack/zpotri.c index df325424e..b777c111f 100644 --- a/interface/lapack/zpotri.c.bad +++ b/interface/lapack/zpotri.c @@ -149,7 +149,10 @@ int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ blas_memory_free(buffer); #endif - FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2. / 3. * args.m * args.n * args.n); + FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, .5 * args.n * args.n, + args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.)) + + args.n * (1./3. + args.n * (-1./2. + args.n * 1./6.))); + IDEBUG_END; diff --git a/interface/lapack/ztrti2.c.bad b/interface/lapack/ztrti2.c similarity index 100% rename from interface/lapack/ztrti2.c.bad rename to interface/lapack/ztrti2.c diff --git a/interface/lapack/ztrtri.c.bad b/interface/lapack/ztrtri.c similarity index 100% rename from interface/lapack/ztrtri.c.bad rename to interface/lapack/ztrtri.c diff --git a/lapack-netlib/SRC/Makefile b/lapack-netlib/SRC/Makefile index d8cef80ba..5d59c4b00 100644 --- a/lapack-netlib/SRC/Makefile +++ b/lapack-netlib/SRC/Makefile @@ -120,14 +120,14 @@ SLASRC = \ slarrv.o slartv.o \ slarz.o slarzb.o slarzt.o slasy2.o slasyf.o slasyf_rook.o \ slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \ - slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \ + sopgtr.o sopmtr.o sorg2l.o sorg2r.o \ sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \ sorgrq.o sorgtr.o sorm2l.o sorm2r.o \ sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \ sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \ spbstf.o spbsv.o spbsvx.o \ spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \ - sposvx.o spotri.o spstrf.o spstf2.o \ + sposvx.o spstrf.o spstf2.o \ sppcon.o sppequ.o \ spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \ spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \ @@ -147,7 +147,7 @@ SLASRC = \ stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \ stptrs.o \ strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \ - strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o \ + strtrs.o stzrqf.o stzrzf.o sstemr.o \ slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \ stfttr.o stpttf.o stpttr.o strttf.o strttp.o \ sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \ @@ -208,9 +208,9 @@ CLASRC = \ clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \ clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \ clasyf.o clasyf_rook.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \ - clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \ + clatzm.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \ cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \ - cposv.o cposvx.o cpotri.o cpstrf.o cpstf2.o \ + cposv.o cposvx.o cpstrf.o cpstf2.o \ cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \ cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \ crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \ @@ -225,7 +225,7 @@ CLASRC = \ ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \ ctprfs.o ctptri.o \ ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \ - ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \ + ctrsyl.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \ cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \ cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \ cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \ @@ -279,15 +279,15 @@ DLASRC = \ dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \ dlargv.o dlarrv.o dlartv.o \ dlarz.o dlarzb.o dlarzt.o dlasy2.o dlasyf.o dlasyf_rook.o \ - dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \ - dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \ + dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o \ + dopgtr.o dopmtr.o dorg2l.o dorg2r.o \ dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \ dorgrq.o dorgtr.o dorm2l.o dorm2r.o \ dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \ dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \ dpbstf.o dpbsv.o dpbsvx.o \ dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \ - dposvx.o dpotri.o dpotrs.o dpstrf.o dpstf2.o \ + dposvx.o dpotrs.o dpstrf.o dpstf2.o \ dppcon.o dppequ.o \ dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \ dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \ @@ -307,7 +307,7 @@ DLASRC = \ dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \ dtptrs.o \ dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \ - dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \ + dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \ dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \ dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \ dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \ @@ -369,10 +369,10 @@ ZLASRC = \ zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \ zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \ zlassq.o zlasyf.o zlasyf_rook.o \ - zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \ - zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \ + zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o \ + zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \ zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \ - zposv.o zposvx.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \ + zposv.o zposvx.o zpotrs.o zpstrf.o zpstf2.o \ zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \ zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \ zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \ @@ -387,7 +387,7 @@ ZLASRC = \ ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \ ztprfs.o ztptri.o \ ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \ - ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \ + ztrsyl.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \ zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \ zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \ zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \ diff --git a/lapack-netlib/TESTING/sep.in b/lapack-netlib/TESTING/sep.in index 7f9f54f52..e0ed58512 100644 --- a/lapack-netlib/TESTING/sep.in +++ b/lapack-netlib/TESTING/sep.in @@ -5,7 +5,7 @@ SEP: Data file for testing Symmetric Eigenvalue Problem routines 1 3 3 3 10 Values of NB (blocksize) 2 2 2 2 2 Values of NBMIN (minimum blocksize) 1 0 5 9 1 Values of NX (crossover point) -50.0 Threshold value +60.0 Threshold value T Put T to test the LAPACK routines T Put T to test the driver routines T Put T to test the error exits diff --git a/lapack-netlib/lapacke/src/Makefile b/lapack-netlib/lapacke/src/Makefile index 09790d0c8..1d5d1d25b 100644 --- a/lapack-netlib/lapacke/src/Makefile +++ b/lapack-netlib/lapacke/src/Makefile @@ -2072,9 +2072,9 @@ SOBJ_FILES := $(SSRC_OBJ) DOBJ_FILES := $(DSRC_OBJ) ZOBJ_FILES := $(ZSRC_OBJ) -ifdef LAPACKE_TESTING +# ifdef LAPACKE_TESTING ZOBJ_FILES += $(MATGEN_OBJ) -endif +#endif ALLOBJ = $(COBJ_FILES) $(DOBJ_FILES) $(SOBJ_FILES) $(ZOBJ_FILES) $(OBJ_FILES) diff --git a/lapack/Makefile b/lapack/Makefile index f99416fa6..aff5209d5 100644 --- a/lapack/Makefile +++ b/lapack/Makefile @@ -2,7 +2,7 @@ TOPDIR = .. include ../Makefile.system #SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs -SUBDIRS = getrf getf2 laswp getrs potrf potf2 +SUBDIRS = getrf getf2 laswp getrs potrf potf2 lauu2 lauum trti2 trtri FLAMEDIRS = laswp getf2 potf2 lauu2 trti2 diff --git a/lapack/getri/cgetri.f b/lapack/getri/cgetri.f deleted file mode 100644 index 6840f531c..000000000 --- a/lapack/getri/cgetri.f +++ /dev/null @@ -1,194 +0,0 @@ - SUBROUTINE CGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) -* -* -- LAPACK routine (version 3.0) -- -* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., -* Courant Institute, Argonne National Lab, and Rice University -* June 30, 1999 -* -* .. Scalar Arguments .. - INTEGER INFO, LDA, LWORK, N -* .. -* .. Array Arguments .. - INTEGER IPIV( * ) - COMPLEX A( LDA, * ), WORK( * ) -* .. -* -* Purpose -* ======= -* -* CGETRI computes the inverse of a matrix using the LU factorization -* computed by CGETRF. -* -* This method inverts U and then computes inv(A) by solving the system -* inv(A)*L = inv(U) for inv(A). -* -* Arguments -* ========= -* -* N (input) INTEGER -* The order of the matrix A. N >= 0. -* -* A (input/output) COMPLEX array, dimension (LDA,N) -* On entry, the factors L and U from the factorization -* A = P*L*U as computed by CGETRF. -* On exit, if INFO = 0, the inverse of the original matrix A. -* -* LDA (input) INTEGER -* The leading dimension of the array A. LDA >= max(1,N). -* -* IPIV (input) INTEGER array, dimension (N) -* The pivot indices from CGETRF; for 1<=i<=N, row i of the -* matrix was interchanged with row IPIV(i). -* -* WORK (workspace/output) COMPLEX array, dimension (LWORK) -* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. -* -* LWORK (input) INTEGER -* The dimension of the array WORK. LWORK >= max(1,N). -* For optimal performance LWORK >= N*NB, where NB is -* the optimal blocksize returned by ILAENV. -* -* If LWORK = -1, then a workspace query is assumed; the routine -* only calculates the optimal size of the WORK array, returns -* this value as the first entry of the WORK array, and no error -* message related to LWORK is issued by XERBLA. -* -* INFO (output) INTEGER -* = 0: successful exit -* < 0: if INFO = -i, the i-th argument had an illegal value -* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is -* singular and its inverse could not be computed. -* -* ===================================================================== -* -* .. Parameters .. - COMPLEX ZERO, ONE - PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ), - $ ONE = ( 1.0E+0, 0.0E+0 ) ) -* .. -* .. Local Scalars .. - LOGICAL LQUERY - INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, - $ NBMIN, NN -* .. -* .. External Functions .. - INTEGER ILAENV - EXTERNAL ILAENV -* .. -* .. External Subroutines .. - EXTERNAL CGEMM, CGEMV, CSWAP, CTRSM, CTRTRI, XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC MAX, MIN -* .. -* .. Executable Statements .. -* -* Test the input parameters. -* - INFO = 0 - NB = ILAENV( 1, 'CGETRI', ' ', N, -1, -1, -1 ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT - LQUERY = ( LWORK.EQ.-1 ) - IF( N.LT.0 ) THEN - INFO = -1 - ELSE IF( LDA.LT.MAX( 1, N ) ) THEN - INFO = -3 - ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN - INFO = -6 - END IF - IF( INFO.NE.0 ) THEN - CALL XERBLA( 'CGETRI', -INFO ) - RETURN - ELSE IF( LQUERY ) THEN - RETURN - END IF -* -* Quick return if possible -* - IF( N.EQ.0 ) - $ RETURN -* -* Form inv(U). If INFO > 0 from CTRTRI, then U is singular, -* and the inverse is not computed. -* - CALL CTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) - IF( INFO.GT.0 ) - $ RETURN -* - NBMIN = 2 - LDWORK = N - IF( NB.GT.1 .AND. NB.LT.N ) THEN - IWS = MAX( LDWORK*NB, 1 ) - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK - NBMIN = MAX( 2, ILAENV( 2, 'CGETRI', ' ', N, -1, -1, -1 ) ) - END IF - ELSE - IWS = N - END IF -* -* Solve the equation inv(A)*L = inv(U) for inv(A). -* - IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN -* -* Use unblocked code. -* - DO 20 J = N, 1, -1 -* -* Copy current column of L to WORK and replace with zeros. -* - DO 10 I = J + 1, N - WORK( I ) = A( I, J ) - A( I, J ) = ZERO - 10 CONTINUE -* -* Compute current column of inv(A). -* - IF( J.LT.N ) - $ CALL CGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), - $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) - 20 CONTINUE - ELSE -* -* Use blocked code. -* - NN = ( ( N-1 ) / NB )*NB + 1 - DO 50 J = NN, 1, -NB - JB = MIN( NB, N-J+1 ) -* -* Copy current block column of L to WORK and replace with -* zeros. -* - DO 40 JJ = J, J + JB - 1 - DO 30 I = JJ + 1, N - WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) - A( I, JJ ) = ZERO - 30 CONTINUE - 40 CONTINUE -* -* Compute current block column of inv(A). -* - IF( J+JB.LE.N ) - $ CALL CGEMM( 'No transpose', 'No transpose', N, JB, - $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, - $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) - CALL CTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, - $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) - 50 CONTINUE - END IF -* -* Apply column interchanges. -* - DO 60 J = N - 1, 1, -1 - JP = IPIV( J ) - IF( JP.NE.J ) - $ CALL CSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) - 60 CONTINUE -* - WORK( 1 ) = IWS - RETURN -* -* End of CGETRI -* - END diff --git a/lapack/getri/dgetri.f b/lapack/getri/dgetri.f deleted file mode 100644 index c67a34803..000000000 --- a/lapack/getri/dgetri.f +++ /dev/null @@ -1,193 +0,0 @@ - SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) -* -* -- LAPACK routine (version 3.0) -- -* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., -* Courant Institute, Argonne National Lab, and Rice University -* June 30, 1999 -* -* .. Scalar Arguments .. - INTEGER INFO, LDA, LWORK, N -* .. -* .. Array Arguments .. - INTEGER IPIV( * ) - DOUBLE PRECISION A( LDA, * ), WORK( * ) -* .. -* -* Purpose -* ======= -* -* DGETRI computes the inverse of a matrix using the LU factorization -* computed by DGETRF. -* -* This method inverts U and then computes inv(A) by solving the system -* inv(A)*L = inv(U) for inv(A). -* -* Arguments -* ========= -* -* N (input) INTEGER -* The order of the matrix A. N >= 0. -* -* A (input/output) DOUBLE PRECISION array, dimension (LDA,N) -* On entry, the factors L and U from the factorization -* A = P*L*U as computed by DGETRF. -* On exit, if INFO = 0, the inverse of the original matrix A. -* -* LDA (input) INTEGER -* The leading dimension of the array A. LDA >= max(1,N). -* -* IPIV (input) INTEGER array, dimension (N) -* The pivot indices from DGETRF; for 1<=i<=N, row i of the -* matrix was interchanged with row IPIV(i). -* -* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK) -* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. -* -* LWORK (input) INTEGER -* The dimension of the array WORK. LWORK >= max(1,N). -* For optimal performance LWORK >= N*NB, where NB is -* the optimal blocksize returned by ILAENV. -* -* If LWORK = -1, then a workspace query is assumed; the routine -* only calculates the optimal size of the WORK array, returns -* this value as the first entry of the WORK array, and no error -* message related to LWORK is issued by XERBLA. -* -* INFO (output) INTEGER -* = 0: successful exit -* < 0: if INFO = -i, the i-th argument had an illegal value -* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is -* singular and its inverse could not be computed. -* -* ===================================================================== -* -* .. Parameters .. - DOUBLE PRECISION ZERO, ONE - PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 ) -* .. -* .. Local Scalars .. - LOGICAL LQUERY - INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, - $ NBMIN, NN -* .. -* .. External Functions .. - INTEGER ILAENV - EXTERNAL ILAENV -* .. -* .. External Subroutines .. - EXTERNAL DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC MAX, MIN -* .. -* .. Executable Statements .. -* -* Test the input parameters. -* - INFO = 0 - NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT - LQUERY = ( LWORK.EQ.-1 ) - IF( N.LT.0 ) THEN - INFO = -1 - ELSE IF( LDA.LT.MAX( 1, N ) ) THEN - INFO = -3 - ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN - INFO = -6 - END IF - IF( INFO.NE.0 ) THEN - CALL XERBLA( 'DGETRI', -INFO ) - RETURN - ELSE IF( LQUERY ) THEN - RETURN - END IF -* -* Quick return if possible -* - IF( N.EQ.0 ) - $ RETURN -* -* Form inv(U). If INFO > 0 from DTRTRI, then U is singular, -* and the inverse is not computed. -* - CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) - IF( INFO.GT.0 ) - $ RETURN -* - NBMIN = 2 - LDWORK = N - IF( NB.GT.1 .AND. NB.LT.N ) THEN - IWS = MAX( LDWORK*NB, 1 ) - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK - NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) ) - END IF - ELSE - IWS = N - END IF -* -* Solve the equation inv(A)*L = inv(U) for inv(A). -* - IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN -* -* Use unblocked code. -* - DO 20 J = N, 1, -1 -* -* Copy current column of L to WORK and replace with zeros. -* - DO 10 I = J + 1, N - WORK( I ) = A( I, J ) - A( I, J ) = ZERO - 10 CONTINUE -* -* Compute current column of inv(A). -* - IF( J.LT.N ) - $ CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), - $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) - 20 CONTINUE - ELSE -* -* Use blocked code. -* - NN = ( ( N-1 ) / NB )*NB + 1 - DO 50 J = NN, 1, -NB - JB = MIN( NB, N-J+1 ) -* -* Copy current block column of L to WORK and replace with -* zeros. -* - DO 40 JJ = J, J + JB - 1 - DO 30 I = JJ + 1, N - WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) - A( I, JJ ) = ZERO - 30 CONTINUE - 40 CONTINUE -* -* Compute current block column of inv(A). -* - IF( J+JB.LE.N ) - $ CALL DGEMM( 'No transpose', 'No transpose', N, JB, - $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, - $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) - CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, - $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) - 50 CONTINUE - END IF -* -* Apply column interchanges. -* - DO 60 J = N - 1, 1, -1 - JP = IPIV( J ) - IF( JP.NE.J ) - $ CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) - 60 CONTINUE -* - WORK( 1 ) = IWS - RETURN -* -* End of DGETRI -* - END diff --git a/lapack/getri/sgetri.f b/lapack/getri/sgetri.f deleted file mode 100644 index ec5932f16..000000000 --- a/lapack/getri/sgetri.f +++ /dev/null @@ -1,193 +0,0 @@ - SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) -* -* -- LAPACK routine (version 3.0) -- -* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., -* Courant Institute, Argonne National Lab, and Rice University -* June 30, 1999 -* -* .. Scalar Arguments .. - INTEGER INFO, LDA, LWORK, N -* .. -* .. Array Arguments .. - INTEGER IPIV( * ) - REAL A( LDA, * ), WORK( * ) -* .. -* -* Purpose -* ======= -* -* SGETRI computes the inverse of a matrix using the LU factorization -* computed by SGETRF. -* -* This method inverts U and then computes inv(A) by solving the system -* inv(A)*L = inv(U) for inv(A). -* -* Arguments -* ========= -* -* N (input) INTEGER -* The order of the matrix A. N >= 0. -* -* A (input/output) REAL array, dimension (LDA,N) -* On entry, the factors L and U from the factorization -* A = P*L*U as computed by SGETRF. -* On exit, if INFO = 0, the inverse of the original matrix A. -* -* LDA (input) INTEGER -* The leading dimension of the array A. LDA >= max(1,N). -* -* IPIV (input) INTEGER array, dimension (N) -* The pivot indices from SGETRF; for 1<=i<=N, row i of the -* matrix was interchanged with row IPIV(i). -* -* WORK (workspace/output) REAL array, dimension (LWORK) -* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. -* -* LWORK (input) INTEGER -* The dimension of the array WORK. LWORK >= max(1,N). -* For optimal performance LWORK >= N*NB, where NB is -* the optimal blocksize returned by ILAENV. -* -* If LWORK = -1, then a workspace query is assumed; the routine -* only calculates the optimal size of the WORK array, returns -* this value as the first entry of the WORK array, and no error -* message related to LWORK is issued by XERBLA. -* -* INFO (output) INTEGER -* = 0: successful exit -* < 0: if INFO = -i, the i-th argument had an illegal value -* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is -* singular and its inverse could not be computed. -* -* ===================================================================== -* -* .. Parameters .. - REAL ZERO, ONE - PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 ) -* .. -* .. Local Scalars .. - LOGICAL LQUERY - INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, - $ NBMIN, NN -* .. -* .. External Functions .. - INTEGER ILAENV - EXTERNAL ILAENV -* .. -* .. External Subroutines .. - EXTERNAL SGEMM, SGEMV, SSWAP, STRSM, STRTRI, XERBLA -* .. -* .. Intrinsic Functions .. - INTRINSIC MAX, MIN -* .. -* .. Executable Statements .. -* -* Test the input parameters. -* - INFO = 0 - NB = ILAENV( 1, 'SGETRI', ' ', N, -1, -1, -1 ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT - LQUERY = ( LWORK.EQ.-1 ) - IF( N.LT.0 ) THEN - INFO = -1 - ELSE IF( LDA.LT.MAX( 1, N ) ) THEN - INFO = -3 - ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN - INFO = -6 - END IF - IF( INFO.NE.0 ) THEN - CALL XERBLA( 'SGETRI', -INFO ) - RETURN - ELSE IF( LQUERY ) THEN - RETURN - END IF -* -* Quick return if possible -* - IF( N.EQ.0 ) - $ RETURN -* -* Form inv(U). If INFO > 0 from STRTRI, then U is singular, -* and the inverse is not computed. -* - CALL STRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) - IF( INFO.GT.0 ) - $ RETURN -* - NBMIN = 2 - LDWORK = N - IF( NB.GT.1 .AND. NB.LT.N ) THEN - IWS = MAX( LDWORK*NB, 1 ) - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK - NBMIN = MAX( 2, ILAENV( 2, 'SGETRI', ' ', N, -1, -1, -1 ) ) - END IF - ELSE - IWS = N - END IF -* -* Solve the equation inv(A)*L = inv(U) for inv(A). -* - IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN -* -* Use unblocked code. -* - DO 20 J = N, 1, -1 -* -* Copy current column of L to WORK and replace with zeros. -* - DO 10 I = J + 1, N - WORK( I ) = A( I, J ) - A( I, J ) = ZERO - 10 CONTINUE -* -* Compute current column of inv(A). -* - IF( J.LT.N ) - $ CALL SGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), - $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) - 20 CONTINUE - ELSE -* -* Use blocked code. -* - NN = ( ( N-1 ) / NB )*NB + 1 - DO 50 J = NN, 1, -NB - JB = MIN( NB, N-J+1 ) -* -* Copy current block column of L to WORK and replace with -* zeros. -* - DO 40 JJ = J, J + JB - 1 - DO 30 I = JJ + 1, N - WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) - A( I, JJ ) = ZERO - 30 CONTINUE - 40 CONTINUE -* -* Compute current block column of inv(A). -* - IF( J+JB.LE.N ) - $ CALL SGEMM( 'No transpose', 'No transpose', N, JB, - $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, - $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) - CALL STRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, - $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) - 50 CONTINUE - END IF -* -* Apply column interchanges. -* - DO 60 J = N - 1, 1, -1 - JP = IPIV( J ) - IF( JP.NE.J ) - $ CALL SSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) - 60 CONTINUE -* - WORK( 1 ) = IWS - RETURN -* -* End of SGETRI -* - END diff --git a/lapack/getri/zgetri.f b/lapack/getri/zgetri.f deleted file mode 100644 index 1eb4eb7f1..000000000 --- a/lapack/getri/zgetri.f +++ /dev/null @@ -1,194 +0,0 @@ - SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO ) -* -* -- LAPACK routine (version 3.0) -- -* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., -* Courant Institute, Argonne National Lab, and Rice University -* June 30, 1999 -* -* .. Scalar Arguments .. - INTEGER INFO, LDA, LWORK, N -* .. -* .. Array Arguments .. - INTEGER IPIV( * ) - COMPLEX*16 A( LDA, * ), WORK( * ) -* .. -* -* Purpose -* ======= -* -* ZGETRI computes the inverse of a matrix using the LU factorization -* computed by ZGETRF. -* -* This method inverts U and then computes inv(A) by solving the system -* inv(A)*L = inv(U) for inv(A). -* -* Arguments -* ========= -* -* N (input) INTEGER -* The order of the matrix A. N >= 0. -* -* A (input/output) COMPLEX*16 array, dimension (LDA,N) -* On entry, the factors L and U from the factorization -* A = P*L*U as computed by ZGETRF. -* On exit, if INFO = 0, the inverse of the original matrix A. -* -* LDA (input) INTEGER -* The leading dimension of the array A. LDA >= max(1,N). -* -* IPIV (input) INTEGER array, dimension (N) -* The pivot indices from ZGETRF; for 1<=i<=N, row i of the -* matrix was interchanged with row IPIV(i). -* -* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK) -* On exit, if INFO=0, then WORK(1) returns the optimal LWORK. -* -* LWORK (input) INTEGER -* The dimension of the array WORK. LWORK >= max(1,N). -* For optimal performance LWORK >= N*NB, where NB is -* the optimal blocksize returned by ILAENV. -* -* If LWORK = -1, then a workspace query is assumed; the routine -* only calculates the optimal size of the WORK array, returns -* this value as the first entry of the WORK array, and no error -* message related to LWORK is issued by XERBLA. -* -* INFO (output) INTEGER -* = 0: successful exit -* < 0: if INFO = -i, the i-th argument had an illegal value -* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is -* singular and its inverse could not be computed. -* -* ===================================================================== -* -* .. Parameters .. - COMPLEX*16 ZERO, ONE - PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ), - $ ONE = ( 1.0D+0, 0.0D+0 ) ) -* .. -* .. Local Scalars .. - LOGICAL LQUERY - INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB, - $ NBMIN, NN -* .. -* .. External Functions .. - INTEGER ILAENV - EXTERNAL ILAENV -* .. -* .. External Subroutines .. - EXTERNAL XERBLA, ZGEMM, ZGEMV, ZSWAP, ZTRSM, ZTRTRI -* .. -* .. Intrinsic Functions .. - INTRINSIC MAX, MIN -* .. -* .. Executable Statements .. -* -* Test the input parameters. -* - INFO = 0 - NB = ILAENV( 1, 'ZGETRI', ' ', N, -1, -1, -1 ) - LWKOPT = N*NB - WORK( 1 ) = LWKOPT - LQUERY = ( LWORK.EQ.-1 ) - IF( N.LT.0 ) THEN - INFO = -1 - ELSE IF( LDA.LT.MAX( 1, N ) ) THEN - INFO = -3 - ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN - INFO = -6 - END IF - IF( INFO.NE.0 ) THEN - CALL XERBLA( 'ZGETRI', -INFO ) - RETURN - ELSE IF( LQUERY ) THEN - RETURN - END IF -* -* Quick return if possible -* - IF( N.EQ.0 ) - $ RETURN -* -* Form inv(U). If INFO > 0 from ZTRTRI, then U is singular, -* and the inverse is not computed. -* - CALL ZTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO ) - IF( INFO.GT.0 ) - $ RETURN -* - NBMIN = 2 - LDWORK = N - IF( NB.GT.1 .AND. NB.LT.N ) THEN - IWS = MAX( LDWORK*NB, 1 ) - IF( LWORK.LT.IWS ) THEN - NB = LWORK / LDWORK - NBMIN = MAX( 2, ILAENV( 2, 'ZGETRI', ' ', N, -1, -1, -1 ) ) - END IF - ELSE - IWS = N - END IF -* -* Solve the equation inv(A)*L = inv(U) for inv(A). -* - IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN -* -* Use unblocked code. -* - DO 20 J = N, 1, -1 -* -* Copy current column of L to WORK and replace with zeros. -* - DO 10 I = J + 1, N - WORK( I ) = A( I, J ) - A( I, J ) = ZERO - 10 CONTINUE -* -* Compute current column of inv(A). -* - IF( J.LT.N ) - $ CALL ZGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ), - $ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 ) - 20 CONTINUE - ELSE -* -* Use blocked code. -* - NN = ( ( N-1 ) / NB )*NB + 1 - DO 50 J = NN, 1, -NB - JB = MIN( NB, N-J+1 ) -* -* Copy current block column of L to WORK and replace with -* zeros. -* - DO 40 JJ = J, J + JB - 1 - DO 30 I = JJ + 1, N - WORK( I+( JJ-J )*LDWORK ) = A( I, JJ ) - A( I, JJ ) = ZERO - 30 CONTINUE - 40 CONTINUE -* -* Compute current block column of inv(A). -* - IF( J+JB.LE.N ) - $ CALL ZGEMM( 'No transpose', 'No transpose', N, JB, - $ N-J-JB+1, -ONE, A( 1, J+JB ), LDA, - $ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA ) - CALL ZTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB, - $ ONE, WORK( J ), LDWORK, A( 1, J ), LDA ) - 50 CONTINUE - END IF -* -* Apply column interchanges. -* - DO 60 J = N - 1, 1, -1 - JP = IPIV( J ) - IF( JP.NE.J ) - $ CALL ZSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 ) - 60 CONTINUE -* - WORK( 1 ) = IWS - RETURN -* -* End of ZGETRI -* - END diff --git a/lapack/trtri/trtri_L_single.c b/lapack/trtri/trtri_L_single.c index a940ce2f6..3e4343060 100644 --- a/lapack/trtri/trtri_L_single.c +++ b/lapack/trtri/trtri_L_single.c @@ -1,190 +1,113 @@ -/*********************************************************************/ -/* Copyright 2009, 2010 The University of Texas at Austin. */ -/* All rights reserved. */ -/* */ -/* Redistribution and use in source and binary forms, with or */ -/* without modification, are permitted provided that the following */ -/* conditions are met: */ -/* */ -/* 1. Redistributions of source code must retain the above */ -/* copyright notice, this list of conditions and the following */ -/* disclaimer. */ -/* */ -/* 2. Redistributions in binary form must reproduce the above */ -/* copyright notice, this list of conditions and the following */ -/* disclaimer in the documentation and/or other materials */ -/* provided with the distribution. */ -/* */ -/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ -/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ -/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ -/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ -/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ -/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ -/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ -/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ -/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ -/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ -/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ -/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ -/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ -/* POSSIBILITY OF SUCH DAMAGE. */ -/* */ -/* The views and conclusions contained in the software and */ -/* documentation are those of the authors and should not be */ -/* interpreted as representing official policies, either expressed */ -/* or implied, of The University of Texas at Austin. */ -/*********************************************************************/ +/*************************************************************************** + * Copyright (c) 2013, The OpenBLAS Project + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of the OpenBLAS project nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * *****************************************************************************/ + +/************************************************************************************** +* 2014/05/22 Saar +* TEST double precision unblocked : OK +* 2014/05/23 Saar +* TEST double precision blocked: OK +* TEST single precision blocked: OK +**************************************************************************************/ #include #include "common.h" -static FLOAT dp1 = 1.; -static FLOAT dm1 = -1.; +// static FLOAT dp1 = 1.; +// static FLOAT dm1 = -1.; + #ifdef UNIT -#define TRTI2 TRTI2_LU +#define TRTI2 TRTI2_LU +#define TRMM TRMM_LNLU +#define TRSM TRSM_RNLU #else -#define TRTI2 TRTI2_LN +#define TRTI2 TRTI2_LN +#define TRMM TRMM_LNLN +#define TRSM TRSM_RNLN #endif -#if 0 -#undef GEMM_P -#undef GEMM_Q -#undef GEMM_R - -#define GEMM_P 8 -#define GEMM_Q 20 -#define GEMM_R 64 -#endif - -#define GEMM_PQ MAX(GEMM_P, GEMM_Q) -#define REAL_GEMM_R (GEMM_R - 2 * GEMM_PQ) blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) { - BLASLONG n, lda; + BLASLONG j, n, lda; FLOAT *a; - BLASLONG i, is, min_i, start_i; - BLASLONG ls, min_l; - BLASLONG bk; - BLASLONG blocking; - BLASLONG range_N[2]; + // BLASLONG info=0; + BLASLONG jb; + BLASLONG NB; + BLASLONG start_j; - FLOAT *sa_trsm = (FLOAT *)((BLASLONG)sb); - FLOAT *sa_trmm = (FLOAT *)((((BLASLONG)sb - + GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN) - + GEMM_OFFSET_A); - FLOAT *sb_gemm = (FLOAT *)((((BLASLONG)sa_trmm - + GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN) - + GEMM_OFFSET_B); + FLOAT beta_plus[2] = { ONE, ZERO}; + FLOAT beta_minus[2] = {-ONE, ZERO}; n = args -> n; - a = (FLOAT *)args -> a; - lda = args -> lda; - if (range_n) { - n = range_n[1] - range_n[0]; - a += range_n[0] * (lda + 1) * COMPSIZE; - } + NB = GEMM_Q; - if (n <= DTB_ENTRIES) { + if (n < NB) { TRTI2(args, NULL, range_n, sa, sb, 0); return 0; } - blocking = GEMM_Q; - if (n <= 4 * GEMM_Q) blocking = (n + 3) / 4; - start_i = 0; - while (start_i < n) start_i += blocking; - start_i -= blocking; + lda = args -> lda; + a = (FLOAT *) args -> a; + args -> ldb = lda; + args -> ldc = lda; + args -> alpha = NULL; - for (i = start_i; i >= 0; i -= blocking) { - bk = MIN(blocking, n - i); - - if (n - bk - i > 0) TRSM_OLNCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, sa_trsm); - - if (!range_n) { - range_N[0] = i; - range_N[1] = i + bk; - } else { - range_N[0] = range_n[0] + i; - range_N[1] = range_n[0] + i + bk; - } + start_j = 0; + while (start_j < n) start_j += NB; + start_j -= NB; - CNAME(args, NULL, range_N, sa, sa_trmm, 0); - if (i > 0) { - TRMM_ILTCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, 0, sa_trmm); + for (j = start_j ; j >=0 ; j-= NB) + { + jb = n - j; + if ( jb > NB ) jb = NB; - for (ls = 0; ls < i; ls += REAL_GEMM_R) { - min_l = i - ls; - if (min_l > REAL_GEMM_R) min_l = REAL_GEMM_R; - - GEMM_ONCOPY (bk, min_l, a + (i + ls * lda) * COMPSIZE, lda, sb_gemm); - - if (n - bk - i > 0) { - for (is = i + bk; is < n; is += GEMM_P) { - min_i = n - is; - if (min_i > GEMM_P) min_i = GEMM_P; - - if (ls == 0) { - NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); + args -> n = jb; + args -> m = n-j-jb; - TRSM_KERNEL_RT(min_i, bk, bk, dm1, -#ifdef COMPLEX - ZERO, -#endif - sa, sa_trsm, - a + (is + i * lda) * COMPSIZE, lda, 0); - } else { - GEMM_ITCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); - } + args -> a = &a[(j+jb+(j+jb)*lda) * COMPSIZE]; + args -> b = &a[(j+jb+j*lda) * COMPSIZE]; + args -> beta = beta_plus; - GEMM_KERNEL_N(min_i, min_l, bk, dp1, -#ifdef COMPLEX - ZERO, -#endif - sa, sb_gemm, - a + (is + ls * lda) * COMPSIZE, lda); - } - } - - for (is = 0; is < bk; is += GEMM_P) { - min_i = bk - is; - if (min_i > GEMM_P) min_i = GEMM_P; - - TRMM_KERNEL_LT(min_i, min_l, bk, dp1, -#ifdef COMPLEX - ZERO, -#endif - sa_trmm + is * bk * COMPSIZE, sb_gemm, - a + (i + is + ls * lda) * COMPSIZE, lda, is); - } - } + TRMM(args, NULL, NULL, sa, sb, 0); - } else { + args -> a = &a[(j+j*lda) * COMPSIZE]; + args -> beta = beta_minus; + + TRSM(args, NULL, NULL, sa, sb, 0); + + args -> a = &a[(j+j*lda) * COMPSIZE]; + + TRTI2(args, NULL, range_n, sa, sb, 0); - if (n - bk - i > 0) { - for (is = 0; is < n - bk - i; is += GEMM_P) { - min_i = n - bk - i - is; - if (min_i > GEMM_P) min_i = GEMM_P; - - NEG_TCOPY (bk, min_i, a + (i + bk + is + i * lda) * COMPSIZE, lda, sa); - - TRSM_KERNEL_RT(min_i, bk, bk, dm1, -#ifdef COMPLEX - ZERO, -#endif - sa, sa_trsm, - a + (i + bk + is + i * lda) * COMPSIZE, lda, 0); - } - } - - } } - return 0; } diff --git a/lapack/trtri/trtri_U_single.c b/lapack/trtri/trtri_U_single.c index c79281cfb..e4da5da59 100644 --- a/lapack/trtri/trtri_U_single.c +++ b/lapack/trtri/trtri_U_single.c @@ -1,46 +1,44 @@ -/*********************************************************************/ -/* Copyright 2009, 2010 The University of Texas at Austin. */ -/* All rights reserved. */ -/* */ -/* Redistribution and use in source and binary forms, with or */ -/* without modification, are permitted provided that the following */ -/* conditions are met: */ -/* */ -/* 1. Redistributions of source code must retain the above */ -/* copyright notice, this list of conditions and the following */ -/* disclaimer. */ -/* */ -/* 2. Redistributions in binary form must reproduce the above */ -/* copyright notice, this list of conditions and the following */ -/* disclaimer in the documentation and/or other materials */ -/* provided with the distribution. */ -/* */ -/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ -/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ -/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ -/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ -/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ -/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ -/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ -/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ -/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ -/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ -/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ -/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ -/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ -/* POSSIBILITY OF SUCH DAMAGE. */ -/* */ -/* The views and conclusions contained in the software and */ -/* documentation are those of the authors and should not be */ -/* interpreted as representing official policies, either expressed */ -/* or implied, of The University of Texas at Austin. */ -/*********************************************************************/ +/*************************************************************************** + * Copyright (c) 2013, The OpenBLAS Project + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * 3. Neither the name of the OpenBLAS project nor the names of + * its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE + * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * *****************************************************************************/ + +/************************************************************************************** +* 2014/05/22 Saar +* TEST double precision unblocked : OK +* TEST double precision blocked : OK +* 2014/05/23 +* TEST single precision blocked : OK +* +**************************************************************************************/ #include #include "common.h" -static FLOAT dp1 = 1.; -static FLOAT dm1 = -1.; +// static FLOAT dp1 = 1.; +// static FLOAT dm1 = -1.; #ifdef UNIT #define TRTI2 TRTI2_UU @@ -48,152 +46,66 @@ static FLOAT dm1 = -1.; #define TRTI2 TRTI2_UN #endif -#if 0 -#undef GEMM_P -#undef GEMM_Q -#undef GEMM_R - -#define GEMM_P 8 -#define GEMM_Q 20 -#define GEMM_R 64 +#ifdef UNIT +#define TRMM TRMM_LNUU +#define TRSM TRSM_RNUU +#else +#define TRMM TRMM_LNUN +#define TRSM TRSM_RNUN #endif -#define GEMM_PQ MAX(GEMM_P, GEMM_Q) -#define REAL_GEMM_R (GEMM_R - 2 * GEMM_PQ) blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) { - BLASLONG n, lda; + BLASLONG j, n, lda; FLOAT *a; - BLASLONG i, is, min_i, start_is; - BLASLONG ls, min_l; - BLASLONG bk; - BLASLONG blocking; - BLASLONG range_N[2]; + // BLASLONG info=0; + BLASLONG jb; + BLASLONG NB; - FLOAT *sa_trsm = (FLOAT *)((BLASLONG)sb); - FLOAT *sa_trmm = (FLOAT *)((((BLASLONG)sb - + GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN) - + GEMM_OFFSET_A); - FLOAT *sb_gemm = (FLOAT *)((((BLASLONG)sa_trmm - + GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN) - + GEMM_OFFSET_B); + FLOAT beta_plus[2] = { ONE, ZERO}; + FLOAT beta_minus[2] = {-ONE, ZERO}; n = args -> n; - a = (FLOAT *)args -> a; - lda = args -> lda; - if (range_n) { - n = range_n[1] - range_n[0]; - a += range_n[0] * (lda + 1) * COMPSIZE; - } + NB = GEMM_Q; - if (n <= DTB_ENTRIES) { + if (n <= NB) { TRTI2(args, NULL, range_n, sa, sb, 0); return 0; } - blocking = GEMM_Q; - if (n <= 4 * GEMM_Q) blocking = (n + 3) / 4; - for (i = 0; i < n; i += blocking) { - bk = MIN(blocking, n - i); - - if (i > 0) TRSM_OUNCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, sa_trsm); + lda = args -> lda; + a = (FLOAT *) args -> a; + args -> ldb = lda; + args -> ldc = lda; + args -> alpha = NULL; - if (!range_n) { - range_N[0] = i; - range_N[1] = i + bk; - } else { - range_N[0] = range_n[0] + i; - range_N[1] = range_n[0] + i + bk; - } + for (j = 0; j < n; j += NB) + { + jb = n - j; + if ( jb > NB ) jb = NB; - CNAME(args, NULL, range_N, sa, sa_trmm, 0); + args -> n = jb; + args -> m = j; - if (n -bk - i > 0) { - TRMM_IUTCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, 0, sa_trmm); + args -> a = &a[0]; + args -> b = &a[(j*lda) * COMPSIZE]; + args -> beta = beta_plus; - for (ls = i + bk; ls < n; ls += REAL_GEMM_R) { - min_l = n - ls; - if (min_l > REAL_GEMM_R) min_l = REAL_GEMM_R; - - GEMM_ONCOPY (bk, min_l, a + (i + ls * lda) * COMPSIZE, lda, sb_gemm); - - if (i > 0) { - for (is = 0; is < i; is += GEMM_P) { - min_i = i - is; - if (min_i > GEMM_P) min_i = GEMM_P; - - if (ls == i + bk) { - //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); + TRMM(args, NULL, NULL, sa, sb, 0); - GEMM_BETA(min_i, bk, 0, dm1, -#ifdef COMPLEX - ZERO, -#endif - NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda); + args -> a = &a[(j+j*lda) * COMPSIZE]; + args -> beta = beta_minus; - TRSM_KERNEL_RN(min_i, bk, bk, dm1, -#ifdef COMPLEX - ZERO, -#endif - sa, sa_trsm, - a + (is + i * lda) * COMPSIZE, lda, 0); - } else { - GEMM_ITCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); - } - - GEMM_KERNEL_N(min_i, min_l, bk, dp1, -#ifdef COMPLEX - ZERO, -#endif - sa, sb_gemm, - a + (is + ls * lda) * COMPSIZE, lda); - } - } - - start_is = 0; - while (start_is < bk) start_is += GEMM_P; - start_is -= GEMM_P; + TRSM(args, NULL, NULL, sa, sb, 0); - for (is = 0; is < bk; is += GEMM_P) { - min_i = bk - is; - if (min_i > GEMM_P) min_i = GEMM_P; - - TRMM_KERNEL_LN(min_i, min_l, bk, dp1, -#ifdef COMPLEX - ZERO, -#endif - sa_trmm + is * bk * COMPSIZE, sb_gemm, - a + (i + is + ls * lda) * COMPSIZE, lda, is); - } - } + args -> a = &a[(j+j*lda) * COMPSIZE]; - } else { - if (i > 0) { - for (is = 0; is < i; is += GEMM_P) { - min_i = i - is; - if (min_i > GEMM_P) min_i = GEMM_P; - - //NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa); - GEMM_BETA(min_i, bk, 0, dm1, -#ifdef COMPLEX - ZERO, -#endif - NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda); + TRTI2(args, NULL, range_n, sa, sb, 0); - TRSM_KERNEL_RN(min_i, bk, bk, dm1, -#ifdef COMPLEX - ZERO, -#endif - sa, sa_trsm, - a + (is + i * lda) * COMPSIZE, lda, 0); - } - } - } } - return 0; } diff --git a/make.inc b/make.inc index d3f91cbaa..affae3a2d 100644 --- a/make.inc +++ b/make.inc @@ -1,11 +1,7 @@ SHELL = /bin/sh PLAT = _LINUX DRVOPTS = $(OPTS) -LOADER = $(FORTRAN) -TIMER = NONE +LOADER = $(FORTRAN) -pthread ARCHFLAGS= -ru #RANLIB = ranlib -BLASLIB = ../../../libopenblas.a -TMGLIB = tmglib.a -#EIGSRCLIB = eigsrc.a -#LINSRCLIB = linsrc.a +