Merge pull request #376 from wernsaar/develop
Merged some Lapack optimized functions https://github.com/xianyi/OpenBLAS/wiki/Fixed-optimized-kernels-To-do-List
This commit is contained in:
commit
0ac073fa94
12
Makefile
12
Makefile
|
@ -207,6 +207,7 @@ else
|
||||||
netlib : lapack_prebuild
|
netlib : lapack_prebuild
|
||||||
ifndef NOFORTRAN
|
ifndef NOFORTRAN
|
||||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||||
|
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||||
endif
|
endif
|
||||||
ifndef NO_LAPACKE
|
ifndef NO_LAPACKE
|
||||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
||||||
|
@ -230,11 +231,18 @@ ifndef NOFORTRAN
|
||||||
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "ARCHFLAGS = -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
|
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
else
|
||||||
|
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
endif
|
||||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
@ -256,13 +264,12 @@ lapack-timing : large.tgz timing.tgz
|
||||||
ifndef NOFORTRAN
|
ifndef NOFORTRAN
|
||||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
|
||||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
|
||||||
make -C $(NETLIB_LAPACK_DIR) tmglib
|
|
||||||
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
make -C $(NETLIB_LAPACK_DIR)/TIMING
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
|
||||||
lapack-test :
|
lapack-test :
|
||||||
make -j 1 -C $(NETLIB_LAPACK_DIR) tmglib
|
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||||
|
|
||||||
|
@ -291,4 +298,5 @@ endif
|
||||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||||
@rm -f *.grd Makefile.conf_last config_last.h
|
@rm -f *.grd Makefile.conf_last config_last.h
|
||||||
|
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt)
|
||||||
@echo Done.
|
@echo Done.
|
||||||
|
|
|
@ -2667,34 +2667,34 @@
|
||||||
## @(MATGEN_OBJ) from `lapack-3.4.1/lapacke/src/Makefile`
|
## @(MATGEN_OBJ) from `lapack-3.4.1/lapacke/src/Makefile`
|
||||||
## Not exported: requires LAPACKE_TESTING to be set and depends on libtmg
|
## Not exported: requires LAPACKE_TESTING to be set and depends on libtmg
|
||||||
## (see `lapack-3.4.1/TESTING/MATGEN`).
|
## (see `lapack-3.4.1/TESTING/MATGEN`).
|
||||||
#LAPACKE_clatms,
|
LAPACKE_clatms,
|
||||||
#LAPACKE_clatms_work,
|
LAPACKE_clatms_work,
|
||||||
#LAPACKE_dlatms,
|
LAPACKE_dlatms,
|
||||||
#LAPACKE_dlatms_work,
|
LAPACKE_dlatms_work,
|
||||||
#LAPACKE_slatms,
|
LAPACKE_slatms,
|
||||||
#LAPACKE_slatms_work,
|
LAPACKE_slatms_work,
|
||||||
#LAPACKE_zlatms,
|
LAPACKE_zlatms,
|
||||||
#LAPACKE_zlatms_work,
|
LAPACKE_zlatms_work,
|
||||||
#LAPACKE_clagge,
|
LAPACKE_clagge,
|
||||||
#LAPACKE_clagge_work,
|
LAPACKE_clagge_work,
|
||||||
#LAPACKE_dlagge,
|
LAPACKE_dlagge,
|
||||||
#LAPACKE_dlagge_work,
|
LAPACKE_dlagge_work,
|
||||||
#LAPACKE_slagge,
|
LAPACKE_slagge,
|
||||||
#LAPACKE_slagge_work,
|
LAPACKE_slagge_work,
|
||||||
#LAPACKE_zlagge,
|
LAPACKE_zlagge,
|
||||||
#LAPACKE_zlagge_work,
|
LAPACKE_zlagge_work,
|
||||||
#LAPACKE_claghe,
|
LAPACKE_claghe,
|
||||||
#LAPACKE_claghe_work,
|
LAPACKE_claghe_work,
|
||||||
#LAPACKE_zlaghe,
|
LAPACKE_zlaghe,
|
||||||
#LAPACKE_zlaghe_work,
|
LAPACKE_zlaghe_work,
|
||||||
#LAPACKE_clagsy,
|
LAPACKE_clagsy,
|
||||||
#LAPACKE_clagsy_work,
|
LAPACKE_clagsy_work,
|
||||||
#LAPACKE_dlagsy,
|
LAPACKE_dlagsy,
|
||||||
#LAPACKE_dlagsy_work,
|
LAPACKE_dlagsy_work,
|
||||||
#LAPACKE_slagsy,
|
LAPACKE_slagsy,
|
||||||
#LAPACKE_slagsy_work,
|
LAPACKE_slagsy_work,
|
||||||
#LAPACKE_zlagsy,
|
LAPACKE_zlagsy,
|
||||||
#LAPACKE_zlagsy_work,
|
LAPACKE_zlagsy_work,
|
||||||
);
|
);
|
||||||
|
|
||||||
#These function may need 2 underscores.
|
#These function may need 2 underscores.
|
||||||
|
|
|
@ -349,7 +349,8 @@ XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS)
|
||||||
|
|
||||||
SLAPACKOBJS = \
|
SLAPACKOBJS = \
|
||||||
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
|
sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \
|
||||||
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX)
|
spotf2.$(SUFFIX) slaswp.$(SUFFIX) sgesv.$(SUFFIX) slauu2.$(SUFFIX) \
|
||||||
|
slauum.$(SUFFIX) strti2.$(SUFFIX) strtri.$(SUFFIX) spotri.$(SUFFIX)
|
||||||
|
|
||||||
|
|
||||||
#DLAPACKOBJS = \
|
#DLAPACKOBJS = \
|
||||||
|
@ -359,7 +360,8 @@ SLAPACKOBJS = \
|
||||||
|
|
||||||
DLAPACKOBJS = \
|
DLAPACKOBJS = \
|
||||||
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
|
dgetrf.$(SUFFIX) dgetrs.$(SUFFIX) dpotrf.$(SUFFIX) dgetf2.$(SUFFIX) \
|
||||||
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX)
|
dpotf2.$(SUFFIX) dlaswp.$(SUFFIX) dgesv.$(SUFFIX) dlauu2.$(SUFFIX) \
|
||||||
|
dlauum.$(SUFFIX) dtrti2.$(SUFFIX) dtrtri.$(SUFFIX) dpotri.$(SUFFIX)
|
||||||
|
|
||||||
|
|
||||||
QLAPACKOBJS = \
|
QLAPACKOBJS = \
|
||||||
|
@ -374,7 +376,8 @@ QLAPACKOBJS = \
|
||||||
|
|
||||||
CLAPACKOBJS = \
|
CLAPACKOBJS = \
|
||||||
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
|
cgetrf.$(SUFFIX) cgetrs.$(SUFFIX) cpotrf.$(SUFFIX) cgetf2.$(SUFFIX) \
|
||||||
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX)
|
cpotf2.$(SUFFIX) claswp.$(SUFFIX) cgesv.$(SUFFIX) clauu2.$(SUFFIX) \
|
||||||
|
clauum.$(SUFFIX) ctrti2.$(SUFFIX) ctrtri.$(SUFFIX) cpotri.$(SUFFIX)
|
||||||
|
|
||||||
|
|
||||||
#ZLAPACKOBJS = \
|
#ZLAPACKOBJS = \
|
||||||
|
@ -384,7 +387,9 @@ CLAPACKOBJS = \
|
||||||
|
|
||||||
ZLAPACKOBJS = \
|
ZLAPACKOBJS = \
|
||||||
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
|
zgetrf.$(SUFFIX) zgetrs.$(SUFFIX) zpotrf.$(SUFFIX) zgetf2.$(SUFFIX) \
|
||||||
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX)
|
zpotf2.$(SUFFIX) zlaswp.$(SUFFIX) zgesv.$(SUFFIX) zlauu2.$(SUFFIX) \
|
||||||
|
zlauum.$(SUFFIX) ztrti2.$(SUFFIX) ztrtri.$(SUFFIX) zpotri.$(SUFFIX)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -1788,37 +1793,37 @@ zgetrf.$(SUFFIX) zgetrf.$(PSUFFIX) : lapack/zgetrf.c
|
||||||
xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c
|
xgetrf.$(SUFFIX) xgetrf.$(PSUFFIX) : zgetrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lauu2.c
|
slauu2.$(SUFFIX) slauu2.$(PSUFFIX) : lapack/lauu2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lauu2.c
|
dlauu2.$(SUFFIX) dlauu2.$(PSUFFIX) : lapack/lauu2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qlauu2.$(SUFFIX) qlauu2.$(PSUFFIX) : lauu2.c
|
qlauu2.$(SUFFIX) qlauu2.$(PSUFFIX) : lauu2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : zlauu2.c
|
clauu2.$(SUFFIX) clauu2.$(PSUFFIX) : lapack/zlauu2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : zlauu2.c
|
zlauu2.$(SUFFIX) zlauu2.$(PSUFFIX) : lapack/zlauu2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xlauu2.$(SUFFIX) xlauu2.$(PSUFFIX) : zlauu2.c
|
xlauu2.$(SUFFIX) xlauu2.$(PSUFFIX) : zlauu2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
slauum.$(SUFFIX) slauum.$(PSUFFIX) : lauum.c
|
slauum.$(SUFFIX) slauum.$(PSUFFIX) : lapack/lauum.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lauum.c
|
dlauum.$(SUFFIX) dlauum.$(PSUFFIX) : lapack/lauum.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qlauum.$(SUFFIX) qlauum.$(PSUFFIX) : lauum.c
|
qlauum.$(SUFFIX) qlauum.$(PSUFFIX) : lauum.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
clauum.$(SUFFIX) clauum.$(PSUFFIX) : zlauum.c
|
clauum.$(SUFFIX) clauum.$(PSUFFIX) : lapack/zlauum.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : zlauum.c
|
zlauum.$(SUFFIX) zlauum.$(PSUFFIX) : lapack/zlauum.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c
|
xlauum.$(SUFFIX) xlauum.$(PSUFFIX) : zlauum.c
|
||||||
|
@ -1860,37 +1865,37 @@ zpotrf.$(SUFFIX) zpotrf.$(PSUFFIX) : lapack/zpotrf.c
|
||||||
xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c
|
xpotrf.$(SUFFIX) xpotrf.$(PSUFFIX) : zpotrf.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
strti2.$(SUFFIX) strti2.$(PSUFFIX) : trti2.c
|
strti2.$(SUFFIX) strti2.$(PSUFFIX) : lapack/trti2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : trti2.c
|
dtrti2.$(SUFFIX) dtrti2.$(PSUFFIX) : lapack/trti2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qtrti2.$(SUFFIX) qtrti2.$(PSUFFIX) : trti2.c
|
qtrti2.$(SUFFIX) qtrti2.$(PSUFFIX) : trti2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : ztrti2.c
|
ctrti2.$(SUFFIX) ctrti2.$(PSUFFIX) : lapack/ztrti2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : ztrti2.c
|
ztrti2.$(SUFFIX) ztrti2.$(PSUFFIX) : lapack/ztrti2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xtrti2.$(SUFFIX) xtrti2.$(PSUFFIX) : ztrti2.c
|
xtrti2.$(SUFFIX) xtrti2.$(PSUFFIX) : ztrti2.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
strtri.$(SUFFIX) strtri.$(PSUFFIX) : trtri.c
|
strtri.$(SUFFIX) strtri.$(PSUFFIX) : lapack/trtri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : trtri.c
|
dtrtri.$(SUFFIX) dtrtri.$(PSUFFIX) : lapack/trtri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qtrtri.$(SUFFIX) qtrtri.$(PSUFFIX) : trtri.c
|
qtrtri.$(SUFFIX) qtrtri.$(PSUFFIX) : trtri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : ztrtri.c
|
ctrtri.$(SUFFIX) ctrtri.$(PSUFFIX) : lapack/ztrtri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : ztrtri.c
|
ztrtri.$(SUFFIX) ztrtri.$(PSUFFIX) : lapack/ztrtri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c
|
xtrtri.$(SUFFIX) xtrtri.$(PSUFFIX) : ztrtri.c
|
||||||
|
@ -1950,19 +1955,19 @@ zgesv.$(SUFFIX) zgesv.$(PSUFFIX) : lapack/gesv.c
|
||||||
xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c
|
xgesv.$(SUFFIX) xgesv.$(PSUFFIX) : gesv.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
spotri.$(SUFFIX) spotri.$(PSUFFIX) : potri.c
|
spotri.$(SUFFIX) spotri.$(PSUFFIX) : lapack/potri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : potri.c
|
dpotri.$(SUFFIX) dpotri.$(PSUFFIX) : lapack/potri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
qpotri.$(SUFFIX) qpotri.$(PSUFFIX) : potri.c
|
qpotri.$(SUFFIX) qpotri.$(PSUFFIX) : potri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : zpotri.c
|
cpotri.$(SUFFIX) cpotri.$(PSUFFIX) : lapack/zpotri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : zpotri.c
|
zpotri.$(SUFFIX) zpotri.$(PSUFFIX) : lapack/zpotri.c
|
||||||
$(CC) -c $(CFLAGS) $< -o $(@F)
|
$(CC) -c $(CFLAGS) $< -o $(@F)
|
||||||
|
|
||||||
xpotri.$(SUFFIX) xpotri.$(PSUFFIX) : zpotri.c
|
xpotri.$(SUFFIX) xpotri.$(PSUFFIX) : zpotri.c
|
||||||
|
|
|
@ -149,7 +149,10 @@ int NAME(char *UPLO, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){
|
||||||
blas_memory_free(buffer);
|
blas_memory_free(buffer);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, args.m * args.n, 2. / 3. * args.m * args.n * args.n);
|
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, .5 * args.n * args.n,
|
||||||
|
args.n * (1./3. + args.n * ( 1./2. + args.n * 1./6.))
|
||||||
|
+ args.n * (1./3. + args.n * (-1./2. + args.n * 1./6.)));
|
||||||
|
|
||||||
|
|
||||||
IDEBUG_END;
|
IDEBUG_END;
|
||||||
|
|
|
@ -120,14 +120,14 @@ SLASRC = \
|
||||||
slarrv.o slartv.o \
|
slarrv.o slartv.o \
|
||||||
slarz.o slarzb.o slarzt.o slasy2.o slasyf.o slasyf_rook.o \
|
slarz.o slarzb.o slarzt.o slasy2.o slasyf.o slasyf_rook.o \
|
||||||
slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
|
slatbs.o slatdf.o slatps.o slatrd.o slatrs.o slatrz.o slatzm.o \
|
||||||
slauu2.o slauum.o sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
|
sopgtr.o sopmtr.o sorg2l.o sorg2r.o \
|
||||||
sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
|
sorgbr.o sorghr.o sorgl2.o sorglq.o sorgql.o sorgqr.o sorgr2.o \
|
||||||
sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
|
sorgrq.o sorgtr.o sorm2l.o sorm2r.o \
|
||||||
sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
|
sormbr.o sormhr.o sorml2.o sormlq.o sormql.o sormqr.o sormr2.o \
|
||||||
sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
|
sormr3.o sormrq.o sormrz.o sormtr.o spbcon.o spbequ.o spbrfs.o \
|
||||||
spbstf.o spbsv.o spbsvx.o \
|
spbstf.o spbsv.o spbsvx.o \
|
||||||
spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \
|
spbtf2.o spbtrf.o spbtrs.o spocon.o spoequ.o sporfs.o sposv.o \
|
||||||
sposvx.o spotri.o spstrf.o spstf2.o \
|
sposvx.o spstrf.o spstf2.o \
|
||||||
sppcon.o sppequ.o \
|
sppcon.o sppequ.o \
|
||||||
spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \
|
spprfs.o sppsv.o sppsvx.o spptrf.o spptri.o spptrs.o sptcon.o \
|
||||||
spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \
|
spteqr.o sptrfs.o sptsv.o sptsvx.o spttrs.o sptts2.o srscl.o \
|
||||||
|
@ -147,7 +147,7 @@ SLASRC = \
|
||||||
stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
|
stgsja.o stgsna.o stgsy2.o stgsyl.o stpcon.o stprfs.o stptri.o \
|
||||||
stptrs.o \
|
stptrs.o \
|
||||||
strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
|
strcon.o strevc.o strexc.o strrfs.o strsen.o strsna.o strsyl.o \
|
||||||
strti2.o strtri.o strtrs.o stzrqf.o stzrzf.o sstemr.o \
|
strtrs.o stzrqf.o stzrzf.o sstemr.o \
|
||||||
slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
|
slansf.o spftrf.o spftri.o spftrs.o ssfrk.o stfsm.o stftri.o stfttp.o \
|
||||||
stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
|
stfttr.o stpttf.o stpttr.o strttf.o strttp.o \
|
||||||
sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
|
sgejsv.o sgesvj.o sgsvj0.o sgsvj1.o \
|
||||||
|
@ -208,9 +208,9 @@ CLASRC = \
|
||||||
clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
|
clarfx.o clargv.o clarnv.o clarrv.o clartg.o clartv.o \
|
||||||
clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
|
clarz.o clarzb.o clarzt.o clascl.o claset.o clasr.o classq.o \
|
||||||
clasyf.o clasyf_rook.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
|
clasyf.o clasyf_rook.o clatbs.o clatdf.o clatps.o clatrd.o clatrs.o clatrz.o \
|
||||||
clatzm.o clauu2.o clauum.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
|
clatzm.o cpbcon.o cpbequ.o cpbrfs.o cpbstf.o cpbsv.o \
|
||||||
cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
|
cpbsvx.o cpbtf2.o cpbtrf.o cpbtrs.o cpocon.o cpoequ.o cporfs.o \
|
||||||
cposv.o cposvx.o cpotri.o cpstrf.o cpstf2.o \
|
cposv.o cposvx.o cpstrf.o cpstf2.o \
|
||||||
cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
|
cppcon.o cppequ.o cpprfs.o cppsv.o cppsvx.o cpptrf.o cpptri.o cpptrs.o \
|
||||||
cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \
|
cptcon.o cpteqr.o cptrfs.o cptsv.o cptsvx.o cpttrf.o cpttrs.o cptts2.o \
|
||||||
crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
|
crot.o cspcon.o cspmv.o cspr.o csprfs.o cspsv.o \
|
||||||
|
@ -225,7 +225,7 @@ CLASRC = \
|
||||||
ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
|
ctgexc.o ctgsen.o ctgsja.o ctgsna.o ctgsy2.o ctgsyl.o ctpcon.o \
|
||||||
ctprfs.o ctptri.o \
|
ctprfs.o ctptri.o \
|
||||||
ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
|
ctptrs.o ctrcon.o ctrevc.o ctrexc.o ctrrfs.o ctrsen.o ctrsna.o \
|
||||||
ctrsyl.o ctrti2.o ctrtri.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
|
ctrsyl.o ctrtrs.o ctzrqf.o ctzrzf.o cung2l.o cung2r.o \
|
||||||
cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
|
cungbr.o cunghr.o cungl2.o cunglq.o cungql.o cungqr.o cungr2.o \
|
||||||
cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
|
cungrq.o cungtr.o cunm2l.o cunm2r.o cunmbr.o cunmhr.o cunml2.o \
|
||||||
cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
|
cunmlq.o cunmql.o cunmqr.o cunmr2.o cunmr3.o cunmrq.o cunmrz.o \
|
||||||
|
@ -279,15 +279,15 @@ DLASRC = \
|
||||||
dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \
|
dlarf.o dlarfb.o dlarfg.o dlarfgp.o dlarft.o dlarfx.o \
|
||||||
dlargv.o dlarrv.o dlartv.o \
|
dlargv.o dlarrv.o dlartv.o \
|
||||||
dlarz.o dlarzb.o dlarzt.o dlasy2.o dlasyf.o dlasyf_rook.o \
|
dlarz.o dlarzb.o dlarzt.o dlasy2.o dlasyf.o dlasyf_rook.o \
|
||||||
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o dlauu2.o \
|
dlatbs.o dlatdf.o dlatps.o dlatrd.o dlatrs.o dlatrz.o dlatzm.o \
|
||||||
dlauum.o dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
|
dopgtr.o dopmtr.o dorg2l.o dorg2r.o \
|
||||||
dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
|
dorgbr.o dorghr.o dorgl2.o dorglq.o dorgql.o dorgqr.o dorgr2.o \
|
||||||
dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
|
dorgrq.o dorgtr.o dorm2l.o dorm2r.o \
|
||||||
dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
|
dormbr.o dormhr.o dorml2.o dormlq.o dormql.o dormqr.o dormr2.o \
|
||||||
dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \
|
dormr3.o dormrq.o dormrz.o dormtr.o dpbcon.o dpbequ.o dpbrfs.o \
|
||||||
dpbstf.o dpbsv.o dpbsvx.o \
|
dpbstf.o dpbsv.o dpbsvx.o \
|
||||||
dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \
|
dpbtf2.o dpbtrf.o dpbtrs.o dpocon.o dpoequ.o dporfs.o dposv.o \
|
||||||
dposvx.o dpotri.o dpotrs.o dpstrf.o dpstf2.o \
|
dposvx.o dpotrs.o dpstrf.o dpstf2.o \
|
||||||
dppcon.o dppequ.o \
|
dppcon.o dppequ.o \
|
||||||
dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \
|
dpprfs.o dppsv.o dppsvx.o dpptrf.o dpptri.o dpptrs.o dptcon.o \
|
||||||
dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \
|
dpteqr.o dptrfs.o dptsv.o dptsvx.o dpttrs.o dptts2.o drscl.o \
|
||||||
|
@ -307,7 +307,7 @@ DLASRC = \
|
||||||
dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
|
dtgsja.o dtgsna.o dtgsy2.o dtgsyl.o dtpcon.o dtprfs.o dtptri.o \
|
||||||
dtptrs.o \
|
dtptrs.o \
|
||||||
dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
|
dtrcon.o dtrevc.o dtrexc.o dtrrfs.o dtrsen.o dtrsna.o dtrsyl.o \
|
||||||
dtrti2.o dtrtri.o dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
|
dtrtrs.o dtzrqf.o dtzrzf.o dstemr.o \
|
||||||
dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
|
dsgesv.o dsposv.o dlag2s.o slag2d.o dlat2s.o \
|
||||||
dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
|
dlansf.o dpftrf.o dpftri.o dpftrs.o dsfrk.o dtfsm.o dtftri.o dtfttp.o \
|
||||||
dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
|
dtfttr.o dtpttf.o dtpttr.o dtrttf.o dtrttp.o \
|
||||||
|
@ -369,10 +369,10 @@ ZLASRC = \
|
||||||
zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
|
zlarfx.o zlargv.o zlarnv.o zlarrv.o zlartg.o zlartv.o \
|
||||||
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
|
zlarz.o zlarzb.o zlarzt.o zlascl.o zlaset.o zlasr.o \
|
||||||
zlassq.o zlasyf.o zlasyf_rook.o \
|
zlassq.o zlasyf.o zlasyf_rook.o \
|
||||||
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o zlauu2.o \
|
zlatbs.o zlatdf.o zlatps.o zlatrd.o zlatrs.o zlatrz.o zlatzm.o \
|
||||||
zlauum.o zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
|
zpbcon.o zpbequ.o zpbrfs.o zpbstf.o zpbsv.o \
|
||||||
zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
|
zpbsvx.o zpbtf2.o zpbtrf.o zpbtrs.o zpocon.o zpoequ.o zporfs.o \
|
||||||
zposv.o zposvx.o zpotri.o zpotrs.o zpstrf.o zpstf2.o \
|
zposv.o zposvx.o zpotrs.o zpstrf.o zpstf2.o \
|
||||||
zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
|
zppcon.o zppequ.o zpprfs.o zppsv.o zppsvx.o zpptrf.o zpptri.o zpptrs.o \
|
||||||
zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \
|
zptcon.o zpteqr.o zptrfs.o zptsv.o zptsvx.o zpttrf.o zpttrs.o zptts2.o \
|
||||||
zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
|
zrot.o zspcon.o zspmv.o zspr.o zsprfs.o zspsv.o \
|
||||||
|
@ -387,7 +387,7 @@ ZLASRC = \
|
||||||
ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
|
ztgexc.o ztgsen.o ztgsja.o ztgsna.o ztgsy2.o ztgsyl.o ztpcon.o \
|
||||||
ztprfs.o ztptri.o \
|
ztprfs.o ztptri.o \
|
||||||
ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
|
ztptrs.o ztrcon.o ztrevc.o ztrexc.o ztrrfs.o ztrsen.o ztrsna.o \
|
||||||
ztrsyl.o ztrti2.o ztrtri.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
|
ztrsyl.o ztrtrs.o ztzrqf.o ztzrzf.o zung2l.o \
|
||||||
zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
|
zung2r.o zungbr.o zunghr.o zungl2.o zunglq.o zungql.o zungqr.o zungr2.o \
|
||||||
zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
|
zungrq.o zungtr.o zunm2l.o zunm2r.o zunmbr.o zunmhr.o zunml2.o \
|
||||||
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
|
zunmlq.o zunmql.o zunmqr.o zunmr2.o zunmr3.o zunmrq.o zunmrz.o \
|
||||||
|
|
|
@ -5,7 +5,7 @@ SEP: Data file for testing Symmetric Eigenvalue Problem routines
|
||||||
1 3 3 3 10 Values of NB (blocksize)
|
1 3 3 3 10 Values of NB (blocksize)
|
||||||
2 2 2 2 2 Values of NBMIN (minimum blocksize)
|
2 2 2 2 2 Values of NBMIN (minimum blocksize)
|
||||||
1 0 5 9 1 Values of NX (crossover point)
|
1 0 5 9 1 Values of NX (crossover point)
|
||||||
50.0 Threshold value
|
60.0 Threshold value
|
||||||
T Put T to test the LAPACK routines
|
T Put T to test the LAPACK routines
|
||||||
T Put T to test the driver routines
|
T Put T to test the driver routines
|
||||||
T Put T to test the error exits
|
T Put T to test the error exits
|
||||||
|
|
|
@ -2072,9 +2072,9 @@ SOBJ_FILES := $(SSRC_OBJ)
|
||||||
DOBJ_FILES := $(DSRC_OBJ)
|
DOBJ_FILES := $(DSRC_OBJ)
|
||||||
ZOBJ_FILES := $(ZSRC_OBJ)
|
ZOBJ_FILES := $(ZSRC_OBJ)
|
||||||
|
|
||||||
ifdef LAPACKE_TESTING
|
# ifdef LAPACKE_TESTING
|
||||||
ZOBJ_FILES += $(MATGEN_OBJ)
|
ZOBJ_FILES += $(MATGEN_OBJ)
|
||||||
endif
|
#endif
|
||||||
|
|
||||||
ALLOBJ = $(COBJ_FILES) $(DOBJ_FILES) $(SOBJ_FILES) $(ZOBJ_FILES) $(OBJ_FILES)
|
ALLOBJ = $(COBJ_FILES) $(DOBJ_FILES) $(SOBJ_FILES) $(ZOBJ_FILES) $(OBJ_FILES)
|
||||||
|
|
||||||
|
|
|
@ -2,7 +2,7 @@ TOPDIR = ..
|
||||||
include ../Makefile.system
|
include ../Makefile.system
|
||||||
|
|
||||||
#SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
|
#SUBDIRS = laswp getf2 getrf potf2 potrf lauu2 lauum trti2 trtri getrs
|
||||||
SUBDIRS = getrf getf2 laswp getrs potrf potf2
|
SUBDIRS = getrf getf2 laswp getrs potrf potf2 lauu2 lauum trti2 trtri
|
||||||
|
|
||||||
FLAMEDIRS = laswp getf2 potf2 lauu2 trti2
|
FLAMEDIRS = laswp getf2 potf2 lauu2 trti2
|
||||||
|
|
||||||
|
|
|
@ -1,194 +0,0 @@
|
||||||
SUBROUTINE CGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
|
|
||||||
*
|
|
||||||
* -- LAPACK routine (version 3.0) --
|
|
||||||
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
|
||||||
* Courant Institute, Argonne National Lab, and Rice University
|
|
||||||
* June 30, 1999
|
|
||||||
*
|
|
||||||
* .. Scalar Arguments ..
|
|
||||||
INTEGER INFO, LDA, LWORK, N
|
|
||||||
* ..
|
|
||||||
* .. Array Arguments ..
|
|
||||||
INTEGER IPIV( * )
|
|
||||||
COMPLEX A( LDA, * ), WORK( * )
|
|
||||||
* ..
|
|
||||||
*
|
|
||||||
* Purpose
|
|
||||||
* =======
|
|
||||||
*
|
|
||||||
* CGETRI computes the inverse of a matrix using the LU factorization
|
|
||||||
* computed by CGETRF.
|
|
||||||
*
|
|
||||||
* This method inverts U and then computes inv(A) by solving the system
|
|
||||||
* inv(A)*L = inv(U) for inv(A).
|
|
||||||
*
|
|
||||||
* Arguments
|
|
||||||
* =========
|
|
||||||
*
|
|
||||||
* N (input) INTEGER
|
|
||||||
* The order of the matrix A. N >= 0.
|
|
||||||
*
|
|
||||||
* A (input/output) COMPLEX array, dimension (LDA,N)
|
|
||||||
* On entry, the factors L and U from the factorization
|
|
||||||
* A = P*L*U as computed by CGETRF.
|
|
||||||
* On exit, if INFO = 0, the inverse of the original matrix A.
|
|
||||||
*
|
|
||||||
* LDA (input) INTEGER
|
|
||||||
* The leading dimension of the array A. LDA >= max(1,N).
|
|
||||||
*
|
|
||||||
* IPIV (input) INTEGER array, dimension (N)
|
|
||||||
* The pivot indices from CGETRF; for 1<=i<=N, row i of the
|
|
||||||
* matrix was interchanged with row IPIV(i).
|
|
||||||
*
|
|
||||||
* WORK (workspace/output) COMPLEX array, dimension (LWORK)
|
|
||||||
* On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
|
|
||||||
*
|
|
||||||
* LWORK (input) INTEGER
|
|
||||||
* The dimension of the array WORK. LWORK >= max(1,N).
|
|
||||||
* For optimal performance LWORK >= N*NB, where NB is
|
|
||||||
* the optimal blocksize returned by ILAENV.
|
|
||||||
*
|
|
||||||
* If LWORK = -1, then a workspace query is assumed; the routine
|
|
||||||
* only calculates the optimal size of the WORK array, returns
|
|
||||||
* this value as the first entry of the WORK array, and no error
|
|
||||||
* message related to LWORK is issued by XERBLA.
|
|
||||||
*
|
|
||||||
* INFO (output) INTEGER
|
|
||||||
* = 0: successful exit
|
|
||||||
* < 0: if INFO = -i, the i-th argument had an illegal value
|
|
||||||
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
|
|
||||||
* singular and its inverse could not be computed.
|
|
||||||
*
|
|
||||||
* =====================================================================
|
|
||||||
*
|
|
||||||
* .. Parameters ..
|
|
||||||
COMPLEX ZERO, ONE
|
|
||||||
PARAMETER ( ZERO = ( 0.0E+0, 0.0E+0 ),
|
|
||||||
$ ONE = ( 1.0E+0, 0.0E+0 ) )
|
|
||||||
* ..
|
|
||||||
* .. Local Scalars ..
|
|
||||||
LOGICAL LQUERY
|
|
||||||
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
|
|
||||||
$ NBMIN, NN
|
|
||||||
* ..
|
|
||||||
* .. External Functions ..
|
|
||||||
INTEGER ILAENV
|
|
||||||
EXTERNAL ILAENV
|
|
||||||
* ..
|
|
||||||
* .. External Subroutines ..
|
|
||||||
EXTERNAL CGEMM, CGEMV, CSWAP, CTRSM, CTRTRI, XERBLA
|
|
||||||
* ..
|
|
||||||
* .. Intrinsic Functions ..
|
|
||||||
INTRINSIC MAX, MIN
|
|
||||||
* ..
|
|
||||||
* .. Executable Statements ..
|
|
||||||
*
|
|
||||||
* Test the input parameters.
|
|
||||||
*
|
|
||||||
INFO = 0
|
|
||||||
NB = ILAENV( 1, 'CGETRI', ' ', N, -1, -1, -1 )
|
|
||||||
LWKOPT = N*NB
|
|
||||||
WORK( 1 ) = LWKOPT
|
|
||||||
LQUERY = ( LWORK.EQ.-1 )
|
|
||||||
IF( N.LT.0 ) THEN
|
|
||||||
INFO = -1
|
|
||||||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
|
|
||||||
INFO = -3
|
|
||||||
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
|
|
||||||
INFO = -6
|
|
||||||
END IF
|
|
||||||
IF( INFO.NE.0 ) THEN
|
|
||||||
CALL XERBLA( 'CGETRI', -INFO )
|
|
||||||
RETURN
|
|
||||||
ELSE IF( LQUERY ) THEN
|
|
||||||
RETURN
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Quick return if possible
|
|
||||||
*
|
|
||||||
IF( N.EQ.0 )
|
|
||||||
$ RETURN
|
|
||||||
*
|
|
||||||
* Form inv(U). If INFO > 0 from CTRTRI, then U is singular,
|
|
||||||
* and the inverse is not computed.
|
|
||||||
*
|
|
||||||
CALL CTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
|
|
||||||
IF( INFO.GT.0 )
|
|
||||||
$ RETURN
|
|
||||||
*
|
|
||||||
NBMIN = 2
|
|
||||||
LDWORK = N
|
|
||||||
IF( NB.GT.1 .AND. NB.LT.N ) THEN
|
|
||||||
IWS = MAX( LDWORK*NB, 1 )
|
|
||||||
IF( LWORK.LT.IWS ) THEN
|
|
||||||
NB = LWORK / LDWORK
|
|
||||||
NBMIN = MAX( 2, ILAENV( 2, 'CGETRI', ' ', N, -1, -1, -1 ) )
|
|
||||||
END IF
|
|
||||||
ELSE
|
|
||||||
IWS = N
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Solve the equation inv(A)*L = inv(U) for inv(A).
|
|
||||||
*
|
|
||||||
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
|
|
||||||
*
|
|
||||||
* Use unblocked code.
|
|
||||||
*
|
|
||||||
DO 20 J = N, 1, -1
|
|
||||||
*
|
|
||||||
* Copy current column of L to WORK and replace with zeros.
|
|
||||||
*
|
|
||||||
DO 10 I = J + 1, N
|
|
||||||
WORK( I ) = A( I, J )
|
|
||||||
A( I, J ) = ZERO
|
|
||||||
10 CONTINUE
|
|
||||||
*
|
|
||||||
* Compute current column of inv(A).
|
|
||||||
*
|
|
||||||
IF( J.LT.N )
|
|
||||||
$ CALL CGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
|
|
||||||
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
|
|
||||||
20 CONTINUE
|
|
||||||
ELSE
|
|
||||||
*
|
|
||||||
* Use blocked code.
|
|
||||||
*
|
|
||||||
NN = ( ( N-1 ) / NB )*NB + 1
|
|
||||||
DO 50 J = NN, 1, -NB
|
|
||||||
JB = MIN( NB, N-J+1 )
|
|
||||||
*
|
|
||||||
* Copy current block column of L to WORK and replace with
|
|
||||||
* zeros.
|
|
||||||
*
|
|
||||||
DO 40 JJ = J, J + JB - 1
|
|
||||||
DO 30 I = JJ + 1, N
|
|
||||||
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
|
|
||||||
A( I, JJ ) = ZERO
|
|
||||||
30 CONTINUE
|
|
||||||
40 CONTINUE
|
|
||||||
*
|
|
||||||
* Compute current block column of inv(A).
|
|
||||||
*
|
|
||||||
IF( J+JB.LE.N )
|
|
||||||
$ CALL CGEMM( 'No transpose', 'No transpose', N, JB,
|
|
||||||
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
|
|
||||||
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
|
|
||||||
CALL CTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
|
|
||||||
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
|
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Apply column interchanges.
|
|
||||||
*
|
|
||||||
DO 60 J = N - 1, 1, -1
|
|
||||||
JP = IPIV( J )
|
|
||||||
IF( JP.NE.J )
|
|
||||||
$ CALL CSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
|
|
||||||
60 CONTINUE
|
|
||||||
*
|
|
||||||
WORK( 1 ) = IWS
|
|
||||||
RETURN
|
|
||||||
*
|
|
||||||
* End of CGETRI
|
|
||||||
*
|
|
||||||
END
|
|
|
@ -1,193 +0,0 @@
|
||||||
SUBROUTINE DGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
|
|
||||||
*
|
|
||||||
* -- LAPACK routine (version 3.0) --
|
|
||||||
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
|
||||||
* Courant Institute, Argonne National Lab, and Rice University
|
|
||||||
* June 30, 1999
|
|
||||||
*
|
|
||||||
* .. Scalar Arguments ..
|
|
||||||
INTEGER INFO, LDA, LWORK, N
|
|
||||||
* ..
|
|
||||||
* .. Array Arguments ..
|
|
||||||
INTEGER IPIV( * )
|
|
||||||
DOUBLE PRECISION A( LDA, * ), WORK( * )
|
|
||||||
* ..
|
|
||||||
*
|
|
||||||
* Purpose
|
|
||||||
* =======
|
|
||||||
*
|
|
||||||
* DGETRI computes the inverse of a matrix using the LU factorization
|
|
||||||
* computed by DGETRF.
|
|
||||||
*
|
|
||||||
* This method inverts U and then computes inv(A) by solving the system
|
|
||||||
* inv(A)*L = inv(U) for inv(A).
|
|
||||||
*
|
|
||||||
* Arguments
|
|
||||||
* =========
|
|
||||||
*
|
|
||||||
* N (input) INTEGER
|
|
||||||
* The order of the matrix A. N >= 0.
|
|
||||||
*
|
|
||||||
* A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
|
|
||||||
* On entry, the factors L and U from the factorization
|
|
||||||
* A = P*L*U as computed by DGETRF.
|
|
||||||
* On exit, if INFO = 0, the inverse of the original matrix A.
|
|
||||||
*
|
|
||||||
* LDA (input) INTEGER
|
|
||||||
* The leading dimension of the array A. LDA >= max(1,N).
|
|
||||||
*
|
|
||||||
* IPIV (input) INTEGER array, dimension (N)
|
|
||||||
* The pivot indices from DGETRF; for 1<=i<=N, row i of the
|
|
||||||
* matrix was interchanged with row IPIV(i).
|
|
||||||
*
|
|
||||||
* WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
|
|
||||||
* On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
|
|
||||||
*
|
|
||||||
* LWORK (input) INTEGER
|
|
||||||
* The dimension of the array WORK. LWORK >= max(1,N).
|
|
||||||
* For optimal performance LWORK >= N*NB, where NB is
|
|
||||||
* the optimal blocksize returned by ILAENV.
|
|
||||||
*
|
|
||||||
* If LWORK = -1, then a workspace query is assumed; the routine
|
|
||||||
* only calculates the optimal size of the WORK array, returns
|
|
||||||
* this value as the first entry of the WORK array, and no error
|
|
||||||
* message related to LWORK is issued by XERBLA.
|
|
||||||
*
|
|
||||||
* INFO (output) INTEGER
|
|
||||||
* = 0: successful exit
|
|
||||||
* < 0: if INFO = -i, the i-th argument had an illegal value
|
|
||||||
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
|
|
||||||
* singular and its inverse could not be computed.
|
|
||||||
*
|
|
||||||
* =====================================================================
|
|
||||||
*
|
|
||||||
* .. Parameters ..
|
|
||||||
DOUBLE PRECISION ZERO, ONE
|
|
||||||
PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
|
|
||||||
* ..
|
|
||||||
* .. Local Scalars ..
|
|
||||||
LOGICAL LQUERY
|
|
||||||
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
|
|
||||||
$ NBMIN, NN
|
|
||||||
* ..
|
|
||||||
* .. External Functions ..
|
|
||||||
INTEGER ILAENV
|
|
||||||
EXTERNAL ILAENV
|
|
||||||
* ..
|
|
||||||
* .. External Subroutines ..
|
|
||||||
EXTERNAL DGEMM, DGEMV, DSWAP, DTRSM, DTRTRI, XERBLA
|
|
||||||
* ..
|
|
||||||
* .. Intrinsic Functions ..
|
|
||||||
INTRINSIC MAX, MIN
|
|
||||||
* ..
|
|
||||||
* .. Executable Statements ..
|
|
||||||
*
|
|
||||||
* Test the input parameters.
|
|
||||||
*
|
|
||||||
INFO = 0
|
|
||||||
NB = ILAENV( 1, 'DGETRI', ' ', N, -1, -1, -1 )
|
|
||||||
LWKOPT = N*NB
|
|
||||||
WORK( 1 ) = LWKOPT
|
|
||||||
LQUERY = ( LWORK.EQ.-1 )
|
|
||||||
IF( N.LT.0 ) THEN
|
|
||||||
INFO = -1
|
|
||||||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
|
|
||||||
INFO = -3
|
|
||||||
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
|
|
||||||
INFO = -6
|
|
||||||
END IF
|
|
||||||
IF( INFO.NE.0 ) THEN
|
|
||||||
CALL XERBLA( 'DGETRI', -INFO )
|
|
||||||
RETURN
|
|
||||||
ELSE IF( LQUERY ) THEN
|
|
||||||
RETURN
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Quick return if possible
|
|
||||||
*
|
|
||||||
IF( N.EQ.0 )
|
|
||||||
$ RETURN
|
|
||||||
*
|
|
||||||
* Form inv(U). If INFO > 0 from DTRTRI, then U is singular,
|
|
||||||
* and the inverse is not computed.
|
|
||||||
*
|
|
||||||
CALL DTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
|
|
||||||
IF( INFO.GT.0 )
|
|
||||||
$ RETURN
|
|
||||||
*
|
|
||||||
NBMIN = 2
|
|
||||||
LDWORK = N
|
|
||||||
IF( NB.GT.1 .AND. NB.LT.N ) THEN
|
|
||||||
IWS = MAX( LDWORK*NB, 1 )
|
|
||||||
IF( LWORK.LT.IWS ) THEN
|
|
||||||
NB = LWORK / LDWORK
|
|
||||||
NBMIN = MAX( 2, ILAENV( 2, 'DGETRI', ' ', N, -1, -1, -1 ) )
|
|
||||||
END IF
|
|
||||||
ELSE
|
|
||||||
IWS = N
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Solve the equation inv(A)*L = inv(U) for inv(A).
|
|
||||||
*
|
|
||||||
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
|
|
||||||
*
|
|
||||||
* Use unblocked code.
|
|
||||||
*
|
|
||||||
DO 20 J = N, 1, -1
|
|
||||||
*
|
|
||||||
* Copy current column of L to WORK and replace with zeros.
|
|
||||||
*
|
|
||||||
DO 10 I = J + 1, N
|
|
||||||
WORK( I ) = A( I, J )
|
|
||||||
A( I, J ) = ZERO
|
|
||||||
10 CONTINUE
|
|
||||||
*
|
|
||||||
* Compute current column of inv(A).
|
|
||||||
*
|
|
||||||
IF( J.LT.N )
|
|
||||||
$ CALL DGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
|
|
||||||
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
|
|
||||||
20 CONTINUE
|
|
||||||
ELSE
|
|
||||||
*
|
|
||||||
* Use blocked code.
|
|
||||||
*
|
|
||||||
NN = ( ( N-1 ) / NB )*NB + 1
|
|
||||||
DO 50 J = NN, 1, -NB
|
|
||||||
JB = MIN( NB, N-J+1 )
|
|
||||||
*
|
|
||||||
* Copy current block column of L to WORK and replace with
|
|
||||||
* zeros.
|
|
||||||
*
|
|
||||||
DO 40 JJ = J, J + JB - 1
|
|
||||||
DO 30 I = JJ + 1, N
|
|
||||||
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
|
|
||||||
A( I, JJ ) = ZERO
|
|
||||||
30 CONTINUE
|
|
||||||
40 CONTINUE
|
|
||||||
*
|
|
||||||
* Compute current block column of inv(A).
|
|
||||||
*
|
|
||||||
IF( J+JB.LE.N )
|
|
||||||
$ CALL DGEMM( 'No transpose', 'No transpose', N, JB,
|
|
||||||
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
|
|
||||||
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
|
|
||||||
CALL DTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
|
|
||||||
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
|
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Apply column interchanges.
|
|
||||||
*
|
|
||||||
DO 60 J = N - 1, 1, -1
|
|
||||||
JP = IPIV( J )
|
|
||||||
IF( JP.NE.J )
|
|
||||||
$ CALL DSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
|
|
||||||
60 CONTINUE
|
|
||||||
*
|
|
||||||
WORK( 1 ) = IWS
|
|
||||||
RETURN
|
|
||||||
*
|
|
||||||
* End of DGETRI
|
|
||||||
*
|
|
||||||
END
|
|
|
@ -1,193 +0,0 @@
|
||||||
SUBROUTINE SGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
|
|
||||||
*
|
|
||||||
* -- LAPACK routine (version 3.0) --
|
|
||||||
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
|
||||||
* Courant Institute, Argonne National Lab, and Rice University
|
|
||||||
* June 30, 1999
|
|
||||||
*
|
|
||||||
* .. Scalar Arguments ..
|
|
||||||
INTEGER INFO, LDA, LWORK, N
|
|
||||||
* ..
|
|
||||||
* .. Array Arguments ..
|
|
||||||
INTEGER IPIV( * )
|
|
||||||
REAL A( LDA, * ), WORK( * )
|
|
||||||
* ..
|
|
||||||
*
|
|
||||||
* Purpose
|
|
||||||
* =======
|
|
||||||
*
|
|
||||||
* SGETRI computes the inverse of a matrix using the LU factorization
|
|
||||||
* computed by SGETRF.
|
|
||||||
*
|
|
||||||
* This method inverts U and then computes inv(A) by solving the system
|
|
||||||
* inv(A)*L = inv(U) for inv(A).
|
|
||||||
*
|
|
||||||
* Arguments
|
|
||||||
* =========
|
|
||||||
*
|
|
||||||
* N (input) INTEGER
|
|
||||||
* The order of the matrix A. N >= 0.
|
|
||||||
*
|
|
||||||
* A (input/output) REAL array, dimension (LDA,N)
|
|
||||||
* On entry, the factors L and U from the factorization
|
|
||||||
* A = P*L*U as computed by SGETRF.
|
|
||||||
* On exit, if INFO = 0, the inverse of the original matrix A.
|
|
||||||
*
|
|
||||||
* LDA (input) INTEGER
|
|
||||||
* The leading dimension of the array A. LDA >= max(1,N).
|
|
||||||
*
|
|
||||||
* IPIV (input) INTEGER array, dimension (N)
|
|
||||||
* The pivot indices from SGETRF; for 1<=i<=N, row i of the
|
|
||||||
* matrix was interchanged with row IPIV(i).
|
|
||||||
*
|
|
||||||
* WORK (workspace/output) REAL array, dimension (LWORK)
|
|
||||||
* On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
|
|
||||||
*
|
|
||||||
* LWORK (input) INTEGER
|
|
||||||
* The dimension of the array WORK. LWORK >= max(1,N).
|
|
||||||
* For optimal performance LWORK >= N*NB, where NB is
|
|
||||||
* the optimal blocksize returned by ILAENV.
|
|
||||||
*
|
|
||||||
* If LWORK = -1, then a workspace query is assumed; the routine
|
|
||||||
* only calculates the optimal size of the WORK array, returns
|
|
||||||
* this value as the first entry of the WORK array, and no error
|
|
||||||
* message related to LWORK is issued by XERBLA.
|
|
||||||
*
|
|
||||||
* INFO (output) INTEGER
|
|
||||||
* = 0: successful exit
|
|
||||||
* < 0: if INFO = -i, the i-th argument had an illegal value
|
|
||||||
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
|
|
||||||
* singular and its inverse could not be computed.
|
|
||||||
*
|
|
||||||
* =====================================================================
|
|
||||||
*
|
|
||||||
* .. Parameters ..
|
|
||||||
REAL ZERO, ONE
|
|
||||||
PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
|
|
||||||
* ..
|
|
||||||
* .. Local Scalars ..
|
|
||||||
LOGICAL LQUERY
|
|
||||||
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
|
|
||||||
$ NBMIN, NN
|
|
||||||
* ..
|
|
||||||
* .. External Functions ..
|
|
||||||
INTEGER ILAENV
|
|
||||||
EXTERNAL ILAENV
|
|
||||||
* ..
|
|
||||||
* .. External Subroutines ..
|
|
||||||
EXTERNAL SGEMM, SGEMV, SSWAP, STRSM, STRTRI, XERBLA
|
|
||||||
* ..
|
|
||||||
* .. Intrinsic Functions ..
|
|
||||||
INTRINSIC MAX, MIN
|
|
||||||
* ..
|
|
||||||
* .. Executable Statements ..
|
|
||||||
*
|
|
||||||
* Test the input parameters.
|
|
||||||
*
|
|
||||||
INFO = 0
|
|
||||||
NB = ILAENV( 1, 'SGETRI', ' ', N, -1, -1, -1 )
|
|
||||||
LWKOPT = N*NB
|
|
||||||
WORK( 1 ) = LWKOPT
|
|
||||||
LQUERY = ( LWORK.EQ.-1 )
|
|
||||||
IF( N.LT.0 ) THEN
|
|
||||||
INFO = -1
|
|
||||||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
|
|
||||||
INFO = -3
|
|
||||||
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
|
|
||||||
INFO = -6
|
|
||||||
END IF
|
|
||||||
IF( INFO.NE.0 ) THEN
|
|
||||||
CALL XERBLA( 'SGETRI', -INFO )
|
|
||||||
RETURN
|
|
||||||
ELSE IF( LQUERY ) THEN
|
|
||||||
RETURN
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Quick return if possible
|
|
||||||
*
|
|
||||||
IF( N.EQ.0 )
|
|
||||||
$ RETURN
|
|
||||||
*
|
|
||||||
* Form inv(U). If INFO > 0 from STRTRI, then U is singular,
|
|
||||||
* and the inverse is not computed.
|
|
||||||
*
|
|
||||||
CALL STRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
|
|
||||||
IF( INFO.GT.0 )
|
|
||||||
$ RETURN
|
|
||||||
*
|
|
||||||
NBMIN = 2
|
|
||||||
LDWORK = N
|
|
||||||
IF( NB.GT.1 .AND. NB.LT.N ) THEN
|
|
||||||
IWS = MAX( LDWORK*NB, 1 )
|
|
||||||
IF( LWORK.LT.IWS ) THEN
|
|
||||||
NB = LWORK / LDWORK
|
|
||||||
NBMIN = MAX( 2, ILAENV( 2, 'SGETRI', ' ', N, -1, -1, -1 ) )
|
|
||||||
END IF
|
|
||||||
ELSE
|
|
||||||
IWS = N
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Solve the equation inv(A)*L = inv(U) for inv(A).
|
|
||||||
*
|
|
||||||
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
|
|
||||||
*
|
|
||||||
* Use unblocked code.
|
|
||||||
*
|
|
||||||
DO 20 J = N, 1, -1
|
|
||||||
*
|
|
||||||
* Copy current column of L to WORK and replace with zeros.
|
|
||||||
*
|
|
||||||
DO 10 I = J + 1, N
|
|
||||||
WORK( I ) = A( I, J )
|
|
||||||
A( I, J ) = ZERO
|
|
||||||
10 CONTINUE
|
|
||||||
*
|
|
||||||
* Compute current column of inv(A).
|
|
||||||
*
|
|
||||||
IF( J.LT.N )
|
|
||||||
$ CALL SGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
|
|
||||||
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
|
|
||||||
20 CONTINUE
|
|
||||||
ELSE
|
|
||||||
*
|
|
||||||
* Use blocked code.
|
|
||||||
*
|
|
||||||
NN = ( ( N-1 ) / NB )*NB + 1
|
|
||||||
DO 50 J = NN, 1, -NB
|
|
||||||
JB = MIN( NB, N-J+1 )
|
|
||||||
*
|
|
||||||
* Copy current block column of L to WORK and replace with
|
|
||||||
* zeros.
|
|
||||||
*
|
|
||||||
DO 40 JJ = J, J + JB - 1
|
|
||||||
DO 30 I = JJ + 1, N
|
|
||||||
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
|
|
||||||
A( I, JJ ) = ZERO
|
|
||||||
30 CONTINUE
|
|
||||||
40 CONTINUE
|
|
||||||
*
|
|
||||||
* Compute current block column of inv(A).
|
|
||||||
*
|
|
||||||
IF( J+JB.LE.N )
|
|
||||||
$ CALL SGEMM( 'No transpose', 'No transpose', N, JB,
|
|
||||||
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
|
|
||||||
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
|
|
||||||
CALL STRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
|
|
||||||
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
|
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Apply column interchanges.
|
|
||||||
*
|
|
||||||
DO 60 J = N - 1, 1, -1
|
|
||||||
JP = IPIV( J )
|
|
||||||
IF( JP.NE.J )
|
|
||||||
$ CALL SSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
|
|
||||||
60 CONTINUE
|
|
||||||
*
|
|
||||||
WORK( 1 ) = IWS
|
|
||||||
RETURN
|
|
||||||
*
|
|
||||||
* End of SGETRI
|
|
||||||
*
|
|
||||||
END
|
|
|
@ -1,194 +0,0 @@
|
||||||
SUBROUTINE ZGETRI( N, A, LDA, IPIV, WORK, LWORK, INFO )
|
|
||||||
*
|
|
||||||
* -- LAPACK routine (version 3.0) --
|
|
||||||
* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
|
|
||||||
* Courant Institute, Argonne National Lab, and Rice University
|
|
||||||
* June 30, 1999
|
|
||||||
*
|
|
||||||
* .. Scalar Arguments ..
|
|
||||||
INTEGER INFO, LDA, LWORK, N
|
|
||||||
* ..
|
|
||||||
* .. Array Arguments ..
|
|
||||||
INTEGER IPIV( * )
|
|
||||||
COMPLEX*16 A( LDA, * ), WORK( * )
|
|
||||||
* ..
|
|
||||||
*
|
|
||||||
* Purpose
|
|
||||||
* =======
|
|
||||||
*
|
|
||||||
* ZGETRI computes the inverse of a matrix using the LU factorization
|
|
||||||
* computed by ZGETRF.
|
|
||||||
*
|
|
||||||
* This method inverts U and then computes inv(A) by solving the system
|
|
||||||
* inv(A)*L = inv(U) for inv(A).
|
|
||||||
*
|
|
||||||
* Arguments
|
|
||||||
* =========
|
|
||||||
*
|
|
||||||
* N (input) INTEGER
|
|
||||||
* The order of the matrix A. N >= 0.
|
|
||||||
*
|
|
||||||
* A (input/output) COMPLEX*16 array, dimension (LDA,N)
|
|
||||||
* On entry, the factors L and U from the factorization
|
|
||||||
* A = P*L*U as computed by ZGETRF.
|
|
||||||
* On exit, if INFO = 0, the inverse of the original matrix A.
|
|
||||||
*
|
|
||||||
* LDA (input) INTEGER
|
|
||||||
* The leading dimension of the array A. LDA >= max(1,N).
|
|
||||||
*
|
|
||||||
* IPIV (input) INTEGER array, dimension (N)
|
|
||||||
* The pivot indices from ZGETRF; for 1<=i<=N, row i of the
|
|
||||||
* matrix was interchanged with row IPIV(i).
|
|
||||||
*
|
|
||||||
* WORK (workspace/output) COMPLEX*16 array, dimension (LWORK)
|
|
||||||
* On exit, if INFO=0, then WORK(1) returns the optimal LWORK.
|
|
||||||
*
|
|
||||||
* LWORK (input) INTEGER
|
|
||||||
* The dimension of the array WORK. LWORK >= max(1,N).
|
|
||||||
* For optimal performance LWORK >= N*NB, where NB is
|
|
||||||
* the optimal blocksize returned by ILAENV.
|
|
||||||
*
|
|
||||||
* If LWORK = -1, then a workspace query is assumed; the routine
|
|
||||||
* only calculates the optimal size of the WORK array, returns
|
|
||||||
* this value as the first entry of the WORK array, and no error
|
|
||||||
* message related to LWORK is issued by XERBLA.
|
|
||||||
*
|
|
||||||
* INFO (output) INTEGER
|
|
||||||
* = 0: successful exit
|
|
||||||
* < 0: if INFO = -i, the i-th argument had an illegal value
|
|
||||||
* > 0: if INFO = i, U(i,i) is exactly zero; the matrix is
|
|
||||||
* singular and its inverse could not be computed.
|
|
||||||
*
|
|
||||||
* =====================================================================
|
|
||||||
*
|
|
||||||
* .. Parameters ..
|
|
||||||
COMPLEX*16 ZERO, ONE
|
|
||||||
PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ),
|
|
||||||
$ ONE = ( 1.0D+0, 0.0D+0 ) )
|
|
||||||
* ..
|
|
||||||
* .. Local Scalars ..
|
|
||||||
LOGICAL LQUERY
|
|
||||||
INTEGER I, IWS, J, JB, JJ, JP, LDWORK, LWKOPT, NB,
|
|
||||||
$ NBMIN, NN
|
|
||||||
* ..
|
|
||||||
* .. External Functions ..
|
|
||||||
INTEGER ILAENV
|
|
||||||
EXTERNAL ILAENV
|
|
||||||
* ..
|
|
||||||
* .. External Subroutines ..
|
|
||||||
EXTERNAL XERBLA, ZGEMM, ZGEMV, ZSWAP, ZTRSM, ZTRTRI
|
|
||||||
* ..
|
|
||||||
* .. Intrinsic Functions ..
|
|
||||||
INTRINSIC MAX, MIN
|
|
||||||
* ..
|
|
||||||
* .. Executable Statements ..
|
|
||||||
*
|
|
||||||
* Test the input parameters.
|
|
||||||
*
|
|
||||||
INFO = 0
|
|
||||||
NB = ILAENV( 1, 'ZGETRI', ' ', N, -1, -1, -1 )
|
|
||||||
LWKOPT = N*NB
|
|
||||||
WORK( 1 ) = LWKOPT
|
|
||||||
LQUERY = ( LWORK.EQ.-1 )
|
|
||||||
IF( N.LT.0 ) THEN
|
|
||||||
INFO = -1
|
|
||||||
ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
|
|
||||||
INFO = -3
|
|
||||||
ELSE IF( LWORK.LT.MAX( 1, N ) .AND. .NOT.LQUERY ) THEN
|
|
||||||
INFO = -6
|
|
||||||
END IF
|
|
||||||
IF( INFO.NE.0 ) THEN
|
|
||||||
CALL XERBLA( 'ZGETRI', -INFO )
|
|
||||||
RETURN
|
|
||||||
ELSE IF( LQUERY ) THEN
|
|
||||||
RETURN
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Quick return if possible
|
|
||||||
*
|
|
||||||
IF( N.EQ.0 )
|
|
||||||
$ RETURN
|
|
||||||
*
|
|
||||||
* Form inv(U). If INFO > 0 from ZTRTRI, then U is singular,
|
|
||||||
* and the inverse is not computed.
|
|
||||||
*
|
|
||||||
CALL ZTRTRI( 'Upper', 'Non-unit', N, A, LDA, INFO )
|
|
||||||
IF( INFO.GT.0 )
|
|
||||||
$ RETURN
|
|
||||||
*
|
|
||||||
NBMIN = 2
|
|
||||||
LDWORK = N
|
|
||||||
IF( NB.GT.1 .AND. NB.LT.N ) THEN
|
|
||||||
IWS = MAX( LDWORK*NB, 1 )
|
|
||||||
IF( LWORK.LT.IWS ) THEN
|
|
||||||
NB = LWORK / LDWORK
|
|
||||||
NBMIN = MAX( 2, ILAENV( 2, 'ZGETRI', ' ', N, -1, -1, -1 ) )
|
|
||||||
END IF
|
|
||||||
ELSE
|
|
||||||
IWS = N
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Solve the equation inv(A)*L = inv(U) for inv(A).
|
|
||||||
*
|
|
||||||
IF( NB.LT.NBMIN .OR. NB.GE.N ) THEN
|
|
||||||
*
|
|
||||||
* Use unblocked code.
|
|
||||||
*
|
|
||||||
DO 20 J = N, 1, -1
|
|
||||||
*
|
|
||||||
* Copy current column of L to WORK and replace with zeros.
|
|
||||||
*
|
|
||||||
DO 10 I = J + 1, N
|
|
||||||
WORK( I ) = A( I, J )
|
|
||||||
A( I, J ) = ZERO
|
|
||||||
10 CONTINUE
|
|
||||||
*
|
|
||||||
* Compute current column of inv(A).
|
|
||||||
*
|
|
||||||
IF( J.LT.N )
|
|
||||||
$ CALL ZGEMV( 'No transpose', N, N-J, -ONE, A( 1, J+1 ),
|
|
||||||
$ LDA, WORK( J+1 ), 1, ONE, A( 1, J ), 1 )
|
|
||||||
20 CONTINUE
|
|
||||||
ELSE
|
|
||||||
*
|
|
||||||
* Use blocked code.
|
|
||||||
*
|
|
||||||
NN = ( ( N-1 ) / NB )*NB + 1
|
|
||||||
DO 50 J = NN, 1, -NB
|
|
||||||
JB = MIN( NB, N-J+1 )
|
|
||||||
*
|
|
||||||
* Copy current block column of L to WORK and replace with
|
|
||||||
* zeros.
|
|
||||||
*
|
|
||||||
DO 40 JJ = J, J + JB - 1
|
|
||||||
DO 30 I = JJ + 1, N
|
|
||||||
WORK( I+( JJ-J )*LDWORK ) = A( I, JJ )
|
|
||||||
A( I, JJ ) = ZERO
|
|
||||||
30 CONTINUE
|
|
||||||
40 CONTINUE
|
|
||||||
*
|
|
||||||
* Compute current block column of inv(A).
|
|
||||||
*
|
|
||||||
IF( J+JB.LE.N )
|
|
||||||
$ CALL ZGEMM( 'No transpose', 'No transpose', N, JB,
|
|
||||||
$ N-J-JB+1, -ONE, A( 1, J+JB ), LDA,
|
|
||||||
$ WORK( J+JB ), LDWORK, ONE, A( 1, J ), LDA )
|
|
||||||
CALL ZTRSM( 'Right', 'Lower', 'No transpose', 'Unit', N, JB,
|
|
||||||
$ ONE, WORK( J ), LDWORK, A( 1, J ), LDA )
|
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
*
|
|
||||||
* Apply column interchanges.
|
|
||||||
*
|
|
||||||
DO 60 J = N - 1, 1, -1
|
|
||||||
JP = IPIV( J )
|
|
||||||
IF( JP.NE.J )
|
|
||||||
$ CALL ZSWAP( N, A( 1, J ), 1, A( 1, JP ), 1 )
|
|
||||||
60 CONTINUE
|
|
||||||
*
|
|
||||||
WORK( 1 ) = IWS
|
|
||||||
RETURN
|
|
||||||
*
|
|
||||||
* End of ZGETRI
|
|
||||||
*
|
|
||||||
END
|
|
|
@ -1,190 +1,113 @@
|
||||||
/*********************************************************************/
|
/***************************************************************************
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
* Copyright (c) 2013, The OpenBLAS Project
|
||||||
/* All rights reserved. */
|
* All rights reserved.
|
||||||
/* */
|
* Redistribution and use in source and binary forms, with or without
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
* modification, are permitted provided that the following conditions are
|
||||||
/* without modification, are permitted provided that the following */
|
* met:
|
||||||
/* conditions are met: */
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
/* */
|
* notice, this list of conditions and the following disclaimer.
|
||||||
/* 1. Redistributions of source code must retain the above */
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
/* copyright notice, this list of conditions and the following */
|
* notice, this list of conditions and the following disclaimer in
|
||||||
/* disclaimer. */
|
* the documentation and/or other materials provided with the
|
||||||
/* */
|
* distribution.
|
||||||
/* 2. Redistributions in binary form must reproduce the above */
|
* 3. Neither the name of the OpenBLAS project nor the names of
|
||||||
/* copyright notice, this list of conditions and the following */
|
* its contributors may be used to endorse or promote products
|
||||||
/* disclaimer in the documentation and/or other materials */
|
* derived from this software without specific prior written permission.
|
||||||
/* provided with the distribution. */
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
/* */
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
* *****************************************************************************/
|
||||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
|
||||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
/**************************************************************************************
|
||||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
* 2014/05/22 Saar
|
||||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
* TEST double precision unblocked : OK
|
||||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
* 2014/05/23 Saar
|
||||||
/* */
|
* TEST double precision blocked: OK
|
||||||
/* The views and conclusions contained in the software and */
|
* TEST single precision blocked: OK
|
||||||
/* documentation are those of the authors and should not be */
|
**************************************************************************************/
|
||||||
/* interpreted as representing official policies, either expressed */
|
|
||||||
/* or implied, of The University of Texas at Austin. */
|
|
||||||
/*********************************************************************/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
static FLOAT dp1 = 1.;
|
// static FLOAT dp1 = 1.;
|
||||||
static FLOAT dm1 = -1.;
|
// static FLOAT dm1 = -1.;
|
||||||
|
|
||||||
|
|
||||||
#ifdef UNIT
|
#ifdef UNIT
|
||||||
#define TRTI2 TRTI2_LU
|
#define TRTI2 TRTI2_LU
|
||||||
|
#define TRMM TRMM_LNLU
|
||||||
|
#define TRSM TRSM_RNLU
|
||||||
#else
|
#else
|
||||||
#define TRTI2 TRTI2_LN
|
#define TRTI2 TRTI2_LN
|
||||||
|
#define TRMM TRMM_LNLN
|
||||||
|
#define TRSM TRSM_RNLN
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
|
||||||
#undef GEMM_P
|
|
||||||
#undef GEMM_Q
|
|
||||||
#undef GEMM_R
|
|
||||||
|
|
||||||
#define GEMM_P 8
|
|
||||||
#define GEMM_Q 20
|
|
||||||
#define GEMM_R 64
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define GEMM_PQ MAX(GEMM_P, GEMM_Q)
|
|
||||||
#define REAL_GEMM_R (GEMM_R - 2 * GEMM_PQ)
|
|
||||||
|
|
||||||
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {
|
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {
|
||||||
|
|
||||||
BLASLONG n, lda;
|
BLASLONG j, n, lda;
|
||||||
FLOAT *a;
|
FLOAT *a;
|
||||||
|
|
||||||
BLASLONG i, is, min_i, start_i;
|
// BLASLONG info=0;
|
||||||
BLASLONG ls, min_l;
|
BLASLONG jb;
|
||||||
BLASLONG bk;
|
BLASLONG NB;
|
||||||
BLASLONG blocking;
|
BLASLONG start_j;
|
||||||
BLASLONG range_N[2];
|
|
||||||
|
|
||||||
FLOAT *sa_trsm = (FLOAT *)((BLASLONG)sb);
|
FLOAT beta_plus[2] = { ONE, ZERO};
|
||||||
FLOAT *sa_trmm = (FLOAT *)((((BLASLONG)sb
|
FLOAT beta_minus[2] = {-ONE, ZERO};
|
||||||
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
|
|
||||||
+ GEMM_OFFSET_A);
|
|
||||||
FLOAT *sb_gemm = (FLOAT *)((((BLASLONG)sa_trmm
|
|
||||||
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
|
|
||||||
+ GEMM_OFFSET_B);
|
|
||||||
|
|
||||||
n = args -> n;
|
n = args -> n;
|
||||||
a = (FLOAT *)args -> a;
|
|
||||||
lda = args -> lda;
|
|
||||||
|
|
||||||
if (range_n) {
|
NB = GEMM_Q;
|
||||||
n = range_n[1] - range_n[0];
|
|
||||||
a += range_n[0] * (lda + 1) * COMPSIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (n <= DTB_ENTRIES) {
|
if (n < NB) {
|
||||||
TRTI2(args, NULL, range_n, sa, sb, 0);
|
TRTI2(args, NULL, range_n, sa, sb, 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
blocking = GEMM_Q;
|
|
||||||
if (n <= 4 * GEMM_Q) blocking = (n + 3) / 4;
|
|
||||||
|
|
||||||
start_i = 0;
|
lda = args -> lda;
|
||||||
while (start_i < n) start_i += blocking;
|
a = (FLOAT *) args -> a;
|
||||||
start_i -= blocking;
|
args -> ldb = lda;
|
||||||
|
args -> ldc = lda;
|
||||||
|
args -> alpha = NULL;
|
||||||
|
|
||||||
for (i = start_i; i >= 0; i -= blocking) {
|
start_j = 0;
|
||||||
bk = MIN(blocking, n - i);
|
while (start_j < n) start_j += NB;
|
||||||
|
start_j -= NB;
|
||||||
|
|
||||||
if (n - bk - i > 0) TRSM_OLNCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, sa_trsm);
|
|
||||||
|
|
||||||
if (!range_n) {
|
for (j = start_j ; j >=0 ; j-= NB)
|
||||||
range_N[0] = i;
|
{
|
||||||
range_N[1] = i + bk;
|
jb = n - j;
|
||||||
} else {
|
if ( jb > NB ) jb = NB;
|
||||||
range_N[0] = range_n[0] + i;
|
|
||||||
range_N[1] = range_n[0] + i + bk;
|
|
||||||
}
|
|
||||||
|
|
||||||
CNAME(args, NULL, range_N, sa, sa_trmm, 0);
|
args -> n = jb;
|
||||||
|
args -> m = n-j-jb;
|
||||||
|
|
||||||
if (i > 0) {
|
args -> a = &a[(j+jb+(j+jb)*lda) * COMPSIZE];
|
||||||
TRMM_ILTCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, 0, sa_trmm);
|
args -> b = &a[(j+jb+j*lda) * COMPSIZE];
|
||||||
|
args -> beta = beta_plus;
|
||||||
|
|
||||||
for (ls = 0; ls < i; ls += REAL_GEMM_R) {
|
TRMM(args, NULL, NULL, sa, sb, 0);
|
||||||
min_l = i - ls;
|
|
||||||
if (min_l > REAL_GEMM_R) min_l = REAL_GEMM_R;
|
|
||||||
|
|
||||||
GEMM_ONCOPY (bk, min_l, a + (i + ls * lda) * COMPSIZE, lda, sb_gemm);
|
args -> a = &a[(j+j*lda) * COMPSIZE];
|
||||||
|
args -> beta = beta_minus;
|
||||||
|
|
||||||
if (n - bk - i > 0) {
|
TRSM(args, NULL, NULL, sa, sb, 0);
|
||||||
for (is = i + bk; is < n; is += GEMM_P) {
|
|
||||||
min_i = n - is;
|
|
||||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
|
||||||
|
|
||||||
if (ls == 0) {
|
args -> a = &a[(j+j*lda) * COMPSIZE];
|
||||||
NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
|
||||||
|
|
||||||
TRSM_KERNEL_RT(min_i, bk, bk, dm1,
|
TRTI2(args, NULL, range_n, sa, sb, 0);
|
||||||
#ifdef COMPLEX
|
|
||||||
ZERO,
|
|
||||||
#endif
|
|
||||||
sa, sa_trsm,
|
|
||||||
a + (is + i * lda) * COMPSIZE, lda, 0);
|
|
||||||
} else {
|
|
||||||
GEMM_ITCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
|
||||||
}
|
|
||||||
|
|
||||||
GEMM_KERNEL_N(min_i, min_l, bk, dp1,
|
|
||||||
#ifdef COMPLEX
|
|
||||||
ZERO,
|
|
||||||
#endif
|
|
||||||
sa, sb_gemm,
|
|
||||||
a + (is + ls * lda) * COMPSIZE, lda);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (is = 0; is < bk; is += GEMM_P) {
|
|
||||||
min_i = bk - is;
|
|
||||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
|
||||||
|
|
||||||
TRMM_KERNEL_LT(min_i, min_l, bk, dp1,
|
|
||||||
#ifdef COMPLEX
|
|
||||||
ZERO,
|
|
||||||
#endif
|
|
||||||
sa_trmm + is * bk * COMPSIZE, sb_gemm,
|
|
||||||
a + (i + is + ls * lda) * COMPSIZE, lda, is);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
|
|
||||||
if (n - bk - i > 0) {
|
|
||||||
for (is = 0; is < n - bk - i; is += GEMM_P) {
|
|
||||||
min_i = n - bk - i - is;
|
|
||||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
|
||||||
|
|
||||||
NEG_TCOPY (bk, min_i, a + (i + bk + is + i * lda) * COMPSIZE, lda, sa);
|
|
||||||
|
|
||||||
TRSM_KERNEL_RT(min_i, bk, bk, dm1,
|
|
||||||
#ifdef COMPLEX
|
|
||||||
ZERO,
|
|
||||||
#endif
|
|
||||||
sa, sa_trsm,
|
|
||||||
a + (i + bk + is + i * lda) * COMPSIZE, lda, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,46 +1,44 @@
|
||||||
/*********************************************************************/
|
/***************************************************************************
|
||||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
* Copyright (c) 2013, The OpenBLAS Project
|
||||||
/* All rights reserved. */
|
* All rights reserved.
|
||||||
/* */
|
* Redistribution and use in source and binary forms, with or without
|
||||||
/* Redistribution and use in source and binary forms, with or */
|
* modification, are permitted provided that the following conditions are
|
||||||
/* without modification, are permitted provided that the following */
|
* met:
|
||||||
/* conditions are met: */
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
/* */
|
* notice, this list of conditions and the following disclaimer.
|
||||||
/* 1. Redistributions of source code must retain the above */
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
/* copyright notice, this list of conditions and the following */
|
* notice, this list of conditions and the following disclaimer in
|
||||||
/* disclaimer. */
|
* the documentation and/or other materials provided with the
|
||||||
/* */
|
* distribution.
|
||||||
/* 2. Redistributions in binary form must reproduce the above */
|
* 3. Neither the name of the OpenBLAS project nor the names of
|
||||||
/* copyright notice, this list of conditions and the following */
|
* its contributors may be used to endorse or promote products
|
||||||
/* disclaimer in the documentation and/or other materials */
|
* derived from this software without specific prior written permission.
|
||||||
/* provided with the distribution. */
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
/* */
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
|
* *****************************************************************************/
|
||||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
|
|
||||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
|
/**************************************************************************************
|
||||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
|
* 2014/05/22 Saar
|
||||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
|
* TEST double precision unblocked : OK
|
||||||
/* POSSIBILITY OF SUCH DAMAGE. */
|
* TEST double precision blocked : OK
|
||||||
/* */
|
* 2014/05/23
|
||||||
/* The views and conclusions contained in the software and */
|
* TEST single precision blocked : OK
|
||||||
/* documentation are those of the authors and should not be */
|
*
|
||||||
/* interpreted as representing official policies, either expressed */
|
**************************************************************************************/
|
||||||
/* or implied, of The University of Texas at Austin. */
|
|
||||||
/*********************************************************************/
|
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
static FLOAT dp1 = 1.;
|
// static FLOAT dp1 = 1.;
|
||||||
static FLOAT dm1 = -1.;
|
// static FLOAT dm1 = -1.;
|
||||||
|
|
||||||
#ifdef UNIT
|
#ifdef UNIT
|
||||||
#define TRTI2 TRTI2_UU
|
#define TRTI2 TRTI2_UU
|
||||||
|
@ -48,152 +46,66 @@ static FLOAT dm1 = -1.;
|
||||||
#define TRTI2 TRTI2_UN
|
#define TRTI2 TRTI2_UN
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if 0
|
#ifdef UNIT
|
||||||
#undef GEMM_P
|
#define TRMM TRMM_LNUU
|
||||||
#undef GEMM_Q
|
#define TRSM TRSM_RNUU
|
||||||
#undef GEMM_R
|
#else
|
||||||
|
#define TRMM TRMM_LNUN
|
||||||
#define GEMM_P 8
|
#define TRSM TRSM_RNUN
|
||||||
#define GEMM_Q 20
|
|
||||||
#define GEMM_R 64
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define GEMM_PQ MAX(GEMM_P, GEMM_Q)
|
|
||||||
#define REAL_GEMM_R (GEMM_R - 2 * GEMM_PQ)
|
|
||||||
|
|
||||||
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {
|
blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG myid) {
|
||||||
|
|
||||||
BLASLONG n, lda;
|
BLASLONG j, n, lda;
|
||||||
FLOAT *a;
|
FLOAT *a;
|
||||||
|
|
||||||
BLASLONG i, is, min_i, start_is;
|
// BLASLONG info=0;
|
||||||
BLASLONG ls, min_l;
|
BLASLONG jb;
|
||||||
BLASLONG bk;
|
BLASLONG NB;
|
||||||
BLASLONG blocking;
|
|
||||||
BLASLONG range_N[2];
|
|
||||||
|
|
||||||
FLOAT *sa_trsm = (FLOAT *)((BLASLONG)sb);
|
FLOAT beta_plus[2] = { ONE, ZERO};
|
||||||
FLOAT *sa_trmm = (FLOAT *)((((BLASLONG)sb
|
FLOAT beta_minus[2] = {-ONE, ZERO};
|
||||||
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
|
|
||||||
+ GEMM_OFFSET_A);
|
|
||||||
FLOAT *sb_gemm = (FLOAT *)((((BLASLONG)sa_trmm
|
|
||||||
+ GEMM_PQ * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)
|
|
||||||
+ GEMM_OFFSET_B);
|
|
||||||
|
|
||||||
n = args -> n;
|
n = args -> n;
|
||||||
a = (FLOAT *)args -> a;
|
|
||||||
lda = args -> lda;
|
|
||||||
|
|
||||||
if (range_n) {
|
NB = GEMM_Q;
|
||||||
n = range_n[1] - range_n[0];
|
|
||||||
a += range_n[0] * (lda + 1) * COMPSIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (n <= DTB_ENTRIES) {
|
if (n <= NB) {
|
||||||
TRTI2(args, NULL, range_n, sa, sb, 0);
|
TRTI2(args, NULL, range_n, sa, sb, 0);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
blocking = GEMM_Q;
|
|
||||||
if (n <= 4 * GEMM_Q) blocking = (n + 3) / 4;
|
|
||||||
|
|
||||||
for (i = 0; i < n; i += blocking) {
|
lda = args -> lda;
|
||||||
bk = MIN(blocking, n - i);
|
a = (FLOAT *) args -> a;
|
||||||
|
args -> ldb = lda;
|
||||||
|
args -> ldc = lda;
|
||||||
|
args -> alpha = NULL;
|
||||||
|
|
||||||
if (i > 0) TRSM_OUNCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, sa_trsm);
|
for (j = 0; j < n; j += NB)
|
||||||
|
{
|
||||||
|
jb = n - j;
|
||||||
|
if ( jb > NB ) jb = NB;
|
||||||
|
|
||||||
|
args -> n = jb;
|
||||||
|
args -> m = j;
|
||||||
|
|
||||||
|
args -> a = &a[0];
|
||||||
|
args -> b = &a[(j*lda) * COMPSIZE];
|
||||||
|
args -> beta = beta_plus;
|
||||||
|
|
||||||
|
TRMM(args, NULL, NULL, sa, sb, 0);
|
||||||
|
|
||||||
|
args -> a = &a[(j+j*lda) * COMPSIZE];
|
||||||
|
args -> beta = beta_minus;
|
||||||
|
|
||||||
|
TRSM(args, NULL, NULL, sa, sb, 0);
|
||||||
|
|
||||||
|
args -> a = &a[(j+j*lda) * COMPSIZE];
|
||||||
|
|
||||||
|
TRTI2(args, NULL, range_n, sa, sb, 0);
|
||||||
|
|
||||||
if (!range_n) {
|
|
||||||
range_N[0] = i;
|
|
||||||
range_N[1] = i + bk;
|
|
||||||
} else {
|
|
||||||
range_N[0] = range_n[0] + i;
|
|
||||||
range_N[1] = range_n[0] + i + bk;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
CNAME(args, NULL, range_N, sa, sa_trmm, 0);
|
|
||||||
|
|
||||||
if (n -bk - i > 0) {
|
|
||||||
TRMM_IUTCOPY(bk, bk, a + (i + i * lda) * COMPSIZE, lda, 0, 0, sa_trmm);
|
|
||||||
|
|
||||||
for (ls = i + bk; ls < n; ls += REAL_GEMM_R) {
|
|
||||||
min_l = n - ls;
|
|
||||||
if (min_l > REAL_GEMM_R) min_l = REAL_GEMM_R;
|
|
||||||
|
|
||||||
GEMM_ONCOPY (bk, min_l, a + (i + ls * lda) * COMPSIZE, lda, sb_gemm);
|
|
||||||
|
|
||||||
if (i > 0) {
|
|
||||||
for (is = 0; is < i; is += GEMM_P) {
|
|
||||||
min_i = i - is;
|
|
||||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
|
||||||
|
|
||||||
if (ls == i + bk) {
|
|
||||||
//NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
|
||||||
|
|
||||||
GEMM_BETA(min_i, bk, 0, dm1,
|
|
||||||
#ifdef COMPLEX
|
|
||||||
ZERO,
|
|
||||||
#endif
|
|
||||||
NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
|
|
||||||
|
|
||||||
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
|
|
||||||
#ifdef COMPLEX
|
|
||||||
ZERO,
|
|
||||||
#endif
|
|
||||||
sa, sa_trsm,
|
|
||||||
a + (is + i * lda) * COMPSIZE, lda, 0);
|
|
||||||
} else {
|
|
||||||
GEMM_ITCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
|
||||||
}
|
|
||||||
|
|
||||||
GEMM_KERNEL_N(min_i, min_l, bk, dp1,
|
|
||||||
#ifdef COMPLEX
|
|
||||||
ZERO,
|
|
||||||
#endif
|
|
||||||
sa, sb_gemm,
|
|
||||||
a + (is + ls * lda) * COMPSIZE, lda);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
start_is = 0;
|
|
||||||
while (start_is < bk) start_is += GEMM_P;
|
|
||||||
start_is -= GEMM_P;
|
|
||||||
|
|
||||||
for (is = 0; is < bk; is += GEMM_P) {
|
|
||||||
min_i = bk - is;
|
|
||||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
|
||||||
|
|
||||||
TRMM_KERNEL_LN(min_i, min_l, bk, dp1,
|
|
||||||
#ifdef COMPLEX
|
|
||||||
ZERO,
|
|
||||||
#endif
|
|
||||||
sa_trmm + is * bk * COMPSIZE, sb_gemm,
|
|
||||||
a + (i + is + ls * lda) * COMPSIZE, lda, is);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
|
||||||
if (i > 0) {
|
|
||||||
for (is = 0; is < i; is += GEMM_P) {
|
|
||||||
min_i = i - is;
|
|
||||||
if (min_i > GEMM_P) min_i = GEMM_P;
|
|
||||||
|
|
||||||
//NEG_TCOPY (bk, min_i, a + (is + i * lda) * COMPSIZE, lda, sa);
|
|
||||||
GEMM_BETA(min_i, bk, 0, dm1,
|
|
||||||
#ifdef COMPLEX
|
|
||||||
ZERO,
|
|
||||||
#endif
|
|
||||||
NULL, 0, NULL, 0, a + (is + i * lda) * COMPSIZE, lda);
|
|
||||||
|
|
||||||
TRSM_KERNEL_RN(min_i, bk, bk, dm1,
|
|
||||||
#ifdef COMPLEX
|
|
||||||
ZERO,
|
|
||||||
#endif
|
|
||||||
sa, sa_trsm,
|
|
||||||
a + (is + i * lda) * COMPSIZE, lda, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
8
make.inc
8
make.inc
|
@ -1,11 +1,7 @@
|
||||||
SHELL = /bin/sh
|
SHELL = /bin/sh
|
||||||
PLAT = _LINUX
|
PLAT = _LINUX
|
||||||
DRVOPTS = $(OPTS)
|
DRVOPTS = $(OPTS)
|
||||||
LOADER = $(FORTRAN)
|
LOADER = $(FORTRAN) -pthread
|
||||||
TIMER = NONE
|
|
||||||
ARCHFLAGS= -ru
|
ARCHFLAGS= -ru
|
||||||
#RANLIB = ranlib
|
#RANLIB = ranlib
|
||||||
BLASLIB = ../../../libopenblas.a
|
|
||||||
TMGLIB = tmglib.a
|
|
||||||
#EIGSRCLIB = eigsrc.a
|
|
||||||
#LINSRCLIB = linsrc.a
|
|
||||||
|
|
Loading…
Reference in New Issue