Merge pull request #696 from ashwinyes/develop_20151120_lapack_test_fixes
Cortex A57 fixes and Lapack 3.6.0
This commit is contained in:
commit
b4380acf77
16
Makefile
16
Makefile
|
|
@ -249,10 +249,14 @@ ifndef NOFORTRAN
|
||||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
ifeq ($(FC), gfortran)
|
ifeq ($(F_COMPILER), GFORTRAN)
|
||||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
ifdef SMP
|
ifdef SMP
|
||||||
|
ifeq ($(OSNAME), WINNT)
|
||||||
|
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
else
|
||||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
endif
|
endif
|
||||||
|
|
@ -288,7 +292,17 @@ endif
|
||||||
lapack-test :
|
lapack-test :
|
||||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||||
|
ifneq ($(CROSS), 1)
|
||||||
|
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||||
|
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||||
|
endif
|
||||||
|
|
||||||
|
lapack-runtest:
|
||||||
|
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||||
|
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||||
|
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||||
|
|
||||||
|
|
||||||
blas-test:
|
blas-test:
|
||||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
||||||
|
|
|
||||||
|
|
@ -971,16 +971,29 @@ ifeq ($(DEBUG), 1)
|
||||||
COMMON_OPT += -g
|
COMMON_OPT += -g
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(DEBUG), 1)
|
||||||
|
FCOMMON_OPT += -g
|
||||||
|
endif
|
||||||
|
|
||||||
ifndef COMMON_OPT
|
ifndef COMMON_OPT
|
||||||
COMMON_OPT = -O2
|
COMMON_OPT = -O2
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifndef FCOMMON_OPT
|
||||||
|
ifeq ($(OSNAME), WINNT)
|
||||||
|
FCOMMON_OPT = -O0
|
||||||
|
else
|
||||||
|
FCOMMON_OPT = -O2 -frecursive
|
||||||
|
endif
|
||||||
|
endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||||
|
|
||||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
override FFLAGS += $(FCOMMON_OPT)
|
||||||
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
|
||||||
#MAKEOVERRIDES =
|
#MAKEOVERRIDES =
|
||||||
|
|
||||||
#For LAPACK Fortran codes.
|
#For LAPACK Fortran codes.
|
||||||
|
|
|
||||||
|
|
@ -43,28 +43,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
#ifndef ASSEMBLER
|
#ifndef ASSEMBLER
|
||||||
|
|
||||||
|
|
||||||
static void __inline blas_lock(volatile BLASULONG *address){
|
static void __inline blas_lock(volatile BLASULONG *address){
|
||||||
|
|
||||||
long register ret;
|
BLASULONG ret;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
while (*address) {YIELDING;};
|
while (*address) {YIELDING;};
|
||||||
|
|
||||||
__asm__ __volatile__(
|
__asm__ __volatile__(
|
||||||
"ldaxr %0, [%1] \n\t"
|
"mov x4, #1 \n\t"
|
||||||
"stlxr w2, %2, [%1] \n\t"
|
"1: \n\t"
|
||||||
"orr %0, %0, x2 \n\t"
|
"ldaxr x2, [%1] \n\t"
|
||||||
: "=r"(ret)
|
"cbnz x2, 1b \n\t"
|
||||||
: "r"(address), "r"(1l)
|
"2: \n\t"
|
||||||
: "memory", "x2"
|
"stxr w3, x4, [%1] \n\t"
|
||||||
|
"cbnz w3, 1b \n\t"
|
||||||
|
"mov %0, #0 \n\t"
|
||||||
|
: "=r"(ret), "=r"(address)
|
||||||
|
: "1"(address)
|
||||||
|
: "memory", "x2" , "x3", "x4"
|
||||||
|
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
} while (ret);
|
} while (ret);
|
||||||
MB;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define BLAS_LOCK_DEFINED
|
#define BLAS_LOCK_DEFINED
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
static inline int blas_quickdivide(blasint x, blasint y){
|
static inline int blas_quickdivide(blasint x, blasint y){
|
||||||
return x / y;
|
return x / y;
|
||||||
}
|
}
|
||||||
|
|
@ -110,7 +121,7 @@ REALNAME:
|
||||||
#define HUGE_PAGESIZE ( 4 << 20)
|
#define HUGE_PAGESIZE ( 4 << 20)
|
||||||
|
|
||||||
#if defined(CORTEXA57)
|
#if defined(CORTEXA57)
|
||||||
#define BUFFER_SIZE (40 << 20)
|
#define BUFFER_SIZE (20 << 20)
|
||||||
#else
|
#else
|
||||||
#define BUFFER_SIZE (16 << 20)
|
#define BUFFER_SIZE (16 << 20)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -104,6 +104,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <linux/unistd.h>
|
#include <linux/unistd.h>
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <sys/resource.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
|
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||||
|
|
@ -1361,6 +1363,18 @@ void CONSTRUCTOR gotoblas_init(void) {
|
||||||
gotoblas_memory_init();
|
gotoblas_memory_init();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(OS_LINUX)
|
||||||
|
struct rlimit curlimit;
|
||||||
|
if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 )
|
||||||
|
{
|
||||||
|
if ( curlimit.rlim_cur != curlimit.rlim_max )
|
||||||
|
{
|
||||||
|
curlimit.rlim_cur = curlimit.rlim_max;
|
||||||
|
setrlimit(RLIMIT_STACK, &curlimit);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
if (blas_cpu_number == 0) blas_get_cpu_number();
|
if (blas_cpu_number == 0) blas_get_cpu_number();
|
||||||
#ifdef SMP_SERVER
|
#ifdef SMP_SERVER
|
||||||
|
|
|
||||||
|
|
@ -173,18 +173,18 @@
|
||||||
sgbbrd, sgbcon, sgbequ, sgbrfs, sgbsv,
|
sgbbrd, sgbcon, sgbequ, sgbrfs, sgbsv,
|
||||||
sgbsvx, sgbtf2, sgbtrf, sgbtrs, sgebak, sgebal, sgebd2,
|
sgbsvx, sgbtf2, sgbtrf, sgbtrs, sgebak, sgebal, sgebd2,
|
||||||
sgebrd, sgecon, sgeequ, sgees, sgeesx, sgeev, sgeevx,
|
sgebrd, sgecon, sgeequ, sgees, sgeesx, sgeev, sgeevx,
|
||||||
sgegs, sgegv, sgehd2, sgehrd, sgelq2, sgelqf,
|
sgehd2, sgehrd, sgelq2, sgelqf,
|
||||||
sgels, sgelsd, sgelss, sgelsx, sgelsy, sgeql2, sgeqlf,
|
sgels, sgelsd, sgelss, sgelsy, sgeql2, sgeqlf,
|
||||||
sgeqp3, sgeqpf, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs,
|
sgeqp3, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs,
|
||||||
sgerq2, sgerqf, sgesc2, sgesdd, sgesvd, sgesvx,
|
sgerq2, sgerqf, sgesc2, sgesdd, sgesvd, sgesvx,
|
||||||
sgetc2, sgetri,
|
sgetc2, sgetri,
|
||||||
sggbak, sggbal, sgges, sggesx, sggev, sggevx,
|
sggbak, sggbal, sgges, sggesx, sggev, sggevx,
|
||||||
sggglm, sgghrd, sgglse, sggqrf,
|
sggglm, sgghrd, sgglse, sggqrf,
|
||||||
sggrqf, sggsvd, sggsvp, sgtcon, sgtrfs, sgtsv,
|
sggrqf, sgtcon, sgtrfs, sgtsv,
|
||||||
sgtsvx, sgttrf, sgttrs, sgtts2, shgeqz,
|
sgtsvx, sgttrf, sgttrs, sgtts2, shgeqz,
|
||||||
shsein, shseqr, slabrd, slacon, slacn2,
|
shsein, shseqr, slabrd, slacon, slacn2,
|
||||||
slaein, slaexc, slag2, slags2, slagtm, slagv2, slahqr,
|
slaein, slaexc, slag2, slags2, slagtm, slagv2, slahqr,
|
||||||
slahrd, slahr2, slaic1, slaln2, slals0, slalsa, slalsd,
|
slahr2, slaic1, slaln2, slals0, slalsa, slalsd,
|
||||||
slangb, slange, slangt, slanhs, slansb, slansp,
|
slangb, slange, slangt, slanhs, slansb, slansp,
|
||||||
slansy, slantb, slantp, slantr, slanv2,
|
slansy, slantb, slantp, slantr, slanv2,
|
||||||
slapll, slapmt,
|
slapll, slapmt,
|
||||||
|
|
@ -194,7 +194,7 @@
|
||||||
slarf, slarfb, slarfg, slarfgp, slarft, slarfx, slargv,
|
slarf, slarfb, slarfg, slarfgp, slarft, slarfx, slargv,
|
||||||
slarrv, slartv,
|
slarrv, slartv,
|
||||||
slarz, slarzb, slarzt, slasy2, slasyf,
|
slarz, slarzb, slarzt, slasy2, slasyf,
|
||||||
slatbs, slatdf, slatps, slatrd, slatrs, slatrz, slatzm,
|
slatbs, slatdf, slatps, slatrd, slatrs, slatrz,
|
||||||
sopgtr, sopmtr, sorg2l, sorg2r,
|
sopgtr, sopmtr, sorg2l, sorg2r,
|
||||||
sorgbr, sorghr, sorgl2, sorglq, sorgql, sorgqr, sorgr2,
|
sorgbr, sorghr, sorgl2, sorglq, sorgql, sorgqr, sorgr2,
|
||||||
sorgrq, sorgtr, sorm2l, sorm2r,
|
sorgrq, sorgtr, sorm2l, sorm2r,
|
||||||
|
|
@ -220,7 +220,7 @@
|
||||||
stgsja, stgsna, stgsy2, stgsyl, stpcon, stprfs, stptri,
|
stgsja, stgsna, stgsy2, stgsyl, stpcon, stprfs, stptri,
|
||||||
stptrs,
|
stptrs,
|
||||||
strcon, strevc, strexc, strrfs, strsen, strsna, strsyl,
|
strcon, strevc, strexc, strrfs, strsen, strsna, strsyl,
|
||||||
strtrs, stzrqf, stzrzf, sstemr,
|
strtrs, stzrzf, sstemr,
|
||||||
slansf, spftrf, spftri, spftrs, ssfrk, stfsm, stftri, stfttp,
|
slansf, spftrf, spftri, spftrs, ssfrk, stfsm, stftri, stfttp,
|
||||||
stfttr, stpttf, stpttr, strttf, strttp,
|
stfttr, stpttf, stpttr, strttf, strttp,
|
||||||
sgejsv, sgesvj, sgsvj0, sgsvj1,
|
sgejsv, sgesvj, sgsvj0, sgsvj1,
|
||||||
|
|
@ -245,14 +245,13 @@
|
||||||
cbdsqr, cgbbrd, cgbcon, cgbequ, cgbrfs, cgbsv, cgbsvx,
|
cbdsqr, cgbbrd, cgbcon, cgbequ, cgbrfs, cgbsv, cgbsvx,
|
||||||
cgbtf2, cgbtrf, cgbtrs, cgebak, cgebal, cgebd2, cgebrd,
|
cgbtf2, cgbtrf, cgbtrs, cgebak, cgebal, cgebd2, cgebrd,
|
||||||
cgecon, cgeequ, cgees, cgeesx, cgeev, cgeevx,
|
cgecon, cgeequ, cgees, cgeesx, cgeev, cgeevx,
|
||||||
cgegs, cgegv, cgehd2, cgehrd, cgelq2, cgelqf,
|
cgehd2, cgehrd, cgelq2, cgelqf,
|
||||||
cgels, cgelsd, cgelss, cgelsx, cgelsy, cgeql2, cgeqlf, cgeqp3,
|
cgels, cgelsd, cgelss, cgelsy, cgeql2, cgeqlf, cgeqp3,
|
||||||
cgeqpf, cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs,
|
cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs,
|
||||||
cgerq2, cgerqf, cgesc2, cgesdd, cgesvd,
|
cgerq2, cgerqf, cgesc2, cgesdd, cgesvd,
|
||||||
cgesvx, cgetc2, cgetri,
|
cgesvx, cgetc2, cgetri,
|
||||||
cggbak, cggbal, cgges, cggesx, cggev, cggevx, cggglm,
|
cggbak, cggbal, cgges, cggesx, cggev, cggevx, cggglm,
|
||||||
cgghrd, cgglse, cggqrf, cggrqf,
|
cgghrd, cgglse, cggqrf, cggrqf,
|
||||||
cggsvd, cggsvp,
|
|
||||||
cgtcon, cgtrfs, cgtsv, cgtsvx, cgttrf, cgttrs, cgtts2, chbev,
|
cgtcon, cgtrfs, cgtsv, cgtsvx, cgttrf, cgttrs, cgtts2, chbev,
|
||||||
chbevd, chbevx, chbgst, chbgv, chbgvd, chbgvx, chbtrd,
|
chbevd, chbevx, chbgst, chbgv, chbgvd, chbgvx, chbtrd,
|
||||||
checon, cheev, cheevd, cheevr, cheevx, chegs2, chegst,
|
checon, cheev, cheevd, cheevr, cheevx, chegs2, chegst,
|
||||||
|
|
@ -267,7 +266,7 @@
|
||||||
claed0, claed7, claed8,
|
claed0, claed7, claed8,
|
||||||
claein, claesy, claev2, clags2, clagtm,
|
claein, claesy, claev2, clags2, clagtm,
|
||||||
clahef, clahqr,
|
clahef, clahqr,
|
||||||
clahrd, clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt,
|
clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt,
|
||||||
clanhb, clanhe,
|
clanhb, clanhe,
|
||||||
clanhp, clanhs, clanht, clansb, clansp, clansy, clantb,
|
clanhp, clanhs, clanht, clansb, clansp, clansy, clantb,
|
||||||
clantp, clantr, clapll, clapmt, clarcm, claqgb, claqge,
|
clantp, clantr, clapll, clapmt, clarcm, claqgb, claqge,
|
||||||
|
|
@ -278,7 +277,7 @@
|
||||||
clarfx, clargv, clarnv, clarrv, clartg, clartv,
|
clarfx, clargv, clarnv, clarrv, clartg, clartv,
|
||||||
clarz, clarzb, clarzt, clascl, claset, clasr, classq,
|
clarz, clarzb, clarzt, clascl, claset, clasr, classq,
|
||||||
clasyf, clatbs, clatdf, clatps, clatrd, clatrs, clatrz,
|
clasyf, clatbs, clatdf, clatps, clatrd, clatrs, clatrz,
|
||||||
clatzm, cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv,
|
cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv,
|
||||||
cpbsvx, cpbtf2, cpbtrf, cpbtrs, cpocon, cpoequ, cporfs,
|
cpbsvx, cpbtf2, cpbtrf, cpbtrs, cpocon, cpoequ, cporfs,
|
||||||
cposv, cposvx, cpstrf, cpstf2,
|
cposv, cposvx, cpstrf, cpstf2,
|
||||||
cppcon, cppequ, cpprfs, cppsv, cppsvx, cpptrf, cpptri, cpptrs,
|
cppcon, cppequ, cpprfs, cppsv, cppsvx, cpptrf, cpptri, cpptrs,
|
||||||
|
|
@ -293,7 +292,7 @@
|
||||||
ctgexc, ctgsen, ctgsja, ctgsna, ctgsy2, ctgsyl, ctpcon,
|
ctgexc, ctgsen, ctgsja, ctgsna, ctgsy2, ctgsyl, ctpcon,
|
||||||
ctprfs, ctptri,
|
ctprfs, ctptri,
|
||||||
ctptrs, ctrcon, ctrevc, ctrexc, ctrrfs, ctrsen, ctrsna,
|
ctptrs, ctrcon, ctrevc, ctrexc, ctrrfs, ctrsen, ctrsna,
|
||||||
ctrsyl, ctrtrs, ctzrqf, ctzrzf, cung2l, cung2r,
|
ctrsyl, ctrtrs, ctzrzf, cung2l, cung2r,
|
||||||
cungbr, cunghr, cungl2, cunglq, cungql, cungqr, cungr2,
|
cungbr, cunghr, cungl2, cunglq, cungql, cungqr, cungr2,
|
||||||
cungrq, cungtr, cunm2l, cunm2r, cunmbr, cunmhr, cunml2,
|
cungrq, cungtr, cunm2l, cunm2r, cunmbr, cunmhr, cunml2,
|
||||||
cunmlq, cunmql, cunmqr, cunmr2, cunmr3, cunmrq, cunmrz,
|
cunmlq, cunmql, cunmqr, cunmr2, cunmr3, cunmrq, cunmrz,
|
||||||
|
|
@ -321,18 +320,18 @@
|
||||||
dgbbrd, dgbcon, dgbequ, dgbrfs, dgbsv,
|
dgbbrd, dgbcon, dgbequ, dgbrfs, dgbsv,
|
||||||
dgbsvx, dgbtf2, dgbtrf, dgbtrs, dgebak, dgebal, dgebd2,
|
dgbsvx, dgbtf2, dgbtrf, dgbtrs, dgebak, dgebal, dgebd2,
|
||||||
dgebrd, dgecon, dgeequ, dgees, dgeesx, dgeev, dgeevx,
|
dgebrd, dgecon, dgeequ, dgees, dgeesx, dgeev, dgeevx,
|
||||||
dgegs, dgegv, dgehd2, dgehrd, dgelq2, dgelqf,
|
dgehd2, dgehrd, dgelq2, dgelqf,
|
||||||
dgels, dgelsd, dgelss, dgelsx, dgelsy, dgeql2, dgeqlf,
|
dgels, dgelsd, dgelss, dgelsy, dgeql2, dgeqlf,
|
||||||
dgeqp3, dgeqpf, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs,
|
dgeqp3, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs,
|
||||||
dgerq2, dgerqf, dgesc2, dgesdd, dgesvd, dgesvx,
|
dgerq2, dgerqf, dgesc2, dgesdd, dgesvd, dgesvx,
|
||||||
dgetc2, dgetri,
|
dgetc2, dgetri,
|
||||||
dggbak, dggbal, dgges, dggesx, dggev, dggevx,
|
dggbak, dggbal, dgges, dggesx, dggev, dggevx,
|
||||||
dggglm, dgghrd, dgglse, dggqrf,
|
dggglm, dgghrd, dgglse, dggqrf,
|
||||||
dggrqf, dggsvd, dggsvp, dgtcon, dgtrfs, dgtsv,
|
dggrqf, dgtcon, dgtrfs, dgtsv,
|
||||||
dgtsvx, dgttrf, dgttrs, dgtts2, dhgeqz,
|
dgtsvx, dgttrf, dgttrs, dgtts2, dhgeqz,
|
||||||
dhsein, dhseqr, dlabrd, dlacon, dlacn2,
|
dhsein, dhseqr, dlabrd, dlacon, dlacn2,
|
||||||
dlaein, dlaexc, dlag2, dlags2, dlagtm, dlagv2, dlahqr,
|
dlaein, dlaexc, dlag2, dlags2, dlagtm, dlagv2, dlahqr,
|
||||||
dlahrd, dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd,
|
dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd,
|
||||||
dlangb, dlange, dlangt, dlanhs, dlansb, dlansp,
|
dlangb, dlange, dlangt, dlanhs, dlansb, dlansp,
|
||||||
dlansy, dlantb, dlantp, dlantr, dlanv2,
|
dlansy, dlantb, dlantp, dlantr, dlanv2,
|
||||||
dlapll, dlapmt,
|
dlapll, dlapmt,
|
||||||
|
|
@ -342,7 +341,7 @@
|
||||||
dlarf, dlarfb, dlarfg, dlarfgp, dlarft, dlarfx,
|
dlarf, dlarfb, dlarfg, dlarfgp, dlarft, dlarfx,
|
||||||
dlargv, dlarrv, dlartv,
|
dlargv, dlarrv, dlartv,
|
||||||
dlarz, dlarzb, dlarzt, dlasy2, dlasyf,
|
dlarz, dlarzb, dlarzt, dlasy2, dlasyf,
|
||||||
dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz, dlatzm,
|
dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz,
|
||||||
dopgtr, dopmtr, dorg2l, dorg2r,
|
dopgtr, dopmtr, dorg2l, dorg2r,
|
||||||
dorgbr, dorghr, dorgl2, dorglq, dorgql, dorgqr, dorgr2,
|
dorgbr, dorghr, dorgl2, dorglq, dorgql, dorgqr, dorgr2,
|
||||||
dorgrq, dorgtr, dorm2l, dorm2r,
|
dorgrq, dorgtr, dorm2l, dorm2r,
|
||||||
|
|
@ -368,7 +367,7 @@
|
||||||
dtgsja, dtgsna, dtgsy2, dtgsyl, dtpcon, dtprfs, dtptri,
|
dtgsja, dtgsna, dtgsy2, dtgsyl, dtpcon, dtprfs, dtptri,
|
||||||
dtptrs,
|
dtptrs,
|
||||||
dtrcon, dtrevc, dtrexc, dtrrfs, dtrsen, dtrsna, dtrsyl,
|
dtrcon, dtrevc, dtrexc, dtrrfs, dtrsen, dtrsna, dtrsyl,
|
||||||
dtrtrs, dtzrqf, dtzrzf, dstemr,
|
dtrtrs, dtzrzf, dstemr,
|
||||||
dsgesv, dsposv, dlag2s, slag2d, dlat2s,
|
dsgesv, dsposv, dlag2s, slag2d, dlat2s,
|
||||||
dlansf, dpftrf, dpftri, dpftrs, dsfrk, dtfsm, dtftri, dtfttp,
|
dlansf, dpftrf, dpftri, dpftrs, dsfrk, dtfsm, dtftri, dtfttp,
|
||||||
dtfttr, dtpttf, dtpttr, dtrttf, dtrttp,
|
dtfttr, dtpttf, dtpttr, dtrttf, dtrttp,
|
||||||
|
|
@ -387,14 +386,13 @@
|
||||||
zbdsqr, zgbbrd, zgbcon, zgbequ, zgbrfs, zgbsv, zgbsvx,
|
zbdsqr, zgbbrd, zgbcon, zgbequ, zgbrfs, zgbsv, zgbsvx,
|
||||||
zgbtf2, zgbtrf, zgbtrs, zgebak, zgebal, zgebd2, zgebrd,
|
zgbtf2, zgbtrf, zgbtrs, zgebak, zgebal, zgebd2, zgebrd,
|
||||||
zgecon, zgeequ, zgees, zgeesx, zgeev, zgeevx,
|
zgecon, zgeequ, zgees, zgeesx, zgeev, zgeevx,
|
||||||
zgegs, zgegv, zgehd2, zgehrd, zgelq2, zgelqf,
|
zgehd2, zgehrd, zgelq2, zgelqf,
|
||||||
zgels, zgelsd, zgelss, zgelsx, zgelsy, zgeql2, zgeqlf, zgeqp3,
|
zgels, zgelsd, zgelss, zgelsy, zgeql2, zgeqlf, zgeqp3,
|
||||||
zgeqpf, zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf,
|
zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf,
|
||||||
zgesc2, zgesdd, zgesvd, zgesvx, zgetc2,
|
zgesc2, zgesdd, zgesvd, zgesvx, zgetc2,
|
||||||
zgetri,
|
zgetri,
|
||||||
zggbak, zggbal, zgges, zggesx, zggev, zggevx, zggglm,
|
zggbak, zggbal, zgges, zggesx, zggev, zggevx, zggglm,
|
||||||
zgghrd, zgglse, zggqrf, zggrqf,
|
zgghrd, zgglse, zggqrf, zggrqf,
|
||||||
zggsvd, zggsvp,
|
|
||||||
zgtcon, zgtrfs, zgtsv, zgtsvx, zgttrf, zgttrs, zgtts2, zhbev,
|
zgtcon, zgtrfs, zgtsv, zgtsvx, zgttrf, zgttrs, zgtts2, zhbev,
|
||||||
zhbevd, zhbevx, zhbgst, zhbgv, zhbgvd, zhbgvx, zhbtrd,
|
zhbevd, zhbevx, zhbgst, zhbgv, zhbgvd, zhbgvx, zhbtrd,
|
||||||
zhecon, zheev, zheevd, zheevr, zheevx, zhegs2, zhegst,
|
zhecon, zheev, zheevd, zheevr, zheevx, zhegs2, zhegst,
|
||||||
|
|
@ -409,7 +407,7 @@
|
||||||
zlaed0, zlaed7, zlaed8,
|
zlaed0, zlaed7, zlaed8,
|
||||||
zlaein, zlaesy, zlaev2, zlags2, zlagtm,
|
zlaein, zlaesy, zlaev2, zlags2, zlagtm,
|
||||||
zlahef, zlahqr,
|
zlahef, zlahqr,
|
||||||
zlahrd, zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange,
|
zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange,
|
||||||
zlangt, zlanhb,
|
zlangt, zlanhb,
|
||||||
zlanhe,
|
zlanhe,
|
||||||
zlanhp, zlanhs, zlanht, zlansb, zlansp, zlansy, zlantb,
|
zlanhp, zlanhs, zlanht, zlansb, zlansp, zlansy, zlantb,
|
||||||
|
|
@ -422,7 +420,7 @@
|
||||||
zlarfx, zlargv, zlarnv, zlarrv, zlartg, zlartv,
|
zlarfx, zlargv, zlarnv, zlarrv, zlartg, zlartv,
|
||||||
zlarz, zlarzb, zlarzt, zlascl, zlaset, zlasr,
|
zlarz, zlarzb, zlarzt, zlascl, zlaset, zlasr,
|
||||||
zlassq, zlasyf,
|
zlassq, zlasyf,
|
||||||
zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz, zlatzm,
|
zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz,
|
||||||
zpbcon, zpbequ, zpbrfs, zpbstf, zpbsv,
|
zpbcon, zpbequ, zpbrfs, zpbstf, zpbsv,
|
||||||
zpbsvx, zpbtf2, zpbtrf, zpbtrs, zpocon, zpoequ, zporfs,
|
zpbsvx, zpbtf2, zpbtrf, zpbtrs, zpocon, zpoequ, zporfs,
|
||||||
zposv, zposvx, zpotrs, zpstrf, zpstf2,
|
zposv, zposvx, zpotrs, zpstrf, zpstf2,
|
||||||
|
|
@ -438,7 +436,7 @@
|
||||||
ztgexc, ztgsen, ztgsja, ztgsna, ztgsy2, ztgsyl, ztpcon,
|
ztgexc, ztgsen, ztgsja, ztgsna, ztgsy2, ztgsyl, ztpcon,
|
||||||
ztprfs, ztptri,
|
ztprfs, ztptri,
|
||||||
ztptrs, ztrcon, ztrevc, ztrexc, ztrrfs, ztrsen, ztrsna,
|
ztptrs, ztrcon, ztrevc, ztrexc, ztrrfs, ztrsen, ztrsna,
|
||||||
ztrsyl, ztrtrs, ztzrqf, ztzrzf, zung2l,
|
ztrsyl, ztrtrs, ztzrzf, zung2l,
|
||||||
zung2r, zungbr, zunghr, zungl2, zunglq, zungql, zungqr, zungr2,
|
zung2r, zungbr, zunghr, zungl2, zunglq, zungql, zungqr, zungr2,
|
||||||
zungrq, zungtr, zunm2l, zunm2r, zunmbr, zunmhr, zunml2,
|
zungrq, zungtr, zunm2l, zunm2r, zunmbr, zunmhr, zunml2,
|
||||||
zunmlq, zunmql, zunmqr, zunmr2, zunmr3, zunmrq, zunmrz,
|
zunmlq, zunmql, zunmqr, zunmr2, zunmr3, zunmrq, zunmrz,
|
||||||
|
|
@ -452,6 +450,140 @@
|
||||||
zunbdb5, zunbdb6, zuncsd, zuncsd2by1,
|
zunbdb5, zunbdb6, zuncsd, zuncsd2by1,
|
||||||
zgeqrt, zgeqrt2, zgeqrt3, zgemqrt,
|
zgeqrt, zgeqrt2, zgeqrt3, zgemqrt,
|
||||||
ztpqrt, ztpqrt2, ztpmqrt, ztprfb,
|
ztpqrt, ztpqrt2, ztpmqrt, ztprfb,
|
||||||
|
# functions added for lapack-3.6.0
|
||||||
|
|
||||||
|
cgejsv,
|
||||||
|
cgesvdx,
|
||||||
|
cgesvj,
|
||||||
|
cgetrf2,
|
||||||
|
cgges3,
|
||||||
|
cggev3,
|
||||||
|
cgghd3,
|
||||||
|
cggsvd3,
|
||||||
|
cggsvp3,
|
||||||
|
cgsvj0,
|
||||||
|
cgsvj1,
|
||||||
|
clagge,
|
||||||
|
claghe,
|
||||||
|
clagsy,
|
||||||
|
clahilb,
|
||||||
|
clakf2,
|
||||||
|
clarge,
|
||||||
|
clarnd,
|
||||||
|
claror,
|
||||||
|
clarot,
|
||||||
|
clatm1,
|
||||||
|
clatm2,
|
||||||
|
clatm3,
|
||||||
|
clatm5,
|
||||||
|
clatm6,
|
||||||
|
clatme,
|
||||||
|
clatmr,
|
||||||
|
clatms,
|
||||||
|
clatmt,
|
||||||
|
cpotrf2,
|
||||||
|
csbmv,
|
||||||
|
cspr2,
|
||||||
|
csyr2,
|
||||||
|
cunm22,
|
||||||
|
dbdsvdx,
|
||||||
|
dgesvdx,
|
||||||
|
dgetrf2,
|
||||||
|
dgges3,
|
||||||
|
dggev3,
|
||||||
|
dgghd3,
|
||||||
|
dggsvd3,
|
||||||
|
dggsvp3,
|
||||||
|
dladiv2,
|
||||||
|
dlagge,
|
||||||
|
dlagsy,
|
||||||
|
dlahilb,
|
||||||
|
dlakf2,
|
||||||
|
dlaran,
|
||||||
|
dlarge,
|
||||||
|
dlarnd,
|
||||||
|
dlaror,
|
||||||
|
dlarot,
|
||||||
|
dlatm1,
|
||||||
|
dlatm2,
|
||||||
|
dlatm3,
|
||||||
|
dlatm5,
|
||||||
|
dlatm6,
|
||||||
|
dlatm7,
|
||||||
|
dlatme,
|
||||||
|
dlatmr,
|
||||||
|
dlatms,
|
||||||
|
dlatmt,
|
||||||
|
dorm22,
|
||||||
|
dpotrf2,
|
||||||
|
dsecnd,
|
||||||
|
sbdsvdx,
|
||||||
|
second,
|
||||||
|
sgesvdx,
|
||||||
|
sgetrf2,
|
||||||
|
sgges3,
|
||||||
|
sggev3,
|
||||||
|
sgghd3,
|
||||||
|
sggsvd3,
|
||||||
|
sggsvp3,
|
||||||
|
sladiv2,
|
||||||
|
slagge,
|
||||||
|
slagsy,
|
||||||
|
slahilb,
|
||||||
|
slakf2,
|
||||||
|
slaran,
|
||||||
|
slarge,
|
||||||
|
slarnd,
|
||||||
|
slaror,
|
||||||
|
slarot,
|
||||||
|
slatm1,
|
||||||
|
slatm2,
|
||||||
|
slatm3,
|
||||||
|
slatm5,
|
||||||
|
slatm6,
|
||||||
|
slatm7,
|
||||||
|
slatme,
|
||||||
|
slatmr,
|
||||||
|
slatms,
|
||||||
|
slatmt,
|
||||||
|
sorm22,
|
||||||
|
spotrf2,
|
||||||
|
xerbla,
|
||||||
|
zgejsv,
|
||||||
|
zgesvdx,
|
||||||
|
zgesvj,
|
||||||
|
zgetrf2,
|
||||||
|
zgges3,
|
||||||
|
zggev3,
|
||||||
|
zgghd3,
|
||||||
|
zggsvd3,
|
||||||
|
zggsvp3,
|
||||||
|
zgsvj0,
|
||||||
|
zgsvj1,
|
||||||
|
zlagge,
|
||||||
|
zlaghe,
|
||||||
|
zlagsy,
|
||||||
|
zlahilb,
|
||||||
|
zlakf2,
|
||||||
|
zlarge,
|
||||||
|
zlarnd,
|
||||||
|
zlaror,
|
||||||
|
zlarot,
|
||||||
|
zlatm1,
|
||||||
|
zlatm2,
|
||||||
|
zlatm3,
|
||||||
|
zlatm5,
|
||||||
|
zlatm6,
|
||||||
|
zlatme,
|
||||||
|
zlatmr,
|
||||||
|
zlatms,
|
||||||
|
zlatmt,
|
||||||
|
zpotrf2,
|
||||||
|
zsbmv,
|
||||||
|
zspr2,
|
||||||
|
zsyr2,
|
||||||
|
zunm22
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
@lapack_extendedprecision_objs = (
|
@lapack_extendedprecision_objs = (
|
||||||
|
|
@ -682,8 +814,6 @@
|
||||||
LAPACKE_cgeqlf_work,
|
LAPACKE_cgeqlf_work,
|
||||||
LAPACKE_cgeqp3,
|
LAPACKE_cgeqp3,
|
||||||
LAPACKE_cgeqp3_work,
|
LAPACKE_cgeqp3_work,
|
||||||
LAPACKE_cgeqpf,
|
|
||||||
LAPACKE_cgeqpf_work,
|
|
||||||
LAPACKE_cgeqr2,
|
LAPACKE_cgeqr2,
|
||||||
LAPACKE_cgeqr2_work,
|
LAPACKE_cgeqr2_work,
|
||||||
LAPACKE_cgeqrf,
|
LAPACKE_cgeqrf,
|
||||||
|
|
@ -738,10 +868,6 @@
|
||||||
LAPACKE_cggqrf_work,
|
LAPACKE_cggqrf_work,
|
||||||
LAPACKE_cggrqf,
|
LAPACKE_cggrqf,
|
||||||
LAPACKE_cggrqf_work,
|
LAPACKE_cggrqf_work,
|
||||||
LAPACKE_cggsvd,
|
|
||||||
LAPACKE_cggsvd_work,
|
|
||||||
LAPACKE_cggsvp,
|
|
||||||
LAPACKE_cggsvp_work,
|
|
||||||
LAPACKE_cgtcon,
|
LAPACKE_cgtcon,
|
||||||
LAPACKE_cgtcon_work,
|
LAPACKE_cgtcon_work,
|
||||||
LAPACKE_cgtrfs,
|
LAPACKE_cgtrfs,
|
||||||
|
|
@ -1186,8 +1312,6 @@
|
||||||
LAPACKE_dgeqlf_work,
|
LAPACKE_dgeqlf_work,
|
||||||
LAPACKE_dgeqp3,
|
LAPACKE_dgeqp3,
|
||||||
LAPACKE_dgeqp3_work,
|
LAPACKE_dgeqp3_work,
|
||||||
LAPACKE_dgeqpf,
|
|
||||||
LAPACKE_dgeqpf_work,
|
|
||||||
LAPACKE_dgeqr2,
|
LAPACKE_dgeqr2,
|
||||||
LAPACKE_dgeqr2_work,
|
LAPACKE_dgeqr2_work,
|
||||||
LAPACKE_dgeqrf,
|
LAPACKE_dgeqrf,
|
||||||
|
|
@ -1244,10 +1368,6 @@
|
||||||
LAPACKE_dggqrf_work,
|
LAPACKE_dggqrf_work,
|
||||||
LAPACKE_dggrqf,
|
LAPACKE_dggrqf,
|
||||||
LAPACKE_dggrqf_work,
|
LAPACKE_dggrqf_work,
|
||||||
LAPACKE_dggsvd,
|
|
||||||
LAPACKE_dggsvd_work,
|
|
||||||
LAPACKE_dggsvp,
|
|
||||||
LAPACKE_dggsvp_work,
|
|
||||||
LAPACKE_dgtcon,
|
LAPACKE_dgtcon,
|
||||||
LAPACKE_dgtcon_work,
|
LAPACKE_dgtcon_work,
|
||||||
LAPACKE_dgtrfs,
|
LAPACKE_dgtrfs,
|
||||||
|
|
@ -1676,8 +1796,6 @@
|
||||||
LAPACKE_sgeqlf_work,
|
LAPACKE_sgeqlf_work,
|
||||||
LAPACKE_sgeqp3,
|
LAPACKE_sgeqp3,
|
||||||
LAPACKE_sgeqp3_work,
|
LAPACKE_sgeqp3_work,
|
||||||
LAPACKE_sgeqpf,
|
|
||||||
LAPACKE_sgeqpf_work,
|
|
||||||
LAPACKE_sgeqr2,
|
LAPACKE_sgeqr2,
|
||||||
LAPACKE_sgeqr2_work,
|
LAPACKE_sgeqr2_work,
|
||||||
LAPACKE_sgeqrf,
|
LAPACKE_sgeqrf,
|
||||||
|
|
@ -1734,10 +1852,6 @@
|
||||||
LAPACKE_sggqrf_work,
|
LAPACKE_sggqrf_work,
|
||||||
LAPACKE_sggrqf,
|
LAPACKE_sggrqf,
|
||||||
LAPACKE_sggrqf_work,
|
LAPACKE_sggrqf_work,
|
||||||
LAPACKE_sggsvd,
|
|
||||||
LAPACKE_sggsvd_work,
|
|
||||||
LAPACKE_sggsvp,
|
|
||||||
LAPACKE_sggsvp_work,
|
|
||||||
LAPACKE_sgtcon,
|
LAPACKE_sgtcon,
|
||||||
LAPACKE_sgtcon_work,
|
LAPACKE_sgtcon_work,
|
||||||
LAPACKE_sgtrfs,
|
LAPACKE_sgtrfs,
|
||||||
|
|
@ -2158,8 +2272,6 @@
|
||||||
LAPACKE_zgeqlf_work,
|
LAPACKE_zgeqlf_work,
|
||||||
LAPACKE_zgeqp3,
|
LAPACKE_zgeqp3,
|
||||||
LAPACKE_zgeqp3_work,
|
LAPACKE_zgeqp3_work,
|
||||||
LAPACKE_zgeqpf,
|
|
||||||
LAPACKE_zgeqpf_work,
|
|
||||||
LAPACKE_zgeqr2,
|
LAPACKE_zgeqr2,
|
||||||
LAPACKE_zgeqr2_work,
|
LAPACKE_zgeqr2_work,
|
||||||
LAPACKE_zgeqrf,
|
LAPACKE_zgeqrf,
|
||||||
|
|
@ -2214,10 +2326,6 @@
|
||||||
LAPACKE_zggqrf_work,
|
LAPACKE_zggqrf_work,
|
||||||
LAPACKE_zggrqf,
|
LAPACKE_zggrqf,
|
||||||
LAPACKE_zggrqf_work,
|
LAPACKE_zggrqf_work,
|
||||||
LAPACKE_zggsvd,
|
|
||||||
LAPACKE_zggsvd_work,
|
|
||||||
LAPACKE_zggsvp,
|
|
||||||
LAPACKE_zggsvp_work,
|
|
||||||
LAPACKE_zgtcon,
|
LAPACKE_zgtcon,
|
||||||
LAPACKE_zgtcon_work,
|
LAPACKE_zgtcon_work,
|
||||||
LAPACKE_zgtrfs,
|
LAPACKE_zgtrfs,
|
||||||
|
|
@ -2707,6 +2815,134 @@
|
||||||
LAPACKE_slagsy_work,
|
LAPACKE_slagsy_work,
|
||||||
LAPACKE_zlagsy,
|
LAPACKE_zlagsy,
|
||||||
LAPACKE_zlagsy_work,
|
LAPACKE_zlagsy_work,
|
||||||
|
## new function from lapack-3.6.0
|
||||||
|
|
||||||
|
LAPACKE_cgejsv,
|
||||||
|
LAPACKE_cgejsv_work,
|
||||||
|
LAPACKE_cgesvdx,
|
||||||
|
LAPACKE_cgesvdx_work,
|
||||||
|
LAPACKE_cgesvj,
|
||||||
|
LAPACKE_cgesvj_work,
|
||||||
|
LAPACKE_cgetrf2,
|
||||||
|
LAPACKE_cgetrf2_work,
|
||||||
|
LAPACKE_cgges3,
|
||||||
|
LAPACKE_cgges3_work,
|
||||||
|
LAPACKE_cggev3,
|
||||||
|
LAPACKE_cggev3_work,
|
||||||
|
LAPACKE_cgghd3,
|
||||||
|
LAPACKE_cgghd3_work,
|
||||||
|
LAPACKE_cggsvd3,
|
||||||
|
LAPACKE_cggsvd3_work,
|
||||||
|
LAPACKE_cggsvp3,
|
||||||
|
LAPACKE_cggsvp3_work,
|
||||||
|
LAPACKE_chetrf_rook,
|
||||||
|
LAPACKE_chetrf_rook_work,
|
||||||
|
LAPACKE_chetrs_rook,
|
||||||
|
LAPACKE_chetrs_rook_work,
|
||||||
|
LAPACKE_clapmt,
|
||||||
|
LAPACKE_clapmt_work,
|
||||||
|
LAPACKE_clascl,
|
||||||
|
LAPACKE_clascl_work,
|
||||||
|
LAPACKE_cpotrf2,
|
||||||
|
LAPACKE_cpotrf2_work,
|
||||||
|
LAPACKE_csytrf_rook,
|
||||||
|
LAPACKE_csytrf_rook_work,
|
||||||
|
LAPACKE_csytrs_rook,
|
||||||
|
LAPACKE_csytrs_rook_work,
|
||||||
|
LAPACKE_cuncsd2by1,
|
||||||
|
LAPACKE_cuncsd2by1_work,
|
||||||
|
LAPACKE_dbdsvdx,
|
||||||
|
LAPACKE_dbdsvdx_work,
|
||||||
|
LAPACKE_dgesvdx,
|
||||||
|
LAPACKE_dgesvdx_work,
|
||||||
|
LAPACKE_dgetrf2,
|
||||||
|
LAPACKE_dgetrf2_work,
|
||||||
|
LAPACKE_dgges3,
|
||||||
|
LAPACKE_dgges3_work,
|
||||||
|
LAPACKE_dggev3,
|
||||||
|
LAPACKE_dggev3_work,
|
||||||
|
LAPACKE_dgghd3,
|
||||||
|
LAPACKE_dgghd3_work,
|
||||||
|
LAPACKE_dggsvd3,
|
||||||
|
LAPACKE_dggsvd3_work,
|
||||||
|
LAPACKE_dggsvp3,
|
||||||
|
LAPACKE_dggsvp3_work,
|
||||||
|
LAPACKE_dlapmt,
|
||||||
|
LAPACKE_dlapmt_work,
|
||||||
|
LAPACKE_dlascl,
|
||||||
|
LAPACKE_dlascl_work,
|
||||||
|
LAPACKE_dorcsd2by1,
|
||||||
|
LAPACKE_dorcsd2by1_work,
|
||||||
|
LAPACKE_dpotrf2,
|
||||||
|
LAPACKE_dpotrf2_work,
|
||||||
|
LAPACKE_dsytrf_rook,
|
||||||
|
LAPACKE_dsytrf_rook_work,
|
||||||
|
LAPACKE_dsytrs_rook,
|
||||||
|
LAPACKE_dsytrs_rook_work,
|
||||||
|
LAPACKE_sbdsvdx,
|
||||||
|
LAPACKE_sbdsvdx_work,
|
||||||
|
LAPACKE_sgesvdx,
|
||||||
|
LAPACKE_sgesvdx_work,
|
||||||
|
LAPACKE_sgetrf2,
|
||||||
|
LAPACKE_sgetrf2_work,
|
||||||
|
LAPACKE_sgges3,
|
||||||
|
LAPACKE_sgges3_work,
|
||||||
|
LAPACKE_sggev3,
|
||||||
|
LAPACKE_sggev3_work,
|
||||||
|
LAPACKE_sgghd3,
|
||||||
|
LAPACKE_sgghd3_work,
|
||||||
|
LAPACKE_sggsvd3,
|
||||||
|
LAPACKE_sggsvd3_work,
|
||||||
|
LAPACKE_sggsvp3,
|
||||||
|
LAPACKE_sggsvp3_work,
|
||||||
|
LAPACKE_slapmt,
|
||||||
|
LAPACKE_slapmt_work,
|
||||||
|
LAPACKE_slascl,
|
||||||
|
LAPACKE_slascl_work,
|
||||||
|
LAPACKE_sorcsd2by1,
|
||||||
|
LAPACKE_sorcsd2by1_work,
|
||||||
|
LAPACKE_spotrf2,
|
||||||
|
LAPACKE_spotrf2_work,
|
||||||
|
LAPACKE_ssytrf_rook,
|
||||||
|
LAPACKE_ssytrf_rook_work,
|
||||||
|
LAPACKE_ssytrs_rook,
|
||||||
|
LAPACKE_ssytrs_rook_work,
|
||||||
|
LAPACKE_stpqrt,
|
||||||
|
LAPACKE_stpqrt_work,
|
||||||
|
LAPACKE_zgejsv,
|
||||||
|
LAPACKE_zgejsv_work,
|
||||||
|
LAPACKE_zgesvdx,
|
||||||
|
LAPACKE_zgesvdx_work,
|
||||||
|
LAPACKE_zgesvj,
|
||||||
|
LAPACKE_zgesvj_work,
|
||||||
|
LAPACKE_zgetrf2,
|
||||||
|
LAPACKE_zgetrf2_work,
|
||||||
|
LAPACKE_zgges3,
|
||||||
|
LAPACKE_zgges3_work,
|
||||||
|
LAPACKE_zggev3,
|
||||||
|
LAPACKE_zggev3_work,
|
||||||
|
LAPACKE_zgghd3,
|
||||||
|
LAPACKE_zgghd3_work,
|
||||||
|
LAPACKE_zggsvd3,
|
||||||
|
LAPACKE_zggsvd3_work,
|
||||||
|
LAPACKE_zggsvp3,
|
||||||
|
LAPACKE_zggsvp3_work,
|
||||||
|
LAPACKE_zhetrf_rook,
|
||||||
|
LAPACKE_zhetrf_rook_work,
|
||||||
|
LAPACKE_zhetrs_rook,
|
||||||
|
LAPACKE_zhetrs_rook_work,
|
||||||
|
LAPACKE_zlapmt,
|
||||||
|
LAPACKE_zlapmt_work,
|
||||||
|
LAPACKE_zlascl,
|
||||||
|
LAPACKE_zlascl_work,
|
||||||
|
LAPACKE_zpotrf2,
|
||||||
|
LAPACKE_zpotrf2_work,
|
||||||
|
LAPACKE_zsytrf_rook,
|
||||||
|
LAPACKE_zsytrf_rook_work,
|
||||||
|
LAPACKE_zsytrs_rook,
|
||||||
|
LAPACKE_zsytrs_rook_work,
|
||||||
|
LAPACKE_zuncsd2by1,
|
||||||
|
LAPACKE_zuncsd2by1_work
|
||||||
);
|
);
|
||||||
|
|
||||||
#These function may need 2 underscores.
|
#These function may need 2 underscores.
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,8 @@ DAMAXKERNEL = amax.S
|
||||||
CAMAXKERNEL = zamax.S
|
CAMAXKERNEL = zamax.S
|
||||||
ZAMAXKERNEL = zamax.S
|
ZAMAXKERNEL = zamax.S
|
||||||
|
|
||||||
ISAMAXKERNEL = isamax.S
|
ISAMAXKERNEL = iamax.S
|
||||||
IDAMAXKERNEL = idamax.S
|
IDAMAXKERNEL = iamax.S
|
||||||
ICAMAXKERNEL = izamax.S
|
ICAMAXKERNEL = izamax.S
|
||||||
IZAMAXKERNEL = izamax.S
|
IZAMAXKERNEL = izamax.S
|
||||||
|
|
||||||
|
|
@ -25,13 +25,13 @@ DCOPYKERNEL = copy.S
|
||||||
CCOPYKERNEL = copy.S
|
CCOPYKERNEL = copy.S
|
||||||
ZCOPYKERNEL = copy.S
|
ZCOPYKERNEL = copy.S
|
||||||
|
|
||||||
DOTKERNEL = dot.S
|
SDOTKERNEL = dot.S
|
||||||
DDOTKERNEL = dot.S
|
DDOTKERNEL = dot.S
|
||||||
CDOTKERNEL = zdot.S
|
CDOTKERNEL = zdot.S
|
||||||
ZDOTKERNEL = zdot.S
|
ZDOTKERNEL = zdot.S
|
||||||
|
|
||||||
SNRM2KERNEL = snrm2.S
|
SNRM2KERNEL = nrm2.S
|
||||||
DNRM2KERNEL = dnrm2.S
|
DNRM2KERNEL = nrm2.S
|
||||||
CNRM2KERNEL = znrm2.S
|
CNRM2KERNEL = znrm2.S
|
||||||
ZNRM2KERNEL = znrm2.S
|
ZNRM2KERNEL = znrm2.S
|
||||||
|
|
||||||
|
|
@ -40,7 +40,7 @@ DROTKERNEL = rot.S
|
||||||
CROTKERNEL = zrot.S
|
CROTKERNEL = zrot.S
|
||||||
ZROTKERNEL = zrot.S
|
ZROTKERNEL = zrot.S
|
||||||
|
|
||||||
SCALKERNEL = scal.S
|
SSCALKERNEL = scal.S
|
||||||
DSCALKERNEL = scal.S
|
DSCALKERNEL = scal.S
|
||||||
CSCALKERNEL = zscal.S
|
CSCALKERNEL = zscal.S
|
||||||
ZSCALKERNEL = zscal.S
|
ZSCALKERNEL = zscal.S
|
||||||
|
|
|
||||||
|
|
@ -181,73 +181,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
fmul v16.4s, v0.4s, v8.4s[0]
|
fmul v16.4s, v0.4s, v8.4s[0]
|
||||||
OP_ii v16.4s, v1.4s, v9.4s[0]
|
OP_ii v16.4s, v1.4s, v9.4s[0]
|
||||||
fmul v17.4s, v0.4s, v9.4s[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v17.4s, v17.4s
|
eor v17.16b, v17.16b, v17.16b
|
||||||
|
fmls v17.4s, v0.4s, v9.4s[0]
|
||||||
|
#else
|
||||||
|
fmul v17.4s, v0.4s, v9.4s[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v17.4s, v1.4s, v8.4s[0]
|
OP_ir v17.4s, v1.4s, v8.4s[0]
|
||||||
|
|
||||||
fmul v20.4s, v0.4s, v8.4s[1]
|
fmul v20.4s, v0.4s, v8.4s[1]
|
||||||
OP_ii v20.4s, v1.4s, v9.4s[1]
|
OP_ii v20.4s, v1.4s, v9.4s[1]
|
||||||
fmul v21.4s, v0.4s, v9.4s[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v21.4s, v21.4s
|
eor v21.16b, v21.16b, v21.16b
|
||||||
|
fmls v21.4s, v0.4s, v9.4s[1]
|
||||||
|
#else
|
||||||
|
fmul v21.4s, v0.4s, v9.4s[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v21.4s, v1.4s, v8.4s[1]
|
OP_ir v21.4s, v1.4s, v8.4s[1]
|
||||||
|
|
||||||
fmul v24.4s, v0.4s, v8.4s[2]
|
fmul v24.4s, v0.4s, v8.4s[2]
|
||||||
OP_ii v24.4s, v1.4s, v9.4s[2]
|
OP_ii v24.4s, v1.4s, v9.4s[2]
|
||||||
fmul v25.4s, v0.4s, v9.4s[2]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v25.4s, v25.4s
|
eor v25.16b, v25.16b, v25.16b
|
||||||
|
fmls v25.4s, v0.4s, v9.4s[2]
|
||||||
|
#else
|
||||||
|
fmul v25.4s, v0.4s, v9.4s[2]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v25.4s, v1.4s, v8.4s[2]
|
OP_ir v25.4s, v1.4s, v8.4s[2]
|
||||||
|
|
||||||
fmul v28.4s, v0.4s, v8.4s[3]
|
fmul v28.4s, v0.4s, v8.4s[3]
|
||||||
OP_ii v28.4s, v1.4s, v9.4s[3]
|
OP_ii v28.4s, v1.4s, v9.4s[3]
|
||||||
fmul v29.4s, v0.4s, v9.4s[3]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v29.4s, v29.4s
|
eor v29.16b, v29.16b, v29.16b
|
||||||
|
fmls v29.4s, v0.4s, v9.4s[3]
|
||||||
|
#else
|
||||||
|
fmul v29.4s, v0.4s, v9.4s[3]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v29.4s, v1.4s, v8.4s[3]
|
OP_ir v29.4s, v1.4s, v8.4s[3]
|
||||||
|
|
||||||
fmul v18.4s, v2.4s, v8.4s[0]
|
fmul v18.4s, v2.4s, v8.4s[0]
|
||||||
OP_ii v18.4s, v3.4s, v9.4s[0]
|
OP_ii v18.4s, v3.4s, v9.4s[0]
|
||||||
fmul v19.4s, v2.4s, v9.4s[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v19.4s, v19.4s
|
eor v19.16b, v19.16b, v19.16b
|
||||||
|
fmls v19.4s, v2.4s, v9.4s[0]
|
||||||
|
#else
|
||||||
|
fmul v19.4s, v2.4s, v9.4s[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v19.4s, v3.4s, v8.4s[0]
|
OP_ir v19.4s, v3.4s, v8.4s[0]
|
||||||
|
|
||||||
fmul v22.4s, v2.4s, v8.4s[1]
|
fmul v22.4s, v2.4s, v8.4s[1]
|
||||||
OP_ii v22.4s, v3.4s, v9.4s[1]
|
OP_ii v22.4s, v3.4s, v9.4s[1]
|
||||||
fmul v23.4s, v2.4s, v9.4s[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v23.4s, v23.4s
|
eor v23.16b, v23.16b, v23.16b
|
||||||
|
fmls v23.4s, v2.4s, v9.4s[1]
|
||||||
|
#else
|
||||||
|
fmul v23.4s, v2.4s, v9.4s[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v23.4s, v3.4s, v8.4s[1]
|
OP_ir v23.4s, v3.4s, v8.4s[1]
|
||||||
|
|
||||||
fmul v26.4s, v2.4s, v8.4s[2]
|
fmul v26.4s, v2.4s, v8.4s[2]
|
||||||
OP_ii v26.4s, v3.4s, v9.4s[2]
|
OP_ii v26.4s, v3.4s, v9.4s[2]
|
||||||
fmul v27.4s, v2.4s, v9.4s[2]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v27.4s, v27.4s
|
eor v27.16b, v27.16b, v27.16b
|
||||||
|
fmls v27.4s, v2.4s, v9.4s[2]
|
||||||
|
#else
|
||||||
|
fmul v27.4s, v2.4s, v9.4s[2]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v27.4s, v3.4s, v8.4s[2]
|
OP_ir v27.4s, v3.4s, v8.4s[2]
|
||||||
|
|
||||||
fmul v30.4s, v2.4s, v8.4s[3]
|
fmul v30.4s, v2.4s, v8.4s[3]
|
||||||
OP_ii v30.4s, v3.4s, v9.4s[3]
|
OP_ii v30.4s, v3.4s, v9.4s[3]
|
||||||
fmul v31.4s, v2.4s, v9.4s[3]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v31.4s, v31.4s
|
eor v31.16b, v31.16b, v31.16b
|
||||||
|
fmls v31.4s, v2.4s, v9.4s[3]
|
||||||
|
#else
|
||||||
|
fmul v31.4s, v2.4s, v9.4s[3]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v31.4s, v3.4s, v8.4s[3]
|
OP_ir v31.4s, v3.4s, v8.4s[3]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -172,37 +172,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
fmul v16.4s, v0.4s, v8.4s[0]
|
fmul v16.4s, v0.4s, v8.4s[0]
|
||||||
OP_ii v16.4s, v1.4s, v9.4s[0]
|
OP_ii v16.4s, v1.4s, v9.4s[0]
|
||||||
fmul v17.4s, v0.4s, v9.4s[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v17.4s, v17.4s
|
eor v17.16b, v17.16b, v17.16b
|
||||||
|
fmls v17.4s, v0.4s, v9.4s[0]
|
||||||
|
#else
|
||||||
|
fmul v17.4s, v0.4s, v9.4s[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v17.4s, v1.4s, v8.4s[0]
|
OP_ir v17.4s, v1.4s, v8.4s[0]
|
||||||
|
|
||||||
fmul v20.4s, v0.4s, v8.4s[1]
|
fmul v20.4s, v0.4s, v8.4s[1]
|
||||||
OP_ii v20.4s, v1.4s, v9.4s[1]
|
OP_ii v20.4s, v1.4s, v9.4s[1]
|
||||||
fmul v21.4s, v0.4s, v9.4s[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v21.4s, v21.4s
|
eor v21.16b, v21.16b, v21.16b
|
||||||
|
fmls v21.4s, v0.4s, v9.4s[1]
|
||||||
|
#else
|
||||||
|
fmul v21.4s, v0.4s, v9.4s[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v21.4s, v1.4s, v8.4s[1]
|
OP_ir v21.4s, v1.4s, v8.4s[1]
|
||||||
|
|
||||||
fmul v24.4s, v0.4s, v8.4s[2]
|
fmul v24.4s, v0.4s, v8.4s[2]
|
||||||
OP_ii v24.4s, v1.4s, v9.4s[2]
|
OP_ii v24.4s, v1.4s, v9.4s[2]
|
||||||
fmul v25.4s, v0.4s, v9.4s[2]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v25.4s, v25.4s
|
eor v25.16b, v25.16b, v25.16b
|
||||||
|
fmls v25.4s, v0.4s, v9.4s[2]
|
||||||
|
#else
|
||||||
|
fmul v25.4s, v0.4s, v9.4s[2]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v25.4s, v1.4s, v8.4s[2]
|
OP_ir v25.4s, v1.4s, v8.4s[2]
|
||||||
|
|
||||||
fmul v28.4s, v0.4s, v8.4s[3]
|
fmul v28.4s, v0.4s, v8.4s[3]
|
||||||
OP_ii v28.4s, v1.4s, v9.4s[3]
|
OP_ii v28.4s, v1.4s, v9.4s[3]
|
||||||
fmul v29.4s, v0.4s, v9.4s[3]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v29.4s, v29.4s
|
eor v29.16b, v29.16b, v29.16b
|
||||||
|
fmls v29.4s, v0.4s, v9.4s[3]
|
||||||
|
#else
|
||||||
|
fmul v29.4s, v0.4s, v9.4s[3]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v29.4s, v1.4s, v8.4s[3]
|
OP_ir v29.4s, v1.4s, v8.4s[3]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -45,16 +45,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define COND ge
|
#define COND ge
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
#define MAXF s0
|
||||||
|
#define TMPF s1
|
||||||
|
#define TMPVF {v1.s}[0]
|
||||||
|
#define SZ 4
|
||||||
|
#else
|
||||||
#define MAXF d0
|
#define MAXF d0
|
||||||
#define TMPF d1
|
#define TMPF d1
|
||||||
#define TMPVF {v1.d}[0]
|
#define TMPVF {v1.d}[0]
|
||||||
#define SZ 8
|
#define SZ 8
|
||||||
|
#endif
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
|
||||||
.macro INIT_S
|
.macro INIT_S
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
lsl INC_X, INC_X, #2
|
||||||
|
ld1 {v0.s}[0], [X], INC_X
|
||||||
|
#else
|
||||||
lsl INC_X, INC_X, #3
|
lsl INC_X, INC_X, #3
|
||||||
ld1 {v0.d}[0], [X], INC_X
|
ld1 {v0.d}[0], [X], INC_X
|
||||||
|
#endif
|
||||||
mov Z, #1
|
mov Z, #1
|
||||||
mov INDEX, Z
|
mov INDEX, Z
|
||||||
fabs MAXF, MAXF
|
fabs MAXF, MAXF
|
||||||
|
|
@ -107,9 +119,8 @@ iamax_kernel_S1:
|
||||||
iamax_kernel_S10:
|
iamax_kernel_S10:
|
||||||
|
|
||||||
KERNEL_S1
|
KERNEL_S1
|
||||||
|
subs I, I, #1
|
||||||
subs I, I, #1
|
bne iamax_kernel_S10
|
||||||
bne iamax_kernel_S10
|
|
||||||
|
|
||||||
iamax_kernel_L999:
|
iamax_kernel_L999:
|
||||||
|
|
||||||
|
|
@ -1,213 +0,0 @@
|
||||||
/*******************************************************************************
|
|
||||||
Copyright (c) 2015, The OpenBLAS Project
|
|
||||||
All rights reserved.
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are
|
|
||||||
met:
|
|
||||||
1. Redistributions of source code must retain the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer.
|
|
||||||
2. Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in
|
|
||||||
the documentation and/or other materials provided with the
|
|
||||||
distribution.
|
|
||||||
3. Neither the name of the OpenBLAS project nor the names of
|
|
||||||
its contributors may be used to endorse or promote products
|
|
||||||
derived from this software without specific prior written permission.
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
||||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
||||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
||||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|
||||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
*******************************************************************************/
|
|
||||||
|
|
||||||
#define ASSEMBLER
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#define N x0 /* vector length */
|
|
||||||
#define X x1 /* X vector address */
|
|
||||||
#define INC_X x2 /* X stride */
|
|
||||||
#define INDEX x3 /* index of max/min value */
|
|
||||||
#define Z x4 /* vector index */
|
|
||||||
#define I x5 /* loop variable */
|
|
||||||
#define X_COPY x6 /* copy of X address */
|
|
||||||
#define MAXF_Z x7
|
|
||||||
|
|
||||||
/*******************************************************************************
|
|
||||||
* Macro definitions
|
|
||||||
*******************************************************************************/
|
|
||||||
|
|
||||||
#define MAXF s5
|
|
||||||
#define TMPF s6
|
|
||||||
#define TMPVF {v6.s}[0]
|
|
||||||
#define SZ 4
|
|
||||||
|
|
||||||
/******************************************************************************/
|
|
||||||
|
|
||||||
.macro INIT_F1
|
|
||||||
ldr MAXF, [X], #SZ
|
|
||||||
mov Z, #1
|
|
||||||
mov INDEX, Z
|
|
||||||
fabs MAXF, MAXF
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro KERNEL_F1
|
|
||||||
ldr TMPF, [X], #SZ
|
|
||||||
add Z, Z, #1
|
|
||||||
fabs TMPF, TMPF
|
|
||||||
fcmp TMPF, MAXF
|
|
||||||
fcsel MAXF, MAXF, TMPF, le
|
|
||||||
csel INDEX, INDEX, Z, le
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro INIT_F4
|
|
||||||
ld1 {v0.4s}, [X], #16
|
|
||||||
fabs v0.4s, v0.4s
|
|
||||||
fmaxv MAXF, v0.4s
|
|
||||||
mov Z, #5
|
|
||||||
mov MAXF_Z, #1
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro KERNEL_F4
|
|
||||||
ld1 {v0.4s}, [X], #16
|
|
||||||
fabs v0.4s, v0.4s
|
|
||||||
fmaxv TMPF, v0.4s
|
|
||||||
PRFM PLDL1KEEP, [X, #512]
|
|
||||||
fcmp TMPF, MAXF
|
|
||||||
fcsel MAXF, MAXF, TMPF, le
|
|
||||||
csel MAXF_Z, MAXF_Z, Z, le
|
|
||||||
add Z, Z, #4
|
|
||||||
.endm
|
|
||||||
|
|
||||||
|
|
||||||
.macro KERNEL_F4_FINALIZE
|
|
||||||
mov INDEX, MAXF_Z
|
|
||||||
sub MAXF_Z, MAXF_Z, #1
|
|
||||||
lsl MAXF_Z, MAXF_Z, #2
|
|
||||||
add X_COPY, X_COPY, MAXF_Z
|
|
||||||
ldr TMPF, [X_COPY], #SZ
|
|
||||||
fabs TMPF, TMPF
|
|
||||||
fcmp TMPF, MAXF
|
|
||||||
beq KERNEL_F4_FINALIZE_DONE
|
|
||||||
add INDEX, INDEX, #1
|
|
||||||
ldr TMPF, [X_COPY], #SZ
|
|
||||||
fabs TMPF, TMPF
|
|
||||||
fcmp TMPF, MAXF
|
|
||||||
beq KERNEL_F4_FINALIZE_DONE
|
|
||||||
add INDEX, INDEX, #1
|
|
||||||
ldr TMPF, [X_COPY], #SZ
|
|
||||||
fabs TMPF, TMPF
|
|
||||||
fcmp TMPF, MAXF
|
|
||||||
beq KERNEL_F4_FINALIZE_DONE
|
|
||||||
add INDEX, INDEX, #1
|
|
||||||
KERNEL_F4_FINALIZE_DONE:
|
|
||||||
.endm
|
|
||||||
|
|
||||||
|
|
||||||
.macro INIT_S
|
|
||||||
lsl INC_X, INC_X, #2
|
|
||||||
ld1 TMPVF, [X], INC_X
|
|
||||||
mov Z, #1
|
|
||||||
mov INDEX, Z
|
|
||||||
fabs MAXF, TMPF
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro KERNEL_S1
|
|
||||||
ld1 TMPVF, [X], INC_X
|
|
||||||
add Z, Z, #1
|
|
||||||
fabs TMPF, TMPF
|
|
||||||
fcmp TMPF, MAXF
|
|
||||||
fcsel MAXF, MAXF, TMPF, le
|
|
||||||
csel INDEX, INDEX, Z, le
|
|
||||||
.endm
|
|
||||||
|
|
||||||
/*******************************************************************************
|
|
||||||
* End of macro definitions
|
|
||||||
*******************************************************************************/
|
|
||||||
|
|
||||||
PROLOGUE
|
|
||||||
|
|
||||||
cmp N, xzr
|
|
||||||
ble iamax_kernel_zero
|
|
||||||
cmp INC_X, xzr
|
|
||||||
ble iamax_kernel_zero
|
|
||||||
|
|
||||||
PRFM PLDL1KEEP, [X]
|
|
||||||
mov X_COPY, X
|
|
||||||
|
|
||||||
cmp INC_X, #1
|
|
||||||
bne iamax_kernel_S_BEGIN
|
|
||||||
|
|
||||||
iamax_kernel_F_BEGIN:
|
|
||||||
asr I, N, #2
|
|
||||||
cmp I, xzr
|
|
||||||
beq iamax_kernel_F1_INIT
|
|
||||||
|
|
||||||
INIT_F4
|
|
||||||
subs I, I, #1
|
|
||||||
beq iamax_kernel_F4_FINALIZE
|
|
||||||
|
|
||||||
iamax_kernel_F4:
|
|
||||||
KERNEL_F4
|
|
||||||
subs I, I, #1
|
|
||||||
bne iamax_kernel_F4
|
|
||||||
|
|
||||||
iamax_kernel_F4_FINALIZE:
|
|
||||||
KERNEL_F4_FINALIZE
|
|
||||||
|
|
||||||
iamax_kernel_F1:
|
|
||||||
ands I, N, #3
|
|
||||||
ble iamax_kernel_L999
|
|
||||||
|
|
||||||
iamax_kernel_F10:
|
|
||||||
KERNEL_F1
|
|
||||||
subs I, I, #1
|
|
||||||
bne iamax_kernel_F10
|
|
||||||
b iamax_kernel_L999
|
|
||||||
|
|
||||||
iamax_kernel_F1_INIT:
|
|
||||||
INIT_F1
|
|
||||||
subs N, N, #1
|
|
||||||
b iamax_kernel_F1
|
|
||||||
|
|
||||||
iamax_kernel_S_BEGIN:
|
|
||||||
INIT_S
|
|
||||||
|
|
||||||
subs N, N, #1
|
|
||||||
ble iamax_kernel_L999
|
|
||||||
|
|
||||||
asr I, N, #2
|
|
||||||
cmp I, xzr
|
|
||||||
ble iamax_kernel_S1
|
|
||||||
|
|
||||||
iamax_kernel_S4:
|
|
||||||
KERNEL_S1
|
|
||||||
KERNEL_S1
|
|
||||||
KERNEL_S1
|
|
||||||
KERNEL_S1
|
|
||||||
|
|
||||||
subs I, I, #1
|
|
||||||
bne iamax_kernel_S4
|
|
||||||
|
|
||||||
iamax_kernel_S1:
|
|
||||||
ands I, N, #3
|
|
||||||
ble iamax_kernel_L999
|
|
||||||
|
|
||||||
iamax_kernel_S10:
|
|
||||||
KERNEL_S1
|
|
||||||
subs I, I, #1
|
|
||||||
bne iamax_kernel_S10
|
|
||||||
|
|
||||||
iamax_kernel_L999:
|
|
||||||
mov x0, INDEX
|
|
||||||
ret
|
|
||||||
|
|
||||||
iamax_kernel_zero:
|
|
||||||
mov x0, xzr
|
|
||||||
ret
|
|
||||||
|
|
||||||
EPILOGUE
|
|
||||||
|
|
@ -0,0 +1,225 @@
|
||||||
|
/*******************************************************************************
|
||||||
|
Copyright (c) 2015, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
#define ASSEMBLER
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#define N x0
|
||||||
|
#define X x1
|
||||||
|
#define INC_X x2
|
||||||
|
|
||||||
|
#define I x3
|
||||||
|
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
#define SSQ s0
|
||||||
|
#define SCALE s1
|
||||||
|
#define REGZERO s5
|
||||||
|
#define REGONE s6
|
||||||
|
#else
|
||||||
|
#define SSQ d0
|
||||||
|
#define SCALE d1
|
||||||
|
#define REGZERO d5
|
||||||
|
#define REGONE d6
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* Macro definitions
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
.macro KERNEL_F1
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
ldr s4, [X], #4
|
||||||
|
fcmp s4, REGZERO
|
||||||
|
beq KERNEL_F1_NEXT_\@
|
||||||
|
fabs s4, s4
|
||||||
|
fcmp SCALE, s4
|
||||||
|
bge KERNEL_F1_SCALE_GE_X_\@
|
||||||
|
fdiv s2, SCALE, s4
|
||||||
|
fmul s2, s2, s2
|
||||||
|
fmul s3, SSQ, s2
|
||||||
|
fadd SSQ, REGONE, s3
|
||||||
|
fmov SCALE, s4
|
||||||
|
b KERNEL_F1_NEXT_\@
|
||||||
|
KERNEL_F1_SCALE_GE_X_\@:
|
||||||
|
fdiv s2, s4, SCALE
|
||||||
|
fmla SSQ, s2, v2.s[0]
|
||||||
|
#else
|
||||||
|
ldr d4, [X], #8
|
||||||
|
fcmp d4, REGZERO
|
||||||
|
beq KERNEL_F1_NEXT_\@
|
||||||
|
fabs d4, d4
|
||||||
|
fcmp SCALE, d4
|
||||||
|
bge KERNEL_F1_SCALE_GE_X_\@
|
||||||
|
fdiv d2, SCALE, d4
|
||||||
|
fmul d2, d2, d2
|
||||||
|
fmul d3, SSQ, d2
|
||||||
|
fadd SSQ, REGONE, d3
|
||||||
|
fmov SCALE, d4
|
||||||
|
b KERNEL_F1_NEXT_\@
|
||||||
|
KERNEL_F1_SCALE_GE_X_\@:
|
||||||
|
fdiv d2, d4, SCALE
|
||||||
|
fmla SSQ, d2, v2.d[0]
|
||||||
|
#endif
|
||||||
|
KERNEL_F1_NEXT_\@:
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro KERNEL_S1
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
ldr s4, [X]
|
||||||
|
fcmp s4, REGZERO
|
||||||
|
beq KERNEL_S1_NEXT
|
||||||
|
fabs s4, s4
|
||||||
|
fcmp SCALE, s4
|
||||||
|
bge KERNEL_S1_SCALE_GE_X
|
||||||
|
fdiv s2, SCALE, s4
|
||||||
|
fmul s2, s2, s2
|
||||||
|
fmul s3, SSQ, s2
|
||||||
|
fadd SSQ, REGONE, s3
|
||||||
|
fmov SCALE, s4
|
||||||
|
b KERNEL_S1_NEXT
|
||||||
|
KERNEL_S1_SCALE_GE_X:
|
||||||
|
fdiv s2, s4, SCALE
|
||||||
|
fmla SSQ, s2, v2.s[0]
|
||||||
|
#else
|
||||||
|
ldr d4, [X]
|
||||||
|
fcmp d4, REGZERO
|
||||||
|
beq KERNEL_S1_NEXT
|
||||||
|
fabs d4, d4
|
||||||
|
fcmp SCALE, d4
|
||||||
|
bge KERNEL_S1_SCALE_GE_X
|
||||||
|
fdiv d2, SCALE, d4
|
||||||
|
fmul d2, d2, d2
|
||||||
|
fmul d3, SSQ, d2
|
||||||
|
fadd SSQ, REGONE, d3
|
||||||
|
fmov SCALE, d4
|
||||||
|
b KERNEL_S1_NEXT
|
||||||
|
KERNEL_S1_SCALE_GE_X:
|
||||||
|
fdiv d2, d4, SCALE
|
||||||
|
fmla SSQ, d2, v2.d[0]
|
||||||
|
#endif
|
||||||
|
KERNEL_S1_NEXT:
|
||||||
|
add X, X, INC_X
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro KERNEL_F8
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro INIT_S
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
lsl INC_X, INC_X, #2 // INC_X * SIZE
|
||||||
|
#else
|
||||||
|
lsl INC_X, INC_X, #3 // INC_X * SIZE
|
||||||
|
#endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro INIT
|
||||||
|
eor v1.16b, v1.16b, v1.16b // scale=0.0
|
||||||
|
fmov SSQ, #1.0
|
||||||
|
fmov REGONE, SSQ
|
||||||
|
fmov REGZERO, SCALE
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* End of macro definitions
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
PROLOGUE
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
|
||||||
|
INIT
|
||||||
|
|
||||||
|
cmp N, #0
|
||||||
|
ble nrm2_kernel_L999
|
||||||
|
|
||||||
|
cmp INC_X, #0
|
||||||
|
beq nrm2_kernel_L999
|
||||||
|
|
||||||
|
|
||||||
|
cmp INC_X, #1
|
||||||
|
bne nrm2_kernel_S_BEGIN
|
||||||
|
|
||||||
|
nrm2_kernel_F_BEGIN:
|
||||||
|
|
||||||
|
asr I, N, #3 // I = N / 8
|
||||||
|
cmp I, xzr
|
||||||
|
ble nrm2_kernel_F1
|
||||||
|
|
||||||
|
nrm2_kernel_F8:
|
||||||
|
|
||||||
|
KERNEL_F8
|
||||||
|
|
||||||
|
subs I, I, #1
|
||||||
|
bne nrm2_kernel_F8
|
||||||
|
|
||||||
|
nrm2_kernel_F1:
|
||||||
|
|
||||||
|
ands I, N, #7
|
||||||
|
ble nrm2_kernel_L999
|
||||||
|
|
||||||
|
|
||||||
|
nrm2_kernel_F10:
|
||||||
|
|
||||||
|
KERNEL_F1
|
||||||
|
|
||||||
|
subs I, I, #1
|
||||||
|
bne nrm2_kernel_F10
|
||||||
|
|
||||||
|
b nrm2_kernel_L999
|
||||||
|
|
||||||
|
nrm2_kernel_S_BEGIN:
|
||||||
|
|
||||||
|
INIT_S
|
||||||
|
|
||||||
|
mov I, N
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
|
||||||
|
nrm2_kernel_S10:
|
||||||
|
|
||||||
|
KERNEL_S1
|
||||||
|
|
||||||
|
subs I, I, #1
|
||||||
|
bne nrm2_kernel_S10
|
||||||
|
|
||||||
|
|
||||||
|
nrm2_kernel_L999:
|
||||||
|
fsqrt SSQ, SSQ
|
||||||
|
fmul SSQ, SCALE, SSQ
|
||||||
|
|
||||||
|
ret
|
||||||
|
|
||||||
|
EPILOGUE
|
||||||
|
|
||||||
|
|
@ -59,10 +59,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
.macro INIT_F1
|
.macro INIT_F1
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
fneg s2, S
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub s2, s2, S
|
||||||
ins v1.s[1], v2.s[0] // [-S, S]
|
ins v1.s[1], v2.s[0] // [-S, S]
|
||||||
#else
|
#else
|
||||||
fneg d2, S
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub d2, d2, S
|
||||||
ins v1.d[1], v2.d[0] // [-S, S]
|
ins v1.d[1], v2.d[0] // [-S, S]
|
||||||
#endif
|
#endif
|
||||||
.endm
|
.endm
|
||||||
|
|
|
||||||
|
|
@ -43,14 +43,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
#define DA_R s0 /* scale input value */
|
#define DA_R s0 /* scale input value */
|
||||||
#define DA_I s1 /* scale input value */
|
#define DA_I s1 /* scale input value */
|
||||||
#define TMPX v2.2s
|
|
||||||
#define TMPY v3.2s
|
|
||||||
#define SZ 4
|
#define SZ 4
|
||||||
#else
|
#else
|
||||||
#define DA_R d0 /* scale input value */
|
#define DA_R d0 /* scale input value */
|
||||||
#define DA_I d1 /* scale input value */
|
#define DA_I d1 /* scale input value */
|
||||||
#define TMPX v2.2d
|
|
||||||
#define TMPY v3.2d
|
|
||||||
#define SZ 8
|
#define SZ 8
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
@ -61,22 +57,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#if !defined(CONJ)
|
#if !defined(CONJ)
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
|
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
|
||||||
fneg s2, DA_I
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub s2, s2, DA_I
|
||||||
ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I
|
ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I
|
||||||
ext v1.8b, v1.8b, v1.8b, #4 // v1 = DA_I, -DA_I
|
ext v1.8b, v1.8b, v1.8b, #4 // v1 = DA_I, -DA_I
|
||||||
#else
|
#else
|
||||||
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
|
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
|
||||||
fneg d2, DA_I
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub d2, d2, DA_I
|
||||||
ins v1.d[1], v2.d[0] // v1 = -DA_I, DA_I
|
ins v1.d[1], v2.d[0] // v1 = -DA_I, DA_I
|
||||||
ext v1.16b, v1.16b, v1.16b, #8 // v1 = DA_I, -DA_I
|
ext v1.16b, v1.16b, v1.16b, #8 // v1 = DA_I, -DA_I
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
fneg s2, DA_R
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub s2, s2, DA_R
|
||||||
ins v0.s[1], v2.s[0] // v0 = -DA_R, DA_R
|
ins v0.s[1], v2.s[0] // v0 = -DA_R, DA_R
|
||||||
ins v1.s[1], v1.s[0] // v1 = DA_I, DA_I
|
ins v1.s[1], v1.s[0] // v1 = DA_I, DA_I
|
||||||
#else
|
#else
|
||||||
fneg d2, DA_R
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub d2, d2, DA_R
|
||||||
ins v0.d[1], v2.d[0] // v0 = -DA_R, DA_R
|
ins v0.d[1], v2.d[0] // v0 = -DA_R, DA_R
|
||||||
ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I
|
ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -111,9 +111,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.macro KERNEL_INIT_F4
|
.macro KERNEL_INIT_F4
|
||||||
|
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
// Replicate the lower 2 floats into the upper 2 slots
|
ins v16.s[0], v0.s[0]
|
||||||
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R, DA_R, DA_R
|
ins v16.s[1], v16.s[0]
|
||||||
ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I, DA_I, DA_I
|
ins v16.d[1], v16.d[0]
|
||||||
|
#if !defined(CONJ)
|
||||||
|
ins v17.s[0], v1.s[1]
|
||||||
|
#else
|
||||||
|
ins v17.s[0], v1.s[0]
|
||||||
|
#endif
|
||||||
|
ins v17.s[1], v17.s[0]
|
||||||
|
ins v17.d[1], v17.d[0]
|
||||||
|
#else //DOUBLE
|
||||||
|
ins v16.d[0], v0.d[0]
|
||||||
|
ins v16.d[1], v16.d[0]
|
||||||
|
#if !defined(CONJ)
|
||||||
|
ins v17.d[0], v1.d[1]
|
||||||
|
#else
|
||||||
|
ins v17.d[0], v1.d[0]
|
||||||
|
#endif
|
||||||
|
ins v17.d[1], v17.d[0]
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
|
|
@ -121,55 +137,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.macro KERNEL_F4
|
.macro KERNEL_F4
|
||||||
|
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
ld1 {v2.4s,v3.4s}, [X], #32 // V2 = X[3], X[2], X[1], X[0]
|
ld2 {v2.4s, v3.4s}, [X], #32
|
||||||
// V3 = X[7], X[6], X[5], X[4]
|
ld2 {v4.4s, v5.4s}, [Y_COPY], #32
|
||||||
ext v6.8b, v2.8b, v2.8b, #4 // V6 = - , - , X[0], X[1]
|
|
||||||
ins v6.s[2], v2.s[3] // V6 = - , X[3], X[0], X[1]
|
|
||||||
ins v6.s[3], v2.s[2] // V6 = X[2], X[3], X[0], X[1]
|
|
||||||
|
|
||||||
ld1 {v4.4s,v5.4s}, [Y] // V4 = Y[3], Y[2], Y[1], Y[0]
|
fmla v4.4s, v2.4s, v16.4s
|
||||||
// V5 = Y[7], Y[6], Y[5], Y[4]
|
#if !defined(CONJ)
|
||||||
|
fmls v4.4s, v3.4s, v17.4s
|
||||||
|
#else
|
||||||
|
fmla v4.4s, v3.4s, v17.4s
|
||||||
|
#endif
|
||||||
|
|
||||||
ext v7.8b, v3.8b, v3.8b, #4 // V7 = - , - , X[4], X[5]
|
#if !defined(CONJ)
|
||||||
ins v7.s[2], v3.s[3] // V7 = - , X[7], X[4], X[5]
|
fmla v5.4s, v2.4s, v17.4s
|
||||||
ins v7.s[3], v3.s[2] // V7 = X[6], X[7], X[4], X[5]
|
#else
|
||||||
|
fmls v5.4s, v2.4s, v17.4s
|
||||||
|
#endif
|
||||||
|
fmla v5.4s, v3.4s, v16.4s
|
||||||
|
|
||||||
fmla v4.4s, v0.4s, v2.4s // Y[iy] += DA_R * X[ix]
|
st2 {v4.4s, v5.4s}, [Y], #32
|
||||||
// Y[iy+1] += +-DA_R * X[ix+1]
|
|
||||||
fmla v4.4s, v1.4s, v6.4s // Y[iy] += +-DA_I * X[ix+1]
|
|
||||||
// Y[iy+1] += DA_I * X[ix]
|
|
||||||
st1 {v4.4s}, [Y], #16
|
|
||||||
|
|
||||||
fmla v5.4s, v0.4s, v3.4s // Y[iy] += DA_R * X[ix]
|
|
||||||
fmla v5.4s, v1.4s, v7.4s // Y[iy] += +-DA_I * X[ix+1]
|
|
||||||
// Y[iy+1] += +-DA_R * X[ix+1]
|
|
||||||
// Y[iy+1] += DA_I * X[ix]
|
|
||||||
st1 {v5.4s}, [Y], #16
|
|
||||||
#else // DOUBLE
|
#else // DOUBLE
|
||||||
ld1 {v2.2d,v3.2d}, [X], #32 // CX0, CX1, CX2, CX3
|
ld2 {v2.2d, v3.2d}, [X], #32
|
||||||
ext v20.16b, v2.16b, v2.16b, #8 // X[ix], X[ix+1]
|
ld2 {v4.2d, v5.2d}, [Y_COPY], #32
|
||||||
ext v21.16b, v3.16b, v3.16b, #8 // X[ix], X[ix+1]
|
|
||||||
|
|
||||||
ld1 {v4.2d,v5.2d}, [X], #32 // CX0, CX1, CX2, CX3
|
fmla v4.2d, v2.2d, v16.2d
|
||||||
ext v22.16b, v4.16b, v4.16b, #8 // X[ix], X[ix+1]
|
#if !defined(CONJ)
|
||||||
ext v23.16b, v5.16b, v5.16b, #8 // X[ix], X[ix+1]
|
fmls v4.2d, v3.2d, v17.2d
|
||||||
|
#else
|
||||||
|
fmla v4.2d, v3.2d, v17.2d
|
||||||
|
#endif
|
||||||
|
#if !defined(CONJ)
|
||||||
|
fmla v5.2d, v2.2d, v17.2d
|
||||||
|
#else
|
||||||
|
fmls v5.2d, v2.2d, v17.2d
|
||||||
|
#endif
|
||||||
|
fmla v5.2d, v3.2d, v16.2d
|
||||||
|
|
||||||
ld1 {v16.2d,v17.2d}, [Y_COPY], #32 // CY0, CY1, CY2, CY3
|
st2 {v4.2d, v5.2d}, [Y], #32
|
||||||
|
|
||||||
fmla v16.2d, v0.2d, v2.2d
|
ld2 {v18.2d, v19.2d}, [X], #32
|
||||||
fmla v17.2d, v0.2d, v3.2d
|
ld2 {v20.2d, v21.2d}, [Y_COPY], #32
|
||||||
|
|
||||||
ld1 {v18.2d,v19.2d}, [Y_COPY], #32 // CY0, CY1, CY2, CY3
|
fmla v20.2d, v18.2d, v16.2d
|
||||||
|
#if !defined(CONJ)
|
||||||
|
fmls v20.2d, v19.2d, v17.2d
|
||||||
|
#else
|
||||||
|
fmla v20.2d, v19.2d, v17.2d
|
||||||
|
#endif
|
||||||
|
#if !defined(CONJ)
|
||||||
|
fmla v21.2d, v18.2d, v17.2d
|
||||||
|
#else
|
||||||
|
fmls v21.2d, v18.2d, v17.2d
|
||||||
|
#endif
|
||||||
|
fmla v21.2d, v19.2d, v16.2d
|
||||||
|
|
||||||
fmla v16.2d, v1.2d, v20.2d
|
st2 {v20.2d, v21.2d}, [Y], #32
|
||||||
fmla v17.2d, v1.2d, v21.2d
|
|
||||||
st1 {v16.2d,v17.2d}, [Y], #32
|
|
||||||
|
|
||||||
fmla v18.2d, v0.2d, v4.2d
|
|
||||||
fmla v19.2d, v0.2d, v5.2d
|
|
||||||
fmla v18.2d, v1.2d, v22.2d
|
|
||||||
fmla v19.2d, v1.2d, v23.2d
|
|
||||||
st1 {v18.2d,v19.2d}, [Y], #32
|
|
||||||
#endif
|
#endif
|
||||||
PRFM PLDL1KEEP, [X, #512]
|
PRFM PLDL1KEEP, [X, #512]
|
||||||
PRFM PLDL1KEEP, [Y, #512]
|
PRFM PLDL1KEEP, [Y, #512]
|
||||||
|
|
|
||||||
|
|
@ -184,73 +184,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
fmul v16.2d, v0.2d, v8.2d[0]
|
fmul v16.2d, v0.2d, v8.2d[0]
|
||||||
OP_ii v16.2d, v1.2d, v9.2d[0]
|
OP_ii v16.2d, v1.2d, v9.2d[0]
|
||||||
fmul v17.2d, v0.2d, v9.2d[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v17.2d, v17.2d
|
eor v17.16b, v17.16b, v17.16b
|
||||||
|
fmls v17.2d, v0.2d, v9.2d[0]
|
||||||
|
#else
|
||||||
|
fmul v17.2d, v0.2d, v9.2d[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v17.2d, v1.2d, v8.2d[0]
|
OP_ir v17.2d, v1.2d, v8.2d[0]
|
||||||
|
|
||||||
fmul v18.2d, v2.2d, v8.2d[0]
|
fmul v18.2d, v2.2d, v8.2d[0]
|
||||||
OP_ii v18.2d, v3.2d, v9.2d[0]
|
OP_ii v18.2d, v3.2d, v9.2d[0]
|
||||||
fmul v19.2d, v2.2d, v9.2d[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v19.2d, v19.2d
|
eor v19.16b, v19.16b, v19.16b
|
||||||
|
fmls v19.2d, v2.2d, v9.2d[0]
|
||||||
|
#else
|
||||||
|
fmul v19.2d, v2.2d, v9.2d[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v19.2d, v3.2d, v8.2d[0]
|
OP_ir v19.2d, v3.2d, v8.2d[0]
|
||||||
|
|
||||||
fmul v20.2d, v0.2d, v8.2d[1]
|
fmul v20.2d, v0.2d, v8.2d[1]
|
||||||
OP_ii v20.2d, v1.2d, v9.2d[1]
|
OP_ii v20.2d, v1.2d, v9.2d[1]
|
||||||
fmul v21.2d, v0.2d, v9.2d[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v21.2d, v21.2d
|
eor v21.16b, v21.16b, v21.16b
|
||||||
|
fmls v21.2d, v0.2d, v9.2d[1]
|
||||||
|
#else
|
||||||
|
fmul v21.2d, v0.2d, v9.2d[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v21.2d, v1.2d, v8.2d[1]
|
OP_ir v21.2d, v1.2d, v8.2d[1]
|
||||||
|
|
||||||
fmul v22.2d, v2.2d, v8.2d[1]
|
fmul v22.2d, v2.2d, v8.2d[1]
|
||||||
OP_ii v22.2d, v3.2d, v9.2d[1]
|
OP_ii v22.2d, v3.2d, v9.2d[1]
|
||||||
fmul v23.2d, v2.2d, v9.2d[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v23.2d, v23.2d
|
eor v23.16b, v23.16b, v23.16b
|
||||||
|
fmls v23.2d, v2.2d, v9.2d[1]
|
||||||
|
#else
|
||||||
|
fmul v23.2d, v2.2d, v9.2d[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v23.2d, v3.2d, v8.2d[1]
|
OP_ir v23.2d, v3.2d, v8.2d[1]
|
||||||
|
|
||||||
fmul v24.2d, v0.2d, v10.2d[0]
|
fmul v24.2d, v0.2d, v10.2d[0]
|
||||||
OP_ii v24.2d, v1.2d, v11.2d[0]
|
OP_ii v24.2d, v1.2d, v11.2d[0]
|
||||||
fmul v25.2d, v0.2d, v11.2d[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v25.2d, v25.2d
|
eor v25.16b, v25.16b, v25.16b
|
||||||
|
fmls v25.2d, v0.2d, v11.2d[0]
|
||||||
|
#else
|
||||||
|
fmul v25.2d, v0.2d, v11.2d[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v25.2d, v1.2d, v10.2d[0]
|
OP_ir v25.2d, v1.2d, v10.2d[0]
|
||||||
|
|
||||||
fmul v26.2d, v2.2d, v10.2d[0]
|
fmul v26.2d, v2.2d, v10.2d[0]
|
||||||
OP_ii v26.2d, v3.2d, v11.2d[0]
|
OP_ii v26.2d, v3.2d, v11.2d[0]
|
||||||
fmul v27.2d, v2.2d, v11.2d[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v27.2d, v27.2d
|
eor v27.16b, v27.16b, v27.16b
|
||||||
|
fmls v27.2d, v2.2d, v11.2d[0]
|
||||||
|
#else
|
||||||
|
fmul v27.2d, v2.2d, v11.2d[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v27.2d, v3.2d, v10.2d[0]
|
OP_ir v27.2d, v3.2d, v10.2d[0]
|
||||||
|
|
||||||
fmul v28.2d, v0.2d, v10.2d[1]
|
fmul v28.2d, v0.2d, v10.2d[1]
|
||||||
OP_ii v28.2d, v1.2d, v11.2d[1]
|
OP_ii v28.2d, v1.2d, v11.2d[1]
|
||||||
fmul v29.2d, v0.2d, v11.2d[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v29.2d, v29.2d
|
eor v29.16b, v29.16b, v29.16b
|
||||||
|
fmls v29.2d, v0.2d, v11.2d[1]
|
||||||
|
#else
|
||||||
|
fmul v29.2d, v0.2d, v11.2d[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v29.2d, v1.2d, v10.2d[1]
|
OP_ir v29.2d, v1.2d, v10.2d[1]
|
||||||
|
|
||||||
fmul v30.2d, v2.2d, v10.2d[1]
|
fmul v30.2d, v2.2d, v10.2d[1]
|
||||||
OP_ii v30.2d, v3.2d, v11.2d[1]
|
OP_ii v30.2d, v3.2d, v11.2d[1]
|
||||||
fmul v31.2d, v2.2d, v11.2d[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v31.2d, v31.2d
|
eor v31.16b, v31.16b, v31.16b
|
||||||
|
fmls v31.2d, v2.2d, v11.2d[1]
|
||||||
|
#else
|
||||||
|
fmul v31.2d, v2.2d, v11.2d[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v31.2d, v3.2d, v10.2d[1]
|
OP_ir v31.2d, v3.2d, v10.2d[1]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -110,15 +110,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
/******* INIT FOR F1 AND S1 LOOP ******/
|
/******* INIT FOR F1 AND S1 LOOP ******/
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
ins v0.s[1], v0.s[0] // R(ALPHA), R(ALPHA)
|
ins v0.s[1], v0.s[0] // R(ALPHA), R(ALPHA)
|
||||||
fneg s2, ALPHA_I
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub s2, s2, ALPHA_I
|
||||||
ins v1.s[1], v2.s[0] // -I(ALPHA), I(ALPHA)
|
ins v1.s[1], v2.s[0] // -I(ALPHA), I(ALPHA)
|
||||||
#if !defined(XCONJ)
|
#if !defined(XCONJ)
|
||||||
ext v1.8b, v1.8b, v1.8b, #4 // I(ALPHA), -I(ALPHA)
|
ext v1.8b, v1.8b, v1.8b, #4 // I(ALPHA), -I(ALPHA)
|
||||||
#endif
|
#endif
|
||||||
#else
|
#else
|
||||||
ins v0.d[1], v0.d[0] // R(ALPHA), R(ALPHA)
|
ins v0.d[1], v0.d[0] // R(ALPHA), R(ALPHA)
|
||||||
fneg d2, ALPHA_I
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub d2, d2, ALPHA_I
|
||||||
ins v1.d[1], v2.d[0] // -I(ALPHA), I(ALPHA)
|
ins v1.d[1], v2.d[0] // -I(ALPHA), I(ALPHA)
|
||||||
#if !defined(XCONJ)
|
#if !defined(XCONJ)
|
||||||
ext v1.16b, v1.16b, v1.16b, #8 // I(ALPHA), -I(ALPHA)
|
ext v1.16b, v1.16b, v1.16b, #8 // I(ALPHA), -I(ALPHA)
|
||||||
|
|
@ -156,8 +158,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
fmul v11.4s, v9.4s, v7.4s // [+ R(X) * R(ALPHA)]
|
fmul v11.4s, v9.4s, v7.4s // [+ R(X) * R(ALPHA)]
|
||||||
fmls v11.4s, v10.4s, v8.4s // [- I(X) * I(ALPHA)]
|
fmls v11.4s, v10.4s, v8.4s // [- I(X) * I(ALPHA)]
|
||||||
fmul v12.4s, v9.4s, v8.4s // [R(X) * I(ALPHA)]
|
eor v12.16b, v12.16b, v12.16b
|
||||||
fneg v12.4s, v12.4s // [- R(X) * I(ALPHA)]
|
fmls v12.4s, v9.4s, v8.4s // [- R(X) * I(ALPHA)]
|
||||||
fmla v12.4s, v10.4s, v7.4s // [- I(X) * R(ALPHA)]
|
fmla v12.4s, v10.4s, v7.4s // [- I(X) * R(ALPHA)]
|
||||||
#endif
|
#endif
|
||||||
#endif // CONJ
|
#endif // CONJ
|
||||||
|
|
@ -170,24 +172,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ins v3.s[0], v2.s[1]
|
ins v3.s[0], v2.s[1]
|
||||||
#if !defined(CONJ)
|
#if !defined(CONJ)
|
||||||
#if !defined(XCONJ)
|
#if !defined(XCONJ)
|
||||||
fneg s4, s3
|
eor v4.16b, v4.16b, v4.16b
|
||||||
|
fsub s4, s4, s3
|
||||||
ins v3.s[1], v4.s[0]
|
ins v3.s[1], v4.s[0]
|
||||||
ext v3.8b, v3.8b, v3.8b, #4 // [I(TEMP), -I(TEMP)]
|
ext v3.8b, v3.8b, v3.8b, #4 // [I(TEMP), -I(TEMP)]
|
||||||
ins v2.s[1], v2.s[0] // [R(TEMP), R(TEMP)]
|
ins v2.s[1], v2.s[0] // [R(TEMP), R(TEMP)]
|
||||||
#else
|
#else
|
||||||
fneg s4, s3
|
eor v4.16b, v4.16b, v4.16b
|
||||||
|
fsub s4, s4, s3
|
||||||
ins v3.s[1], v4.s[0] // [-I(TEMP), I(TEMP)]
|
ins v3.s[1], v4.s[0] // [-I(TEMP), I(TEMP)]
|
||||||
ins v2.s[1], v2.s[0] // [R(TEMP), R(TEMP)]
|
ins v2.s[1], v2.s[0] // [R(TEMP), R(TEMP)]
|
||||||
#endif
|
#endif
|
||||||
#else // CONJ
|
#else // CONJ
|
||||||
#if !defined(XCONJ)
|
#if !defined(XCONJ)
|
||||||
ins v3.s[1], v3.s[0] // [I(TEMP), I(TEMP)]
|
ins v3.s[1], v3.s[0] // [I(TEMP), I(TEMP)]
|
||||||
fneg s4, s2
|
eor v4.16b, v4.16b, v4.16b
|
||||||
|
fsub s4, s4, s2
|
||||||
ins v2.s[1], v4.s[0] // [-R(TEMP), R(TEMP)]
|
ins v2.s[1], v4.s[0] // [-R(TEMP), R(TEMP)]
|
||||||
#else
|
#else
|
||||||
fneg s3, s3
|
eor v4.16b, v4.16b, v4.16b
|
||||||
|
fsub s3, s4, s3
|
||||||
ins v3.s[1], v3.s[0] // [-I(TEMP), -I(TEMP)]
|
ins v3.s[1], v3.s[0] // [-I(TEMP), -I(TEMP)]
|
||||||
fneg s4, s2
|
eor v4.16b, v4.16b, v4.16b
|
||||||
|
fsub s4, s4, s2
|
||||||
ins v2.s[1], v4.s[0] // [-R(TEMP), R(TEMP)]
|
ins v2.s[1], v4.s[0] // [-R(TEMP), R(TEMP)]
|
||||||
#endif
|
#endif
|
||||||
#endif // CONJ
|
#endif // CONJ
|
||||||
|
|
@ -220,8 +227,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#else
|
#else
|
||||||
fmul v11.2d, v9.2d, v7.2d // [+ R(X) * R(ALPHA)]
|
fmul v11.2d, v9.2d, v7.2d // [+ R(X) * R(ALPHA)]
|
||||||
fmls v11.2d, v10.2d, v8.2d // [- I(X) * I(ALPHA)]
|
fmls v11.2d, v10.2d, v8.2d // [- I(X) * I(ALPHA)]
|
||||||
fmul v12.2d, v9.2d, v8.2d // [R(X) * I(ALPHA)]
|
eor v12.16b, v12.16b, v12.16b
|
||||||
fneg v12.2d, v12.2d // [- R(X) * I(ALPHA)]
|
fmls v12.2d, v9.2d, v8.2d // [- R(X) * I(ALPHA)]
|
||||||
fmla v12.2d, v10.2d, v7.2d // [- I(X) * R(ALPHA)]
|
fmla v12.2d, v10.2d, v7.2d // [- I(X) * R(ALPHA)]
|
||||||
#endif
|
#endif
|
||||||
#endif // CONJ
|
#endif // CONJ
|
||||||
|
|
@ -234,24 +241,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ins v3.d[0], v2.d[1] // I(TEMP)
|
ins v3.d[0], v2.d[1] // I(TEMP)
|
||||||
#if !defined(CONJ)
|
#if !defined(CONJ)
|
||||||
#if !defined(XCONJ)
|
#if !defined(XCONJ)
|
||||||
fneg d4, d3 // -I(TEMP)
|
eor v4.16b, v4.16b, v4.16b
|
||||||
|
fsub d4, d4, d3
|
||||||
ins v3.d[1], v4.d[0]
|
ins v3.d[1], v4.d[0]
|
||||||
ext v3.16b, v3.16b, v3.16b, #8 // [I(TEMP), -I(TEMP)]
|
ext v3.16b, v3.16b, v3.16b, #8 // [I(TEMP), -I(TEMP)]
|
||||||
ins v2.d[1], v2.d[0] // [R(TEMP), R(TEMP)]
|
ins v2.d[1], v2.d[0] // [R(TEMP), R(TEMP)]
|
||||||
#else
|
#else
|
||||||
fneg d4, d3 // -I(TEMP)
|
eor v4.16b, v4.16b, v4.16b
|
||||||
|
fsub d4, d4, d3
|
||||||
ins v3.d[1], v4.d[0] // [-I(TEMP), I(TEMP)]
|
ins v3.d[1], v4.d[0] // [-I(TEMP), I(TEMP)]
|
||||||
ins v2.d[1], v2.d[0] // [R(TEMP), R(TEMP)]
|
ins v2.d[1], v2.d[0] // [R(TEMP), R(TEMP)]
|
||||||
#endif
|
#endif
|
||||||
#else // CONJ
|
#else // CONJ
|
||||||
#if !defined(XCONJ)
|
#if !defined(XCONJ)
|
||||||
ins v3.d[1], v3.d[0] // [I(TEMP), I(TEMP)]
|
ins v3.d[1], v3.d[0] // [I(TEMP), I(TEMP)]
|
||||||
fneg d4, d2 // -R(TEMP)
|
eor v4.16b, v4.16b, v4.16b
|
||||||
|
fsub d4, d4, d2
|
||||||
ins v2.d[1], v4.d[0] // [-R(TEMP), R(TEMP)]
|
ins v2.d[1], v4.d[0] // [-R(TEMP), R(TEMP)]
|
||||||
#else
|
#else
|
||||||
fneg d3, d3 // -I(TEMP)
|
eor v4.16b, v4.16b, v4.16b
|
||||||
|
fsub d3, d4, d3
|
||||||
ins v3.d[1], v3.d[0] // [-I(TEMP), -I(TEMP)]
|
ins v3.d[1], v3.d[0] // [-I(TEMP), -I(TEMP)]
|
||||||
fneg d4, d2 // -R(TEMP)
|
eor v4.16b, v4.16b, v4.16b
|
||||||
|
fsub d4, d4, d2
|
||||||
ins v2.d[1], v4.d[0] // [-R(TEMP), R(TEMP)]
|
ins v2.d[1], v4.d[0] // [-R(TEMP), R(TEMP)]
|
||||||
#endif
|
#endif
|
||||||
#endif // CONJ
|
#endif // CONJ
|
||||||
|
|
|
||||||
|
|
@ -96,22 +96,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#if !defined(XCONJ)
|
#if !defined(XCONJ)
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
ins v0.s[1], v0.s[0] // v0 = ALPHA_R, ALPHA_R
|
ins v0.s[1], v0.s[0] // v0 = ALPHA_R, ALPHA_R
|
||||||
fneg s2, ALPHA_I
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub s2, s2, ALPHA_I
|
||||||
ins v1.s[1], v2.s[0]
|
ins v1.s[1], v2.s[0]
|
||||||
ext v1.8b, v1.8b, v1.8b, #4 // v1 = ALPHA_I, -ALPHA_I
|
ext v1.8b, v1.8b, v1.8b, #4 // v1 = ALPHA_I, -ALPHA_I
|
||||||
#else
|
#else
|
||||||
ins v0.d[1], v0.d[0] // v0 = ALPHA_R, ALPHA_R
|
ins v0.d[1], v0.d[0] // v0 = ALPHA_R, ALPHA_R
|
||||||
fneg d2, ALPHA_I
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub d2, d2, ALPHA_I
|
||||||
ins v1.d[1], v2.d[0]
|
ins v1.d[1], v2.d[0]
|
||||||
ext v1.16b, v1.16b, v1.16b, #8 // v1 = ALPHA_I, -ALPHA_I
|
ext v1.16b, v1.16b, v1.16b, #8 // v1 = ALPHA_I, -ALPHA_I
|
||||||
#endif
|
#endif
|
||||||
#else // XCONJ
|
#else // XCONJ
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
fneg s2, ALPHA_R
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub s2, s2, ALPHA_R
|
||||||
ins v0.s[1], v2.s[0] // v0 = -ALPHA_R, ALPHA_R
|
ins v0.s[1], v2.s[0] // v0 = -ALPHA_R, ALPHA_R
|
||||||
ins v1.s[1], v1.s[0] // v1 = ALPHA_I, ALPHA_I
|
ins v1.s[1], v1.s[0] // v1 = ALPHA_I, ALPHA_I
|
||||||
#else
|
#else
|
||||||
fneg d2, ALPHA_R
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub d2, d2, ALPHA_R
|
||||||
ins v0.d[1], v2.d[0] // v0 = -ALPHA_R, ALPHA_R
|
ins v0.d[1], v2.d[0] // v0 = -ALPHA_R, ALPHA_R
|
||||||
ins v1.d[1], v1.d[0] // v1 = ALPHA_I, ALPHA_I
|
ins v1.d[1], v1.d[0] // v1 = ALPHA_I, ALPHA_I
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -136,89 +140,51 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ld2 {v11.4s, v12.4s}, [X_PTR], #32
|
ld2 {v11.4s, v12.4s}, [X_PTR], #32
|
||||||
ld2 {v13.4s, v14.4s}, [A_PTR], #32
|
ld2 {v13.4s, v14.4s}, [A_PTR], #32
|
||||||
|
|
||||||
#if !defined(CONJ)
|
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
|
||||||
#if !defined(XCONJ)
|
|
||||||
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
|
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
|
||||||
fmls v9.4s, v12.4s, v14.4s // [- I(X) * A_I]
|
fmls v9.4s, v12.4s, v14.4s // [- I(X) * A_I]
|
||||||
fmla v10.4s, v11.4s, v14.4s // [+ R(X) * A_I]
|
fmla v10.4s, v11.4s, v14.4s // [+ R(X) * A_I]
|
||||||
fmla v10.4s, v12.4s, v13.4s // [+ I(X) * A_R]
|
fmla v10.4s, v12.4s, v13.4s // [+ I(X) * A_R]
|
||||||
#else
|
#else
|
||||||
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
|
|
||||||
fmla v9.4s, v12.4s, v14.4s // [+ I(X) * A_I]
|
|
||||||
fmla v10.4s, v11.4s, v14.4s // [+ R(X) * A_I]
|
|
||||||
fmls v10.4s, v12.4s, v13.4s // [- I(X) * A_R]
|
|
||||||
#endif
|
|
||||||
#else // CONJ
|
|
||||||
#if !defined(XCONJ)
|
|
||||||
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
|
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
|
||||||
fmla v9.4s, v12.4s, v14.4s // [+ I(X) * A_I]
|
fmla v9.4s, v12.4s, v14.4s // [+ I(X) * A_I]
|
||||||
fmls v10.4s, v11.4s, v14.4s // [- R(X) * A_I]
|
fmls v10.4s, v11.4s, v14.4s // [- R(X) * A_I]
|
||||||
fmla v10.4s, v12.4s, v13.4s // [+ I(X) * A_R]
|
fmla v10.4s, v12.4s, v13.4s // [+ I(X) * A_R]
|
||||||
#else
|
|
||||||
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
|
|
||||||
fmls v9.4s, v12.4s, v14.4s // [- I(X) * A_I]
|
|
||||||
fmls v10.4s, v11.4s, v14.4s // [- R(X) * A_I]
|
|
||||||
fmls v10.4s, v12.4s, v13.4s // [- I(X) * A_R]
|
|
||||||
#endif
|
#endif
|
||||||
#endif // CONJ
|
|
||||||
|
|
||||||
#else // DOUBLE
|
#else // DOUBLE
|
||||||
ld2 {v11.2d, v12.2d}, [X_PTR], #32
|
ld2 {v11.2d, v12.2d}, [X_PTR], #32
|
||||||
ld2 {v13.2d, v14.2d}, [A_PTR], #32
|
ld2 {v13.2d, v14.2d}, [A_PTR], #32
|
||||||
prfm PLDL1STRM, [X_PTR, #512]
|
prfm PLDL1STRM, [X_PTR, #512]
|
||||||
#if !defined(CONJ)
|
|
||||||
#if !defined(XCONJ)
|
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
|
||||||
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
|
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
|
||||||
fmls v9.2d, v12.2d, v14.2d // [- I(X) * A_I]
|
fmls v9.2d, v12.2d, v14.2d // [- I(X) * A_I]
|
||||||
fmla v10.2d, v11.2d, v14.2d // [+ R(X) * A_I]
|
fmla v10.2d, v11.2d, v14.2d // [+ R(X) * A_I]
|
||||||
fmla v10.2d, v12.2d, v13.2d // [+ I(X) * A_R]
|
fmla v10.2d, v12.2d, v13.2d // [+ I(X) * A_R]
|
||||||
#else
|
#else
|
||||||
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
|
|
||||||
fmla v9.2d, v12.2d, v14.2d // [+ I(X) * A_I]
|
|
||||||
fmla v10.2d, v11.2d, v14.2d // [+ R(X) * A_I]
|
|
||||||
fmls v10.2d, v12.2d, v13.2d // [- I(X) * A_R]
|
|
||||||
#endif
|
|
||||||
#else // CONJ
|
|
||||||
#if !defined(XCONJ)
|
|
||||||
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
|
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
|
||||||
fmla v9.2d, v12.2d, v14.2d // [+ I(X) * A_I]
|
fmla v9.2d, v12.2d, v14.2d // [+ I(X) * A_I]
|
||||||
fmls v10.2d, v11.2d, v14.2d // [- R(X) * A_I]
|
fmls v10.2d, v11.2d, v14.2d // [- R(X) * A_I]
|
||||||
fmla v10.2d, v12.2d, v13.2d // [+ I(X) * A_R]
|
fmla v10.2d, v12.2d, v13.2d // [+ I(X) * A_R]
|
||||||
#else
|
|
||||||
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
|
|
||||||
fmls v9.2d, v12.2d, v14.2d // [- I(X) * A_I]
|
|
||||||
fmls v10.2d, v11.2d, v14.2d // [- R(X) * A_I]
|
|
||||||
fmls v10.2d, v12.2d, v13.2d // [- I(X) * A_R]
|
|
||||||
#endif
|
#endif
|
||||||
#endif // CONJ
|
|
||||||
ld2 {v17.2d, v18.2d}, [X_PTR], #32
|
ld2 {v17.2d, v18.2d}, [X_PTR], #32
|
||||||
ld2 {v19.2d, v20.2d}, [A_PTR], #32
|
ld2 {v19.2d, v20.2d}, [A_PTR], #32
|
||||||
prfm PLDL1STRM, [A_PTR, #512]
|
prfm PLDL1STRM, [A_PTR, #512]
|
||||||
#if !defined(CONJ)
|
|
||||||
#if !defined(XCONJ)
|
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
|
||||||
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
|
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
|
||||||
fmls v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
|
fmls v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
|
||||||
fmla v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
|
fmla v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
|
||||||
fmla v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
|
fmla v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
|
||||||
#else
|
#else
|
||||||
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
|
|
||||||
fmla v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
|
|
||||||
fmla v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
|
|
||||||
fmls v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
|
|
||||||
#endif
|
|
||||||
#else // CONJ
|
|
||||||
#if !defined(XCONJ)
|
|
||||||
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
|
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
|
||||||
fmla v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
|
fmla v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
|
||||||
fmls v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
|
fmls v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
|
||||||
fmla v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
|
fmla v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
|
||||||
#else
|
|
||||||
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
|
|
||||||
fmls v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
|
|
||||||
fmls v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
|
|
||||||
fmls v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
|
|
||||||
#endif
|
#endif
|
||||||
#endif // CONJ
|
|
||||||
#endif //DOUBLE
|
#endif //DOUBLE
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
|
|
@ -252,7 +218,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ld1r {v4.2s}, [A_PTR], #4 // [A0, A0]
|
ld1r {v4.2s}, [A_PTR], #4 // [A0, A0]
|
||||||
ld1 {v5.s}[0], [A_PTR], #4 // A1
|
ld1 {v5.s}[0], [A_PTR], #4 // A1
|
||||||
ld1 {v6.2s}, [X_PTR], #8 // [X1, X0]
|
ld1 {v6.2s}, [X_PTR], #8 // [X1, X0]
|
||||||
fneg s16, s5
|
eor v16.16b, v16.16b, v16.16b
|
||||||
|
fsub s16, s16, s5
|
||||||
ins v5.s[1], v16.s[0] // [-A1, A1]
|
ins v5.s[1], v16.s[0] // [-A1, A1]
|
||||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||||
ext v5.8b, v5.8b, v5.8b, #4 // [A1, -A1]
|
ext v5.8b, v5.8b, v5.8b, #4 // [A1, -A1]
|
||||||
|
|
@ -264,7 +231,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ld1r {v4.2d}, [A_PTR], #8 // [A0, A0]
|
ld1r {v4.2d}, [A_PTR], #8 // [A0, A0]
|
||||||
ld1 {v5.d}[0], [A_PTR], #8 // A1
|
ld1 {v5.d}[0], [A_PTR], #8 // A1
|
||||||
ld1 {v6.2d}, [X_PTR], #16 // [X1, X0]
|
ld1 {v6.2d}, [X_PTR], #16 // [X1, X0]
|
||||||
fneg d16, d5
|
eor v16.16b, v16.16b, v16.16b
|
||||||
|
fsub d16, d16, d5
|
||||||
ins v5.d[1], v16.d[0] // [-A1, A1]
|
ins v5.d[1], v16.d[0] // [-A1, A1]
|
||||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||||
ext v5.16b, v5.16b, v5.16b, #8 // [A1, -A1]
|
ext v5.16b, v5.16b, v5.16b, #8 // [A1, -A1]
|
||||||
|
|
@ -284,7 +252,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ld1r {v4.2s}, [A_PTR], #4 // [A0, A0]
|
ld1r {v4.2s}, [A_PTR], #4 // [A0, A0]
|
||||||
ld1 {v5.s}[0], [A_PTR], #4 // A1
|
ld1 {v5.s}[0], [A_PTR], #4 // A1
|
||||||
ld1 {v6.2s}, [X_PTR], INC_X // [X1, X0]
|
ld1 {v6.2s}, [X_PTR], INC_X // [X1, X0]
|
||||||
fneg s16, s5
|
eor v16.16b, v16.16b, v16.16b
|
||||||
|
fsub s16, s16, s5
|
||||||
ins v5.s[1], v16.s[0] // [-A1, A1]
|
ins v5.s[1], v16.s[0] // [-A1, A1]
|
||||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||||
ext v5.8b, v5.8b, v5.8b, #4 // [A1, -A1]
|
ext v5.8b, v5.8b, v5.8b, #4 // [A1, -A1]
|
||||||
|
|
@ -296,7 +265,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
ld1r {v4.2d}, [A_PTR], #8 // [A0, A0]
|
ld1r {v4.2d}, [A_PTR], #8 // [A0, A0]
|
||||||
ld1 {v5.d}[0], [A_PTR], #8 // A1
|
ld1 {v5.d}[0], [A_PTR], #8 // A1
|
||||||
ld1 {v6.2d}, [X_PTR], INC_X // [X1, X0]
|
ld1 {v6.2d}, [X_PTR], INC_X // [X1, X0]
|
||||||
fneg d16, d5
|
eor v16.16b, v16.16b, v16.16b
|
||||||
|
fsub d16, d16, d5
|
||||||
ins v5.d[1], v16.d[0] // [-A1, A1]
|
ins v5.d[1], v16.d[0] // [-A1, A1]
|
||||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||||
ext v5.16b, v5.16b, v5.16b, #8 // [A1, -A1]
|
ext v5.16b, v5.16b, v5.16b, #8 // [A1, -A1]
|
||||||
|
|
|
||||||
|
|
@ -28,201 +28,261 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define ASSEMBLER
|
#define ASSEMBLER
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
#define N x0 /* vector length */
|
#define N x0
|
||||||
#define X x1 /* X vector address */
|
#define X x1
|
||||||
#define INC_X x2 /* X stride */
|
#define INC_X x2
|
||||||
#define I x5 /* loop variable */
|
|
||||||
|
|
||||||
/*******************************************************************************
|
#define I x3
|
||||||
* Macro definitions
|
|
||||||
*******************************************************************************/
|
|
||||||
|
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
#define TMPF s6
|
#define SSQ s0
|
||||||
#define SSQ s0
|
#define SCALE s1
|
||||||
#define TMPVF {v6.s}[0]
|
#define REGZERO s6
|
||||||
#define SZ 4
|
#define REGONE s7
|
||||||
#else
|
#else
|
||||||
#define TMPF d6
|
#define SSQ d0
|
||||||
#define SSQ d0
|
#define SCALE d1
|
||||||
#define TMPVF {v6.d}[0]
|
#define REGZERO d6
|
||||||
#define SZ 8
|
#define REGONE d7
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/******************************************************************************/
|
/**************************************************************************************
|
||||||
|
* Macro definitions
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
.macro KERNEL_F1
|
.macro KERNEL_F1
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
ld1 {v1.2s}, [X], #8
|
ldr s4, [X], #4
|
||||||
fmul v1.2s, v1.2s, v1.2s
|
fcmp s4, REGZERO
|
||||||
faddp TMPF, v1.2s
|
beq KERNEL_F1_NEXT_\@
|
||||||
fadd SSQ, SSQ, TMPF
|
fabs s4, s4
|
||||||
|
fcmp SCALE, s4
|
||||||
|
bge KERNEL_F1_SCALE_GE_XR_\@
|
||||||
|
fdiv s2, SCALE, s4
|
||||||
|
fmul s2, s2, s2
|
||||||
|
fmul s3, SSQ, s2
|
||||||
|
fadd SSQ, REGONE, s3
|
||||||
|
fmov SCALE, s4
|
||||||
|
b KERNEL_F1_NEXT_\@
|
||||||
|
KERNEL_F1_SCALE_GE_XR_\@:
|
||||||
|
fdiv s2, s4, SCALE
|
||||||
|
fmla SSQ, s2, v2.s[0]
|
||||||
|
KERNEL_F1_NEXT_\@:
|
||||||
|
ldr s5, [X], #4
|
||||||
|
fcmp s5, REGZERO
|
||||||
|
beq KERNEL_F1_END_\@
|
||||||
|
fabs s5, s5
|
||||||
|
fcmp SCALE, s5
|
||||||
|
bge KERNEL_F1_SCALE_GE_XI_\@
|
||||||
|
fdiv s2, SCALE, s5
|
||||||
|
fmul s2, s2, s2
|
||||||
|
fmul s3, SSQ, s2
|
||||||
|
fadd SSQ, REGONE, s3
|
||||||
|
fmov SCALE, s5
|
||||||
|
b KERNEL_F1_END_\@
|
||||||
|
KERNEL_F1_SCALE_GE_XI_\@:
|
||||||
|
fdiv s2, s5, SCALE
|
||||||
|
fmla SSQ, s2, v2.s[0]
|
||||||
#else
|
#else
|
||||||
ld1 {v1.2d}, [X], #16
|
ldr d4, [X], #8
|
||||||
fmul v1.2d, v1.2d, v1.2d
|
fcmp d4, REGZERO
|
||||||
faddp TMPF, v1.2d
|
beq KERNEL_F1_NEXT_\@
|
||||||
fadd SSQ, SSQ, TMPF
|
fabs d4, d4
|
||||||
#endif
|
fcmp SCALE, d4
|
||||||
.endm
|
bge KERNEL_F1_SCALE_GE_XR_\@
|
||||||
|
fdiv d2, SCALE, d4
|
||||||
.macro KERNEL_F8
|
fmul d2, d2, d2
|
||||||
#if !defined(DOUBLE)
|
fmul d3, SSQ, d2
|
||||||
ld1 {v1.4s, v2.4s}, [X], #32
|
fadd SSQ, REGONE, d3
|
||||||
fmla v0.4s, v1.4s, v1.4s
|
fmov SCALE, d4
|
||||||
fmla v5.4s, v2.4s, v2.4s
|
b KERNEL_F1_NEXT_\@
|
||||||
ld1 {v3.4s,v4.4s}, [X], #32
|
KERNEL_F1_SCALE_GE_XR_\@:
|
||||||
fmla v0.4s, v3.4s, v3.4s
|
fdiv d2, d4, SCALE
|
||||||
fmla v5.4s, v4.4s, v4.4s
|
fmla SSQ, d2, v2.d[0]
|
||||||
PRFM PLDL1KEEP, [X, #1024]
|
KERNEL_F1_NEXT_\@:
|
||||||
#else // DOUBLE
|
ldr d5, [X], #8
|
||||||
ld1 {v1.2d, v2.2d}, [X], #32
|
fcmp d5, REGZERO
|
||||||
fmla v0.2d, v1.2d, v1.2d
|
beq KERNEL_F1_END_\@
|
||||||
fmla v5.2d, v2.2d, v2.2d
|
fabs d5, d5
|
||||||
ld1 {v3.2d, v4.2d}, [X], #32
|
fcmp SCALE, d5
|
||||||
fmla v0.2d, v3.2d, v3.2d
|
bge KERNEL_F1_SCALE_GE_XI_\@
|
||||||
fmla v5.2d, v4.2d, v4.2d
|
fdiv d2, SCALE, d5
|
||||||
|
fmul d2, d2, d2
|
||||||
ld1 {v16.2d, v17.2d}, [X], #32
|
fmul d3, SSQ, d2
|
||||||
fmla v0.2d, v16.2d, v16.2d
|
fadd SSQ, REGONE, d3
|
||||||
fmla v5.2d, v17.2d, v17.2d
|
fmov SCALE, d5
|
||||||
ld1 {v18.2d, v19.2d}, [X], #32
|
b KERNEL_F1_END_\@
|
||||||
fmla v0.2d, v18.2d, v18.2d
|
KERNEL_F1_SCALE_GE_XI_\@:
|
||||||
fmla v5.2d, v19.2d, v19.2d
|
fdiv d2, d5, SCALE
|
||||||
#endif
|
fmla SSQ, d2, v2.d[0]
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro nrm2_kernel_F8_FINALIZE
|
|
||||||
#if !defined(DOUBLE)
|
|
||||||
fadd v0.4s, v0.4s, v5.4s
|
|
||||||
ext v1.16b, v0.16b, v0.16b, #8
|
|
||||||
fadd v0.2s, v0.2s, v1.2s
|
|
||||||
faddp SSQ, v0.2s
|
|
||||||
#else
|
|
||||||
fadd v0.2d, v0.2d, v5.2d
|
|
||||||
faddp SSQ, v0.2d
|
|
||||||
#endif
|
|
||||||
.endm
|
|
||||||
|
|
||||||
.macro INIT_S
|
|
||||||
#if !defined(DOUBLE)
|
|
||||||
lsl INC_X, INC_X, #3
|
|
||||||
ld1 {v1.2s}, [X], INC_X
|
|
||||||
fmul v1.2s, v1.2s, v1.2s
|
|
||||||
faddp SSQ, v1.2s
|
|
||||||
#else
|
|
||||||
lsl INC_X, INC_X, #4
|
|
||||||
ld1 {v1.2d}, [X], INC_X
|
|
||||||
fmul v1.2d, v1.2d, v1.2d
|
|
||||||
faddp SSQ, v1.2d
|
|
||||||
#endif
|
#endif
|
||||||
|
KERNEL_F1_END_\@:
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro KERNEL_S1
|
.macro KERNEL_S1
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
ld1 {v1.2s}, [X], INC_X
|
ldr s4, [X]
|
||||||
fmul v1.2s, v1.2s, v1.2s
|
fcmp s4, REGZERO
|
||||||
faddp TMPF, v1.2s
|
beq KERNEL_S1_NEXT_\@
|
||||||
fadd SSQ, SSQ, TMPF
|
fabs s4, s4
|
||||||
|
fcmp SCALE, s4
|
||||||
|
bge KERNEL_S1_SCALE_GE_XR_\@
|
||||||
|
fdiv s2, SCALE, s4
|
||||||
|
fmul s2, s2, s2
|
||||||
|
fmul s3, SSQ, s2
|
||||||
|
fadd SSQ, REGONE, s3
|
||||||
|
fmov SCALE, s4
|
||||||
|
b KERNEL_S1_NEXT_\@
|
||||||
|
KERNEL_S1_SCALE_GE_XR_\@:
|
||||||
|
fdiv s2, s4, SCALE
|
||||||
|
fmla SSQ, s2, v2.s[0]
|
||||||
|
KERNEL_S1_NEXT_\@:
|
||||||
|
ldr s5, [X, #4]
|
||||||
|
fcmp s5, REGZERO
|
||||||
|
beq KERNEL_S1_END_\@
|
||||||
|
fabs s5, s5
|
||||||
|
fcmp SCALE, s5
|
||||||
|
bge KERNEL_S1_SCALE_GE_XI_\@
|
||||||
|
fdiv s2, SCALE, s5
|
||||||
|
fmul s2, s2, s2
|
||||||
|
fmul s3, SSQ, s2
|
||||||
|
fadd SSQ, REGONE, s3
|
||||||
|
fmov SCALE, s5
|
||||||
|
b KERNEL_S1_END_\@
|
||||||
|
KERNEL_S1_SCALE_GE_XI_\@:
|
||||||
|
fdiv s2, s5, SCALE
|
||||||
|
fmla SSQ, s2, v2.s[0]
|
||||||
#else
|
#else
|
||||||
ld1 {v1.2d}, [X], INC_X
|
ldr d4, [X]
|
||||||
fmul v1.2d, v1.2d, v1.2d
|
fcmp d4, REGZERO
|
||||||
faddp TMPF, v1.2d
|
beq KERNEL_S1_NEXT_\@
|
||||||
fadd SSQ, SSQ, TMPF
|
fabs d4, d4
|
||||||
|
fcmp SCALE, d4
|
||||||
|
bge KERNEL_S1_SCALE_GE_XR_\@
|
||||||
|
fdiv d2, SCALE, d4
|
||||||
|
fmul d2, d2, d2
|
||||||
|
fmul d3, SSQ, d2
|
||||||
|
fadd SSQ, REGONE, d3
|
||||||
|
fmov SCALE, d4
|
||||||
|
b KERNEL_S1_NEXT_\@
|
||||||
|
KERNEL_S1_SCALE_GE_XR_\@:
|
||||||
|
fdiv d2, d4, SCALE
|
||||||
|
fmla SSQ, d2, v2.d[0]
|
||||||
|
KERNEL_S1_NEXT_\@:
|
||||||
|
ldr d5, [X, #8]
|
||||||
|
fcmp d5, REGZERO
|
||||||
|
beq KERNEL_S1_END_\@
|
||||||
|
fabs d5, d5
|
||||||
|
fcmp SCALE, d5
|
||||||
|
bge KERNEL_S1_SCALE_GE_XI_\@
|
||||||
|
fdiv d2, SCALE, d5
|
||||||
|
fmul d2, d2, d2
|
||||||
|
fmul d3, SSQ, d2
|
||||||
|
fadd SSQ, REGONE, d3
|
||||||
|
fmov SCALE, d5
|
||||||
|
b KERNEL_S1_END_\@
|
||||||
|
KERNEL_S1_SCALE_GE_XI_\@:
|
||||||
|
fdiv d2, d5, SCALE
|
||||||
|
fmla SSQ, d2, v2.d[0]
|
||||||
|
#endif
|
||||||
|
KERNEL_S1_END_\@:
|
||||||
|
add X, X, INC_X
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro KERNEL_F8
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
KERNEL_F1
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro INIT_S
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
lsl INC_X, INC_X, #3 // INC_X * SIZE
|
||||||
|
#else
|
||||||
|
lsl INC_X, INC_X, #4 // INC_X * SIZE
|
||||||
#endif
|
#endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
/*******************************************************************************
|
.macro INIT
|
||||||
|
eor v1.16b, v1.16b, v1.16b // scale=0.0
|
||||||
|
fmov SSQ, #1.0
|
||||||
|
fmov REGONE, SSQ
|
||||||
|
fmov REGZERO, SCALE
|
||||||
|
.endm
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
* End of macro definitions
|
* End of macro definitions
|
||||||
*******************************************************************************/
|
**************************************************************************************/
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
#if !defined(DOUBLE)
|
.align 5
|
||||||
fmov SSQ, wzr
|
|
||||||
fmov s5, SSQ
|
INIT
|
||||||
#else
|
|
||||||
fmov SSQ, xzr
|
cmp N, #0
|
||||||
fmov d5, SSQ
|
ble nrm2_kernel_L999
|
||||||
#endif
|
|
||||||
|
cmp INC_X, #0
|
||||||
|
beq nrm2_kernel_L999
|
||||||
|
|
||||||
cmp N, xzr
|
|
||||||
ble nrm2_kernel_zero
|
|
||||||
cmp INC_X, xzr
|
|
||||||
ble nrm2_kernel_zero
|
|
||||||
cmp INC_X, #1
|
cmp INC_X, #1
|
||||||
bne nrm2_kernel_S_BEGIN
|
bne nrm2_kernel_S_BEGIN
|
||||||
|
|
||||||
nrm2_kernel_F_BEGIN:
|
nrm2_kernel_F_BEGIN:
|
||||||
|
|
||||||
asr I, N, #3
|
asr I, N, #3 // I = N / 8
|
||||||
cmp I, xzr
|
cmp I, xzr
|
||||||
beq nrm2_kernel_F1_INIT
|
ble nrm2_kernel_F1
|
||||||
|
|
||||||
nrm2_kernel_F8:
|
nrm2_kernel_F8:
|
||||||
|
|
||||||
KERNEL_F8
|
KERNEL_F8
|
||||||
|
|
||||||
subs I, I, #1
|
subs I, I, #1
|
||||||
bne nrm2_kernel_F8
|
bne nrm2_kernel_F8
|
||||||
|
|
||||||
nrm2_kernel_F8_FINALIZE
|
|
||||||
|
|
||||||
nrm2_kernel_F1:
|
nrm2_kernel_F1:
|
||||||
|
|
||||||
ands I, N, #7
|
ands I, N, #7
|
||||||
ble nrm2_kernel_L999
|
ble nrm2_kernel_L999
|
||||||
|
|
||||||
|
|
||||||
nrm2_kernel_F10:
|
nrm2_kernel_F10:
|
||||||
|
|
||||||
KERNEL_F1
|
KERNEL_F1
|
||||||
|
|
||||||
subs I, I, #1
|
subs I, I, #1
|
||||||
bne nrm2_kernel_F10
|
bne nrm2_kernel_F10
|
||||||
|
|
||||||
b nrm2_kernel_L999
|
b nrm2_kernel_L999
|
||||||
|
|
||||||
nrm2_kernel_F1_INIT:
|
|
||||||
|
|
||||||
b nrm2_kernel_F1
|
|
||||||
|
|
||||||
nrm2_kernel_S_BEGIN:
|
nrm2_kernel_S_BEGIN:
|
||||||
|
|
||||||
INIT_S
|
INIT_S
|
||||||
|
|
||||||
subs N, N, #1
|
mov I, N
|
||||||
ble nrm2_kernel_L999
|
|
||||||
|
|
||||||
asr I, N, #2
|
.align 5
|
||||||
cmp I, xzr
|
|
||||||
ble nrm2_kernel_S1
|
|
||||||
|
|
||||||
nrm2_kernel_S4:
|
|
||||||
|
|
||||||
KERNEL_S1
|
|
||||||
KERNEL_S1
|
|
||||||
KERNEL_S1
|
|
||||||
KERNEL_S1
|
|
||||||
|
|
||||||
subs I, I, #1
|
|
||||||
bne nrm2_kernel_S4
|
|
||||||
|
|
||||||
nrm2_kernel_S1:
|
|
||||||
|
|
||||||
ands I, N, #3
|
|
||||||
ble nrm2_kernel_L999
|
|
||||||
|
|
||||||
nrm2_kernel_S10:
|
nrm2_kernel_S10:
|
||||||
|
|
||||||
KERNEL_S1
|
KERNEL_S1
|
||||||
|
|
||||||
subs I, I, #1
|
subs I, I, #1
|
||||||
bne nrm2_kernel_S10
|
bne nrm2_kernel_S10
|
||||||
|
|
||||||
|
|
||||||
nrm2_kernel_L999:
|
nrm2_kernel_L999:
|
||||||
fsqrt SSQ, SSQ
|
fsqrt SSQ, SSQ
|
||||||
ret
|
fmul SSQ, SCALE, SSQ
|
||||||
|
|
||||||
nrm2_kernel_zero:
|
|
||||||
ret
|
ret
|
||||||
|
|
||||||
EPILOGUE
|
EPILOGUE
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define X x3 /* X vector address */
|
#define X x3 /* X vector address */
|
||||||
#define INC_X x4 /* X stride */
|
#define INC_X x4 /* X stride */
|
||||||
#define I x5 /* loop variable */
|
#define I x5 /* loop variable */
|
||||||
|
#define X_COPY x6 /* Copy of X */
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
* Macro definitions
|
* Macro definitions
|
||||||
|
|
@ -50,43 +51,55 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.macro INIT
|
.macro INIT
|
||||||
|
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
|
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
|
||||||
fneg s2, DA_I
|
|
||||||
ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I
|
|
||||||
ext v1.8b, v1.8b, v1.8b, #4 // v1 = DA_I, -DA_I
|
|
||||||
#else
|
#else
|
||||||
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
|
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
|
||||||
fneg d2, DA_I
|
|
||||||
ins v1.d[1], v2.d[0] // v1 = DA_I, DA_I
|
|
||||||
ext v1.16b, v1.16b, v1.16b, #8 // v1 = DA_I, -DA_I
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro KERNEL_F1
|
.macro KERNEL_F1
|
||||||
|
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
ld1 {v2.2s}, [X] // X1, X0
|
ld1 {v2.2s}, [X] // X1, X0
|
||||||
ext v3.8b, v2.8b, v2.8b, #4 // X0, X1
|
fmul s3, DA_R, v2.s[0] // DA_R*X0
|
||||||
fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0
|
fmul s5, DA_I, v2.s[1] // DA_I*X1
|
||||||
fmla v2.2s, v3.2s, v1.2s // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
fsub s3, s3, s5 // DA_R*X0-DA_I*X1
|
||||||
st1 {v2.2s}, [X], #8
|
|
||||||
|
fmul s4, DA_I, v2.s[0] // DA_I*X0
|
||||||
|
fmul s5, DA_R, v2.s[1] // DA_R*X1
|
||||||
|
fadd s4, s4, s5 // DA_I*X0+DA_R*X1
|
||||||
|
|
||||||
|
ins v3.s[1], v4.s[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||||
|
st1 {v3.2s}, [X], #8
|
||||||
#else
|
#else
|
||||||
ld1 {v2.2d}, [X] // X1, X0
|
ld1 {v2.2d}, [X] // X1, X0
|
||||||
ext v3.16b, v2.16b, v2.16b, #8 // X0, X1
|
fmul d3, DA_R, v2.d[0] // DA_R*X0
|
||||||
fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0
|
fmul d5, DA_I, v2.d[1] // DA_I*X1
|
||||||
fmla v2.2d, v3.2d, v1.2d // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
fsub d3, d3, d5 // DA_R*X0-DA_I*X1
|
||||||
st1 {v2.2d}, [X], #16
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
fmul d4, DA_I, v2.d[0] // DA_I*X0
|
||||||
|
fmul d5, DA_R, v2.d[1] // DA_R*X1
|
||||||
|
fadd d4, d4, d5 // DA_I*X0+DA_R*X1
|
||||||
|
|
||||||
|
ins v3.d[1], v4.d[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||||
|
st1 {v3.2d}, [X], #16
|
||||||
|
#endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro KERNEL_INIT_F4
|
.macro KERNEL_INIT_F4
|
||||||
|
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
// Replicate the lower 2 floats into the upper 2 slots
|
ins v16.s[0], v0.s[0]
|
||||||
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R, DA_R, DA_R
|
ins v16.s[1], v16.s[0]
|
||||||
ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I, DA_I, DA_I
|
ins v16.d[1], v16.d[0]
|
||||||
|
ins v17.s[0], v1.s[0]
|
||||||
|
ins v17.s[1], v17.s[0]
|
||||||
|
ins v17.d[1], v17.d[0]
|
||||||
|
#else //DOUBLE
|
||||||
|
ins v16.d[0], v0.d[0]
|
||||||
|
ins v16.d[1], v16.d[0]
|
||||||
|
ins v17.d[0], v1.d[0]
|
||||||
|
ins v17.d[1], v17.d[0]
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
.endm
|
.endm
|
||||||
|
|
@ -94,46 +107,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.macro KERNEL_F4
|
.macro KERNEL_F4
|
||||||
|
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
ld1 {v2.4s,v3.4s}, [X] // V2 = X[3], X[2], X[1], X[0]
|
ld2 {v2.4s, v3.4s}, [X], #32
|
||||||
// V3 = X[7], X[6], X[5], X[4]
|
|
||||||
|
|
||||||
ext v6.8b, v2.8b, v2.8b, #4 // V6 = - , - , X[0], X[1]
|
fmul v4.4s, v2.4s, v16.4s
|
||||||
ins v6.s[2], v2.s[3] // V6 = - , X[3], X[0], X[1]
|
fmul v6.4s, v3.4s, v17.4s
|
||||||
ins v6.s[3], v2.s[2] // V6 = X[2], X[3], X[0], X[1]
|
fsub v4.4s, v4.4s, v6.4s
|
||||||
fmul v2.4s, v0.4s, v2.4s // X'[ix] += DA_R * X[ix]
|
|
||||||
// X'[ix+1] += DA_R * X[ix+1]
|
|
||||||
fmla v2.4s, v1.4s, v6.4s // X'[ix] += -DA_I * X[ix+1]
|
|
||||||
// X'[ix+1] += DA_I * X[ix]
|
|
||||||
|
|
||||||
ext v7.8b, v3.8b, v3.8b, #4 // V7 = - , - , X[4], X[5]
|
fmul v5.4s, v2.4s, v17.4s
|
||||||
ins v7.s[2], v3.s[3] // V7 = - , X[7], X[4], X[5]
|
fmul v6.4s, v3.4s, v16.4s
|
||||||
ins v7.s[3], v3.s[2] // V7 = X[6], X[7], X[4], X[5]
|
fadd v5.4s, v5.4s, v6.4s
|
||||||
fmul v3.4s, v0.4s, v3.4s // X'[ix] += DA_R * X[ix]
|
|
||||||
// X'[ix+1] += DA_R * X[ix+1]
|
|
||||||
fmla v3.4s, v1.4s, v7.4s // X'[ix] += -DA_I * X[ix+1]
|
|
||||||
// X'[ix+1] += DA_I * X[ix]
|
|
||||||
|
|
||||||
st1 {v2.4s,v3.4s}, [X], #32
|
st2 {v4.4s, v5.4s}, [X_COPY], #32
|
||||||
#else // DOUBLE
|
#else // DOUBLE
|
||||||
ld1 {v2.2d,v3.2d,v4.2d,v5.2d}, [X] // CX0, CX1, CX2, CX3
|
ld2 {v2.2d, v3.2d}, [X], #32
|
||||||
ext v20.16b, v2.16b, v2.16b, #8 // X[ix], X[ix+1]
|
|
||||||
ext v21.16b, v3.16b, v3.16b, #8 // X[ix], X[ix+1]
|
|
||||||
ext v22.16b, v4.16b, v4.16b, #8 // X[ix], X[ix+1]
|
|
||||||
ext v23.16b, v5.16b, v5.16b, #8 // X[ix], X[ix+1]
|
|
||||||
|
|
||||||
fmul v2.2d, v0.2d, v2.2d
|
fmul v4.2d, v2.2d, v16.2d
|
||||||
fmla v2.2d, v1.2d, v20.2d
|
fmul v6.2d, v3.2d, v17.2d
|
||||||
|
fsub v4.2d, v4.2d, v6.2d
|
||||||
|
fmul v5.2d, v2.2d, v17.2d
|
||||||
|
fmul v6.2d, v3.2d, v16.2d
|
||||||
|
fadd v5.2d, v5.2d, v6.2d
|
||||||
|
|
||||||
fmul v3.2d, v0.2d, v3.2d
|
st2 {v4.2d, v5.2d}, [X_COPY], #32
|
||||||
fmla v3.2d, v1.2d, v21.2d
|
|
||||||
st1 {v2.2d,v3.2d}, [X], #32
|
|
||||||
|
|
||||||
fmul v4.2d, v0.2d, v4.2d
|
ld2 {v18.2d, v19.2d}, [X], #32
|
||||||
fmla v4.2d, v1.2d, v22.2d
|
|
||||||
|
|
||||||
fmul v5.2d, v0.2d, v5.2d
|
fmul v20.2d, v18.2d, v16.2d
|
||||||
fmla v5.2d, v1.2d, v23.2d
|
fmul v6.2d, v19.2d, v17.2d
|
||||||
st1 {v4.2d,v5.2d}, [X], #32
|
fsub v20.2d, v20.2d, v6.2d
|
||||||
|
fmul v21.2d, v18.2d, v17.2d
|
||||||
|
fmul v6.2d, v19.2d, v16.2d
|
||||||
|
fadd v21.2d, v21.2d, v6.2d
|
||||||
|
|
||||||
|
st2 {v20.2d, v21.2d}, [X_COPY], #32
|
||||||
#endif
|
#endif
|
||||||
PRFM PLDL1KEEP, [X, #1024]
|
PRFM PLDL1KEEP, [X, #1024]
|
||||||
.endm
|
.endm
|
||||||
|
|
@ -149,21 +155,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
.macro KERNEL_S1
|
.macro KERNEL_S1
|
||||||
|
|
||||||
#if !defined(DOUBLE)
|
#if !defined(DOUBLE)
|
||||||
ld1 {v2.2s}, [X] // X1, X0
|
ld1 {v2.2s}, [X] // X1, X0
|
||||||
ext v3.8b, v2.8b, v2.8b, #4 // X0, X1
|
fmul s3, DA_R, v2.s[0] // DA_R*X0
|
||||||
fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0
|
fmul s5, DA_I, v2.s[1] // DA_I*X1
|
||||||
fmla v2.2s, v3.2s, v1.2s // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
fsub s3, s3, s5 // DA_R*X0-DA_I*X1
|
||||||
st1 {v2.2s}, [X], INC_X
|
|
||||||
|
fmul s4, DA_I, v2.s[0] // DA_I*X0
|
||||||
|
fmul s5, DA_R, v2.s[1] // DA_R*X1
|
||||||
|
fadd s4, s4, s5 // DA_I*X0+DA_R*X1
|
||||||
|
|
||||||
|
ins v3.s[1], v4.s[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||||
|
st1 {v3.2s}, [X], INC_X
|
||||||
#else
|
#else
|
||||||
ld1 {v2.2d}, [X] // X1, X0
|
ld1 {v2.2d}, [X] // X1, X0
|
||||||
ext v3.16b, v2.16b, v2.16b, #8 // X0, X1
|
fmul d3, DA_R, v2.d[0] // DA_R*X0
|
||||||
fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0
|
fmul d5, DA_I, v2.d[1] // DA_I*X1
|
||||||
fmla v2.2d, v3.2d, v1.2d // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
fsub d3, d3, d5 // DA_R*X0-DA_I*X1
|
||||||
st1 {v2.2d}, [X], INC_X
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
fmul d4, DA_I, v2.d[0] // DA_I*X0
|
||||||
|
fmul d5, DA_R, v2.d[1] // DA_R*X1
|
||||||
|
fadd d4, d4, d5 // DA_I*X0+DA_R*X1
|
||||||
|
|
||||||
|
ins v3.d[1], v4.d[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||||
|
st1 {v3.2d}, [X], INC_X
|
||||||
|
#endif
|
||||||
.endm
|
.endm
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
|
|
@ -171,21 +187,54 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
*******************************************************************************/
|
*******************************************************************************/
|
||||||
|
|
||||||
PROLOGUE
|
PROLOGUE
|
||||||
|
|
||||||
|
b zscal_begin
|
||||||
|
data_ar:
|
||||||
|
.word 0x3e44fae6
|
||||||
|
data_ai:
|
||||||
|
.word 0x3d320fa2
|
||||||
|
data_xr:
|
||||||
|
.word 0x3f4baff1
|
||||||
|
data_xi:
|
||||||
|
.word 0xbe8ef0bd
|
||||||
|
|
||||||
|
zscal_begin:
|
||||||
|
|
||||||
|
ldr s20, data_ar
|
||||||
|
ldr s21, data_ai
|
||||||
|
ldr s22, data_xr
|
||||||
|
ldr s23, data_xi
|
||||||
|
|
||||||
|
fmul s24, s22, s21
|
||||||
|
fmla s24, s23, v20.s[0]
|
||||||
|
|
||||||
|
fmul s25, s22, s21
|
||||||
|
fmul s26, s23, s20
|
||||||
|
fadd s25, s25, s26
|
||||||
|
|
||||||
|
mov X_COPY, X
|
||||||
|
|
||||||
cmp N, xzr
|
cmp N, xzr
|
||||||
ble zscal_kernel_L999
|
ble zscal_kernel_L999
|
||||||
|
|
||||||
fcmp DA_R, #0.0
|
fcmp DA_R, #0.0
|
||||||
bne zscal_kernel_1
|
bne zscal_kernel_R_non_zero
|
||||||
|
|
||||||
fcmp DA_I, #0.0
|
fcmp DA_I, #0.0
|
||||||
beq zscal_kernel_zero
|
beq zscal_kernel_RI_zero
|
||||||
|
|
||||||
// TODO: special case DA_R == 0 && DA_I != 0
|
b zscal_kernel_R_zero
|
||||||
|
|
||||||
zscal_kernel_1:
|
zscal_kernel_R_non_zero:
|
||||||
|
|
||||||
// TODO: special case DA_R != 0 && DA_I == 0
|
fcmp DA_I, #0.0
|
||||||
|
beq zscal_kernel_I_zero
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* A_R != 0 && A_I != 0
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
zscal_kernel_RI_non_zero:
|
||||||
|
|
||||||
INIT
|
INIT
|
||||||
|
|
||||||
|
|
@ -257,16 +306,85 @@ zscal_kernel_L999:
|
||||||
mov w0, wzr
|
mov w0, wzr
|
||||||
ret
|
ret
|
||||||
|
|
||||||
zscal_kernel_zero:
|
/*******************************************************************************
|
||||||
|
* A_R == 0 && A_I != 0
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
zscal_kernel_R_zero:
|
||||||
|
INIT_S
|
||||||
|
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub s2, s2, DA_I
|
||||||
|
ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I
|
||||||
|
#else
|
||||||
|
eor v2.16b, v2.16b, v2.16b
|
||||||
|
fsub d2, d2, DA_I
|
||||||
|
ins v1.d[1], v2.d[0] // v1 = -DA_I, DA_I
|
||||||
|
#endif
|
||||||
|
|
||||||
|
zscal_kernel_R_zero_1:
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
ld1 {v2.2s}, [X] // X1, X0
|
||||||
|
fmul v2.2s, v2.2s, v1.2s // -DA_I*X1, DA_I*X0
|
||||||
|
ext v2.8b, v2.8b, v2.8b, #4 // DA_I*X0, -DA_I*X1
|
||||||
|
st1 {v2.2s}, [X]
|
||||||
|
#else
|
||||||
|
ld1 {v2.2d}, [X] // X1, X0
|
||||||
|
fmul v2.2d, v2.2d, v1.2d // -DA_I*X1, DA_I*X0
|
||||||
|
ext v2.16b, v2.16b, v2.16b, #8 // DA_I*X0, -DA_I*X1
|
||||||
|
st1 {v2.2d}, [X]
|
||||||
|
#endif
|
||||||
|
add X, X, INC_X
|
||||||
|
subs N, N, #1
|
||||||
|
bne zscal_kernel_R_zero_1
|
||||||
|
|
||||||
|
mov w0, wzr
|
||||||
|
ret
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* A_R != 0 && A_I == 0
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
zscal_kernel_I_zero:
|
||||||
|
INIT_S
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
|
||||||
|
#else
|
||||||
|
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
|
||||||
|
#endif
|
||||||
|
|
||||||
|
zscal_kernel_I_zero_1:
|
||||||
|
#if !defined(DOUBLE)
|
||||||
|
ld1 {v2.2s}, [X] // X1, X0
|
||||||
|
fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0
|
||||||
|
st1 {v2.2s}, [X]
|
||||||
|
#else
|
||||||
|
ld1 {v2.2d}, [X] // X1, X0
|
||||||
|
fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0
|
||||||
|
st1 {v2.2d}, [X]
|
||||||
|
#endif
|
||||||
|
add X, X, INC_X
|
||||||
|
subs N, N, #1
|
||||||
|
bne zscal_kernel_I_zero_1
|
||||||
|
|
||||||
|
mov w0, wzr
|
||||||
|
ret
|
||||||
|
|
||||||
|
/*******************************************************************************
|
||||||
|
* A_R == 0 && A_I == 0
|
||||||
|
*******************************************************************************/
|
||||||
|
|
||||||
|
zscal_kernel_RI_zero:
|
||||||
|
|
||||||
INIT_S
|
INIT_S
|
||||||
|
|
||||||
zscal_kernel_Z1:
|
zscal_kernel_RI_zero_1:
|
||||||
|
|
||||||
stp DA_R, DA_I, [X]
|
stp DA_R, DA_I, [X]
|
||||||
add X, X, INC_X
|
add X, X, INC_X
|
||||||
subs N, N, #1
|
subs N, N, #1
|
||||||
bne zscal_kernel_Z1
|
bne zscal_kernel_RI_zero_1
|
||||||
|
|
||||||
mov w0, wzr
|
mov w0, wzr
|
||||||
ret
|
ret
|
||||||
|
|
|
||||||
|
|
@ -187,73 +187,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
fmul v16.2d, v0.2d, v8.2d[0]
|
fmul v16.2d, v0.2d, v8.2d[0]
|
||||||
OP_ii v16.2d, v1.2d, v9.2d[0]
|
OP_ii v16.2d, v1.2d, v9.2d[0]
|
||||||
fmul v17.2d, v0.2d, v9.2d[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v17.2d, v17.2d
|
eor v17.16b, v17.16b, v17.16b
|
||||||
|
fmls v17.2d, v0.2d, v9.2d[0]
|
||||||
|
#else
|
||||||
|
fmul v17.2d, v0.2d, v9.2d[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v17.2d, v1.2d, v8.2d[0]
|
OP_ir v17.2d, v1.2d, v8.2d[0]
|
||||||
|
|
||||||
fmul v18.2d, v2.2d, v8.2d[0]
|
fmul v18.2d, v2.2d, v8.2d[0]
|
||||||
OP_ii v18.2d, v3.2d, v9.2d[0]
|
OP_ii v18.2d, v3.2d, v9.2d[0]
|
||||||
fmul v19.2d, v2.2d, v9.2d[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v19.2d, v19.2d
|
eor v19.16b, v19.16b, v19.16b
|
||||||
|
fmls v19.2d, v2.2d, v9.2d[0]
|
||||||
|
#else
|
||||||
|
fmul v19.2d, v2.2d, v9.2d[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v19.2d, v3.2d, v8.2d[0]
|
OP_ir v19.2d, v3.2d, v8.2d[0]
|
||||||
|
|
||||||
fmul v20.2d, v0.2d, v8.2d[1]
|
fmul v20.2d, v0.2d, v8.2d[1]
|
||||||
OP_ii v20.2d, v1.2d, v9.2d[1]
|
OP_ii v20.2d, v1.2d, v9.2d[1]
|
||||||
fmul v21.2d, v0.2d, v9.2d[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v21.2d, v21.2d
|
eor v21.16b, v21.16b, v21.16b
|
||||||
|
fmls v21.2d, v0.2d, v9.2d[1]
|
||||||
|
#else
|
||||||
|
fmul v21.2d, v0.2d, v9.2d[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v21.2d, v1.2d, v8.2d[1]
|
OP_ir v21.2d, v1.2d, v8.2d[1]
|
||||||
|
|
||||||
fmul v22.2d, v2.2d, v8.2d[1]
|
fmul v22.2d, v2.2d, v8.2d[1]
|
||||||
OP_ii v22.2d, v3.2d, v9.2d[1]
|
OP_ii v22.2d, v3.2d, v9.2d[1]
|
||||||
fmul v23.2d, v2.2d, v9.2d[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v23.2d, v23.2d
|
eor v23.16b, v23.16b, v23.16b
|
||||||
|
fmls v23.2d, v2.2d, v9.2d[1]
|
||||||
|
#else
|
||||||
|
fmul v23.2d, v2.2d, v9.2d[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v23.2d, v3.2d, v8.2d[1]
|
OP_ir v23.2d, v3.2d, v8.2d[1]
|
||||||
|
|
||||||
fmul v24.2d, v0.2d, v10.2d[0]
|
fmul v24.2d, v0.2d, v10.2d[0]
|
||||||
OP_ii v24.2d, v1.2d, v11.2d[0]
|
OP_ii v24.2d, v1.2d, v11.2d[0]
|
||||||
fmul v25.2d, v0.2d, v11.2d[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v25.2d, v25.2d
|
eor v25.16b, v25.16b, v25.16b
|
||||||
|
fmls v25.2d, v0.2d, v11.2d[0]
|
||||||
|
#else
|
||||||
|
fmul v25.2d, v0.2d, v11.2d[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v25.2d, v1.2d, v10.2d[0]
|
OP_ir v25.2d, v1.2d, v10.2d[0]
|
||||||
|
|
||||||
fmul v26.2d, v2.2d, v10.2d[0]
|
fmul v26.2d, v2.2d, v10.2d[0]
|
||||||
OP_ii v26.2d, v3.2d, v11.2d[0]
|
OP_ii v26.2d, v3.2d, v11.2d[0]
|
||||||
fmul v27.2d, v2.2d, v11.2d[0]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v27.2d, v27.2d
|
eor v27.16b, v27.16b, v27.16b
|
||||||
|
fmls v27.2d, v2.2d, v11.2d[0]
|
||||||
|
#else
|
||||||
|
fmul v27.2d, v2.2d, v11.2d[0]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v27.2d, v3.2d, v10.2d[0]
|
OP_ir v27.2d, v3.2d, v10.2d[0]
|
||||||
|
|
||||||
fmul v28.2d, v0.2d, v10.2d[1]
|
fmul v28.2d, v0.2d, v10.2d[1]
|
||||||
OP_ii v28.2d, v1.2d, v11.2d[1]
|
OP_ii v28.2d, v1.2d, v11.2d[1]
|
||||||
fmul v29.2d, v0.2d, v11.2d[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v29.2d, v29.2d
|
eor v29.16b, v29.16b, v29.16b
|
||||||
|
fmls v29.2d, v0.2d, v11.2d[1]
|
||||||
|
#else
|
||||||
|
fmul v29.2d, v0.2d, v11.2d[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v29.2d, v1.2d, v10.2d[1]
|
OP_ir v29.2d, v1.2d, v10.2d[1]
|
||||||
|
|
||||||
fmul v30.2d, v2.2d, v10.2d[1]
|
fmul v30.2d, v2.2d, v10.2d[1]
|
||||||
OP_ii v30.2d, v3.2d, v11.2d[1]
|
OP_ii v30.2d, v3.2d, v11.2d[1]
|
||||||
fmul v31.2d, v2.2d, v11.2d[1]
|
|
||||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||||
fneg v31.2d, v31.2d
|
eor v31.16b, v31.16b, v31.16b
|
||||||
|
fmls v31.2d, v2.2d, v11.2d[1]
|
||||||
|
#else
|
||||||
|
fmul v31.2d, v2.2d, v11.2d[1]
|
||||||
#endif
|
#endif
|
||||||
OP_ir v31.2d, v3.2d, v10.2d[1]
|
OP_ir v31.2d, v3.2d, v10.2d[1]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -137,8 +137,13 @@ endif()
|
||||||
|
|
||||||
|
|
||||||
add_library(blas ${ALLOBJ})
|
add_library(blas ${ALLOBJ})
|
||||||
if(UNIX)
|
#if(UNIX)
|
||||||
target_link_libraries(blas m)
|
# target_link_libraries(blas m)
|
||||||
endif()
|
#endif()
|
||||||
|
set_target_properties(
|
||||||
|
blas PROPERTIES
|
||||||
|
VERSION ${LAPACK_VERSION}
|
||||||
|
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||||
|
)
|
||||||
target_link_libraries(blas)
|
target_link_libraries(blas)
|
||||||
lapack_install_library(blas)
|
lapack_install_library(blas)
|
||||||
|
|
|
||||||
|
|
@ -23,8 +23,9 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> forms the dot product of two vectors, conjugating the first
|
*> CDOTC forms the dot product of two complex vectors
|
||||||
*> vector.
|
*> CDOTC = X^H * Y
|
||||||
|
*>
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -35,7 +36,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup complex_blas_level1
|
*> \ingroup complex_blas_level1
|
||||||
*
|
*
|
||||||
|
|
@ -51,10 +52,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY)
|
COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
INTEGER INCX,INCY,N
|
INTEGER INCX,INCY,N
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,9 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> CDOTU forms the dot product of two vectors.
|
*> CDOTU forms the dot product of two complex vectors
|
||||||
|
*> CDOTU = X^T * Y
|
||||||
|
*>
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -34,7 +36,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup complex_blas_level1
|
*> \ingroup complex_blas_level1
|
||||||
*
|
*
|
||||||
|
|
@ -50,10 +52,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY)
|
COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
INTEGER INCX,INCY,N
|
INTEGER INCX,INCY,N
|
||||||
|
|
|
||||||
|
|
@ -165,7 +165,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup complex_blas_level2
|
*> \ingroup complex_blas_level2
|
||||||
*
|
*
|
||||||
|
|
@ -187,10 +187,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE CGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
SUBROUTINE CGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
COMPLEX ALPHA,BETA
|
COMPLEX ALPHA,BETA
|
||||||
|
|
@ -319,26 +319,22 @@
|
||||||
JX = KX
|
JX = KX
|
||||||
IF (INCY.EQ.1) THEN
|
IF (INCY.EQ.1) THEN
|
||||||
DO 60 J = 1,N
|
DO 60 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
K = KUP1 - J
|
||||||
K = KUP1 - J
|
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
50 CONTINUE
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
DO 80 J = 1,N
|
DO 80 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
IY = KY
|
||||||
IY = KY
|
K = KUP1 - J
|
||||||
K = KUP1 - J
|
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
IY = IY + INCY
|
||||||
IY = IY + INCY
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
IF (J.GT.KU) KY = KY + INCY
|
IF (J.GT.KU) KY = KY + INCY
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
|
|
|
||||||
|
|
@ -166,7 +166,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup complex_blas_level3
|
*> \ingroup complex_blas_level3
|
||||||
*
|
*
|
||||||
|
|
@ -187,10 +187,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level3 routine (version 3.4.0) --
|
* -- Reference BLAS level3 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
COMPLEX ALPHA,BETA
|
COMPLEX ALPHA,BETA
|
||||||
|
|
@ -317,12 +317,10 @@
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
DO 80 L = 1,K
|
DO 80 L = 1,K
|
||||||
IF (B(L,J).NE.ZERO) THEN
|
TEMP = ALPHA*B(L,J)
|
||||||
TEMP = ALPHA*B(L,J)
|
DO 70 I = 1,M
|
||||||
DO 70 I = 1,M
|
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
90 CONTINUE
|
90 CONTINUE
|
||||||
ELSE IF (CONJA) THEN
|
ELSE IF (CONJA) THEN
|
||||||
|
|
@ -376,17 +374,15 @@
|
||||||
170 CONTINUE
|
170 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
DO 190 L = 1,K
|
DO 190 L = 1,K
|
||||||
IF (B(J,L).NE.ZERO) THEN
|
TEMP = ALPHA*CONJG(B(J,L))
|
||||||
TEMP = ALPHA*CONJG(B(J,L))
|
DO 180 I = 1,M
|
||||||
DO 180 I = 1,M
|
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
180 CONTINUE
|
||||||
180 CONTINUE
|
|
||||||
END IF
|
|
||||||
190 CONTINUE
|
190 CONTINUE
|
||||||
200 CONTINUE
|
200 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
*
|
*
|
||||||
* Form C := alpha*A*B**T + beta*C
|
* Form C := alpha*A*B**T + beta*C
|
||||||
*
|
*
|
||||||
DO 250 J = 1,N
|
DO 250 J = 1,N
|
||||||
IF (BETA.EQ.ZERO) THEN
|
IF (BETA.EQ.ZERO) THEN
|
||||||
|
|
@ -399,12 +395,10 @@
|
||||||
220 CONTINUE
|
220 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
DO 240 L = 1,K
|
DO 240 L = 1,K
|
||||||
IF (B(J,L).NE.ZERO) THEN
|
TEMP = ALPHA*B(J,L)
|
||||||
TEMP = ALPHA*B(J,L)
|
DO 230 I = 1,M
|
||||||
DO 230 I = 1,M
|
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
230 CONTINUE
|
||||||
230 CONTINUE
|
|
||||||
END IF
|
|
||||||
240 CONTINUE
|
240 CONTINUE
|
||||||
250 CONTINUE
|
250 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
|
|
|
||||||
|
|
@ -136,7 +136,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup complex_blas_level2
|
*> \ingroup complex_blas_level2
|
||||||
*
|
*
|
||||||
|
|
@ -158,10 +158,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE CGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
SUBROUTINE CGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
COMPLEX ALPHA,BETA
|
COMPLEX ALPHA,BETA
|
||||||
|
|
@ -285,24 +285,20 @@
|
||||||
JX = KX
|
JX = KX
|
||||||
IF (INCY.EQ.1) THEN
|
IF (INCY.EQ.1) THEN
|
||||||
DO 60 J = 1,N
|
DO 60 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
DO 50 I = 1,M
|
||||||
DO 50 I = 1,M
|
Y(I) = Y(I) + TEMP*A(I,J)
|
||||||
Y(I) = Y(I) + TEMP*A(I,J)
|
50 CONTINUE
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
DO 80 J = 1,N
|
DO 80 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
IY = KY
|
||||||
IY = KY
|
DO 70 I = 1,M
|
||||||
DO 70 I = 1,M
|
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
IY = IY + INCY
|
||||||
IY = IY + INCY
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> DCABS1 computes absolute value of a double complex number
|
*> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -32,17 +32,17 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup double_blas_level1
|
*> \ingroup double_blas_level1
|
||||||
*
|
*
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
DOUBLE PRECISION FUNCTION DCABS1(Z)
|
DOUBLE PRECISION FUNCTION DCABS1(Z)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
COMPLEX*16 Z
|
COMPLEX*16 Z
|
||||||
|
|
|
||||||
|
|
@ -163,7 +163,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup double_blas_level2
|
*> \ingroup double_blas_level2
|
||||||
*
|
*
|
||||||
|
|
@ -185,10 +185,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE DGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
SUBROUTINE DGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
DOUBLE PRECISION ALPHA,BETA
|
DOUBLE PRECISION ALPHA,BETA
|
||||||
|
|
@ -312,26 +312,22 @@
|
||||||
JX = KX
|
JX = KX
|
||||||
IF (INCY.EQ.1) THEN
|
IF (INCY.EQ.1) THEN
|
||||||
DO 60 J = 1,N
|
DO 60 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
K = KUP1 - J
|
||||||
K = KUP1 - J
|
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
50 CONTINUE
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
DO 80 J = 1,N
|
DO 80 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
IY = KY
|
||||||
IY = KY
|
K = KUP1 - J
|
||||||
K = KUP1 - J
|
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
IY = IY + INCY
|
||||||
IY = IY + INCY
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
IF (J.GT.KU) KY = KY + INCY
|
IF (J.GT.KU) KY = KY + INCY
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
|
|
|
||||||
|
|
@ -166,7 +166,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup double_blas_level3
|
*> \ingroup double_blas_level3
|
||||||
*
|
*
|
||||||
|
|
@ -187,10 +187,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level3 routine (version 3.4.0) --
|
* -- Reference BLAS level3 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
DOUBLE PRECISION ALPHA,BETA
|
DOUBLE PRECISION ALPHA,BETA
|
||||||
|
|
@ -311,12 +311,10 @@
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
DO 80 L = 1,K
|
DO 80 L = 1,K
|
||||||
IF (B(L,J).NE.ZERO) THEN
|
TEMP = ALPHA*B(L,J)
|
||||||
TEMP = ALPHA*B(L,J)
|
DO 70 I = 1,M
|
||||||
DO 70 I = 1,M
|
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
90 CONTINUE
|
90 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
|
|
@ -353,12 +351,10 @@
|
||||||
140 CONTINUE
|
140 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
DO 160 L = 1,K
|
DO 160 L = 1,K
|
||||||
IF (B(J,L).NE.ZERO) THEN
|
TEMP = ALPHA*B(J,L)
|
||||||
TEMP = ALPHA*B(J,L)
|
DO 150 I = 1,M
|
||||||
DO 150 I = 1,M
|
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
150 CONTINUE
|
||||||
150 CONTINUE
|
|
||||||
END IF
|
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
170 CONTINUE
|
170 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup double_blas_level2
|
*> \ingroup double_blas_level2
|
||||||
*
|
*
|
||||||
|
|
@ -156,10 +156,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
DOUBLE PRECISION ALPHA,BETA
|
DOUBLE PRECISION ALPHA,BETA
|
||||||
|
|
@ -278,24 +278,20 @@
|
||||||
JX = KX
|
JX = KX
|
||||||
IF (INCY.EQ.1) THEN
|
IF (INCY.EQ.1) THEN
|
||||||
DO 60 J = 1,N
|
DO 60 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
DO 50 I = 1,M
|
||||||
DO 50 I = 1,M
|
Y(I) = Y(I) + TEMP*A(I,J)
|
||||||
Y(I) = Y(I) + TEMP*A(I,J)
|
50 CONTINUE
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
DO 80 J = 1,N
|
DO 80 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
IY = KY
|
||||||
IY = KY
|
DO 70 I = 1,M
|
||||||
DO 70 I = 1,M
|
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
IY = IY + INCY
|
||||||
IY = IY + INCY
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,8 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> DZASUM takes the sum of the absolute values.
|
*> DZASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
|
||||||
|
*> returns a single precision result.
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -34,7 +35,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup double_blas_level1
|
*> \ingroup double_blas_level1
|
||||||
*
|
*
|
||||||
|
|
@ -51,10 +52,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
DOUBLE PRECISION FUNCTION DZASUM(N,ZX,INCX)
|
DOUBLE PRECISION FUNCTION DZASUM(N,ZX,INCX)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
INTEGER INCX,N
|
INTEGER INCX,N
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> ICAMAX finds the index of element having max. absolute value.
|
*> ICAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)|
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -34,7 +34,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup aux_blas
|
*> \ingroup aux_blas
|
||||||
*
|
*
|
||||||
|
|
@ -51,10 +51,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
INTEGER FUNCTION ICAMAX(N,CX,INCX)
|
INTEGER FUNCTION ICAMAX(N,CX,INCX)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
INTEGER INCX,N
|
INTEGER INCX,N
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> IDAMAX finds the index of element having max. absolute value.
|
*> IDAMAX finds the index of the first element having maximum absolute value.
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -34,7 +34,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup aux_blas
|
*> \ingroup aux_blas
|
||||||
*
|
*
|
||||||
|
|
@ -51,10 +51,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
INTEGER FUNCTION IDAMAX(N,DX,INCX)
|
INTEGER FUNCTION IDAMAX(N,DX,INCX)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
INTEGER INCX,N
|
INTEGER INCX,N
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> ISAMAX finds the index of element having max. absolute value.
|
*> ISAMAX finds the index of the first element having maximum absolute value.
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -34,7 +34,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup aux_blas
|
*> \ingroup aux_blas
|
||||||
*
|
*
|
||||||
|
|
@ -51,10 +51,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
INTEGER FUNCTION ISAMAX(N,SX,INCX)
|
INTEGER FUNCTION ISAMAX(N,SX,INCX)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
INTEGER INCX,N
|
INTEGER INCX,N
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> IZAMAX finds the index of element having max. absolute value.
|
*> IZAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)|
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -34,7 +34,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup aux_blas
|
*> \ingroup aux_blas
|
||||||
*
|
*
|
||||||
|
|
@ -51,10 +51,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
INTEGER FUNCTION IZAMAX(N,ZX,INCX)
|
INTEGER FUNCTION IZAMAX(N,ZX,INCX)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
INTEGER INCX,N
|
INTEGER INCX,N
|
||||||
|
|
|
||||||
|
|
@ -20,7 +20,7 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> SCABS1 computes absolute value of a complex number
|
*> SCABS1 computes |Re(.)| + |Im(.)| of a complex number
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -31,17 +31,17 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup single_blas_level1
|
*> \ingroup single_blas_level1
|
||||||
*
|
*
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
REAL FUNCTION SCABS1(Z)
|
REAL FUNCTION SCABS1(Z)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
COMPLEX Z
|
COMPLEX Z
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> SCASUM takes the sum of the absolute values of a complex vector and
|
*> SCASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
|
||||||
*> returns a single precision result.
|
*> returns a single precision result.
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
|
|
@ -35,7 +35,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup single_blas_level1
|
*> \ingroup single_blas_level1
|
||||||
*
|
*
|
||||||
|
|
@ -52,10 +52,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
REAL FUNCTION SCASUM(N,CX,INCX)
|
REAL FUNCTION SCASUM(N,CX,INCX)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
INTEGER INCX,N
|
INTEGER INCX,N
|
||||||
|
|
|
||||||
|
|
@ -163,7 +163,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup single_blas_level2
|
*> \ingroup single_blas_level2
|
||||||
*
|
*
|
||||||
|
|
@ -185,10 +185,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE SGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
SUBROUTINE SGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
REAL ALPHA,BETA
|
REAL ALPHA,BETA
|
||||||
|
|
@ -312,26 +312,22 @@
|
||||||
JX = KX
|
JX = KX
|
||||||
IF (INCY.EQ.1) THEN
|
IF (INCY.EQ.1) THEN
|
||||||
DO 60 J = 1,N
|
DO 60 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
K = KUP1 - J
|
||||||
K = KUP1 - J
|
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
50 CONTINUE
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
DO 80 J = 1,N
|
DO 80 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
IY = KY
|
||||||
IY = KY
|
K = KUP1 - J
|
||||||
K = KUP1 - J
|
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
IY = IY + INCY
|
||||||
IY = IY + INCY
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
IF (J.GT.KU) KY = KY + INCY
|
IF (J.GT.KU) KY = KY + INCY
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
|
|
|
||||||
|
|
@ -166,7 +166,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup single_blas_level3
|
*> \ingroup single_blas_level3
|
||||||
*
|
*
|
||||||
|
|
@ -187,10 +187,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level3 routine (version 3.4.0) --
|
* -- Reference BLAS level3 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
REAL ALPHA,BETA
|
REAL ALPHA,BETA
|
||||||
|
|
@ -311,12 +311,10 @@
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
DO 80 L = 1,K
|
DO 80 L = 1,K
|
||||||
IF (B(L,J).NE.ZERO) THEN
|
TEMP = ALPHA*B(L,J)
|
||||||
TEMP = ALPHA*B(L,J)
|
DO 70 I = 1,M
|
||||||
DO 70 I = 1,M
|
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
90 CONTINUE
|
90 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
|
|
@ -353,12 +351,10 @@
|
||||||
140 CONTINUE
|
140 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
DO 160 L = 1,K
|
DO 160 L = 1,K
|
||||||
IF (B(J,L).NE.ZERO) THEN
|
TEMP = ALPHA*B(J,L)
|
||||||
TEMP = ALPHA*B(J,L)
|
DO 150 I = 1,M
|
||||||
DO 150 I = 1,M
|
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
150 CONTINUE
|
||||||
150 CONTINUE
|
|
||||||
END IF
|
|
||||||
160 CONTINUE
|
160 CONTINUE
|
||||||
170 CONTINUE
|
170 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
|
|
|
||||||
|
|
@ -134,7 +134,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup single_blas_level2
|
*> \ingroup single_blas_level2
|
||||||
*
|
*
|
||||||
|
|
@ -156,10 +156,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
REAL ALPHA,BETA
|
REAL ALPHA,BETA
|
||||||
|
|
@ -278,24 +278,20 @@
|
||||||
JX = KX
|
JX = KX
|
||||||
IF (INCY.EQ.1) THEN
|
IF (INCY.EQ.1) THEN
|
||||||
DO 60 J = 1,N
|
DO 60 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
DO 50 I = 1,M
|
||||||
DO 50 I = 1,M
|
Y(I) = Y(I) + TEMP*A(I,J)
|
||||||
Y(I) = Y(I) + TEMP*A(I,J)
|
50 CONTINUE
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
DO 80 J = 1,N
|
DO 80 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
IY = KY
|
||||||
IY = KY
|
DO 70 I = 1,M
|
||||||
DO 70 I = 1,M
|
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
IY = IY + INCY
|
||||||
IY = IY + INCY
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,9 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> ZDOTC forms the dot product of a vector.
|
*> ZDOTC forms the dot product of two complex vectors
|
||||||
|
*> ZDOTC = X^H * Y
|
||||||
|
*>
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -34,7 +36,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup complex16_blas_level1
|
*> \ingroup complex16_blas_level1
|
||||||
*
|
*
|
||||||
|
|
@ -50,10 +52,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY)
|
COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
INTEGER INCX,INCY,N
|
INTEGER INCX,INCY,N
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,9 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
*> ZDOTU forms the dot product of two vectors.
|
*> ZDOTU forms the dot product of two complex vectors
|
||||||
|
*> ZDOTU = X^T * Y
|
||||||
|
*>
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
@ -34,7 +36,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup complex16_blas_level1
|
*> \ingroup complex16_blas_level1
|
||||||
*
|
*
|
||||||
|
|
@ -50,10 +52,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
COMPLEX*16 FUNCTION ZDOTU(N,ZX,INCX,ZY,INCY)
|
COMPLEX*16 FUNCTION ZDOTU(N,ZX,INCX,ZY,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
INTEGER INCX,INCY,N
|
INTEGER INCX,INCY,N
|
||||||
|
|
|
||||||
|
|
@ -165,7 +165,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup complex16_blas_level2
|
*> \ingroup complex16_blas_level2
|
||||||
*
|
*
|
||||||
|
|
@ -187,10 +187,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE ZGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
SUBROUTINE ZGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
COMPLEX*16 ALPHA,BETA
|
COMPLEX*16 ALPHA,BETA
|
||||||
|
|
@ -319,26 +319,22 @@
|
||||||
JX = KX
|
JX = KX
|
||||||
IF (INCY.EQ.1) THEN
|
IF (INCY.EQ.1) THEN
|
||||||
DO 60 J = 1,N
|
DO 60 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
K = KUP1 - J
|
||||||
K = KUP1 - J
|
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
50 CONTINUE
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
DO 80 J = 1,N
|
DO 80 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
IY = KY
|
||||||
IY = KY
|
K = KUP1 - J
|
||||||
K = KUP1 - J
|
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
IY = IY + INCY
|
||||||
IY = IY + INCY
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
IF (J.GT.KU) KY = KY + INCY
|
IF (J.GT.KU) KY = KY + INCY
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
|
|
|
||||||
|
|
@ -166,7 +166,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup complex16_blas_level3
|
*> \ingroup complex16_blas_level3
|
||||||
*
|
*
|
||||||
|
|
@ -187,10 +187,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level3 routine (version 3.4.0) --
|
* -- Reference BLAS level3 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
COMPLEX*16 ALPHA,BETA
|
COMPLEX*16 ALPHA,BETA
|
||||||
|
|
@ -317,12 +317,10 @@
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
DO 80 L = 1,K
|
DO 80 L = 1,K
|
||||||
IF (B(L,J).NE.ZERO) THEN
|
TEMP = ALPHA*B(L,J)
|
||||||
TEMP = ALPHA*B(L,J)
|
DO 70 I = 1,M
|
||||||
DO 70 I = 1,M
|
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
90 CONTINUE
|
90 CONTINUE
|
||||||
ELSE IF (CONJA) THEN
|
ELSE IF (CONJA) THEN
|
||||||
|
|
@ -376,17 +374,15 @@
|
||||||
170 CONTINUE
|
170 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
DO 190 L = 1,K
|
DO 190 L = 1,K
|
||||||
IF (B(J,L).NE.ZERO) THEN
|
TEMP = ALPHA*DCONJG(B(J,L))
|
||||||
TEMP = ALPHA*DCONJG(B(J,L))
|
DO 180 I = 1,M
|
||||||
DO 180 I = 1,M
|
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
180 CONTINUE
|
||||||
180 CONTINUE
|
|
||||||
END IF
|
|
||||||
190 CONTINUE
|
190 CONTINUE
|
||||||
200 CONTINUE
|
200 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
*
|
*
|
||||||
* Form C := alpha*A*B**T + beta*C
|
* Form C := alpha*A*B**T + beta*C
|
||||||
*
|
*
|
||||||
DO 250 J = 1,N
|
DO 250 J = 1,N
|
||||||
IF (BETA.EQ.ZERO) THEN
|
IF (BETA.EQ.ZERO) THEN
|
||||||
|
|
@ -399,12 +395,10 @@
|
||||||
220 CONTINUE
|
220 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
DO 240 L = 1,K
|
DO 240 L = 1,K
|
||||||
IF (B(J,L).NE.ZERO) THEN
|
TEMP = ALPHA*B(J,L)
|
||||||
TEMP = ALPHA*B(J,L)
|
DO 230 I = 1,M
|
||||||
DO 230 I = 1,M
|
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
230 CONTINUE
|
||||||
230 CONTINUE
|
|
||||||
END IF
|
|
||||||
240 CONTINUE
|
240 CONTINUE
|
||||||
250 CONTINUE
|
250 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
|
|
|
||||||
|
|
@ -136,7 +136,7 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date November 2011
|
*> \date November 2015
|
||||||
*
|
*
|
||||||
*> \ingroup complex16_blas_level2
|
*> \ingroup complex16_blas_level2
|
||||||
*
|
*
|
||||||
|
|
@ -158,10 +158,10 @@
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||||
*
|
*
|
||||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* November 2011
|
* November 2015
|
||||||
*
|
*
|
||||||
* .. Scalar Arguments ..
|
* .. Scalar Arguments ..
|
||||||
COMPLEX*16 ALPHA,BETA
|
COMPLEX*16 ALPHA,BETA
|
||||||
|
|
@ -285,24 +285,20 @@
|
||||||
JX = KX
|
JX = KX
|
||||||
IF (INCY.EQ.1) THEN
|
IF (INCY.EQ.1) THEN
|
||||||
DO 60 J = 1,N
|
DO 60 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
DO 50 I = 1,M
|
||||||
DO 50 I = 1,M
|
Y(I) = Y(I) + TEMP*A(I,J)
|
||||||
Y(I) = Y(I) + TEMP*A(I,J)
|
50 CONTINUE
|
||||||
50 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
60 CONTINUE
|
60 CONTINUE
|
||||||
ELSE
|
ELSE
|
||||||
DO 80 J = 1,N
|
DO 80 J = 1,N
|
||||||
IF (X(JX).NE.ZERO) THEN
|
TEMP = ALPHA*X(JX)
|
||||||
TEMP = ALPHA*X(JX)
|
IY = KY
|
||||||
IY = KY
|
DO 70 I = 1,M
|
||||||
DO 70 I = 1,M
|
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
IY = IY + INCY
|
||||||
IY = IY + INCY
|
70 CONTINUE
|
||||||
70 CONTINUE
|
|
||||||
END IF
|
|
||||||
JX = JX + INCX
|
JX = JX + INCX
|
||||||
80 CONTINUE
|
80 CONTINUE
|
||||||
END IF
|
END IF
|
||||||
|
|
|
||||||
|
|
@ -30,17 +30,16 @@ macro(add_blas_test name src)
|
||||||
get_filename_component(baseNAME ${src} NAME_WE)
|
get_filename_component(baseNAME ${src} NAME_WE)
|
||||||
set(TEST_INPUT "${LAPACK_SOURCE_DIR}/BLAS/${baseNAME}.in")
|
set(TEST_INPUT "${LAPACK_SOURCE_DIR}/BLAS/${baseNAME}.in")
|
||||||
add_executable(${name} ${src})
|
add_executable(${name} ${src})
|
||||||
get_target_property(TEST_LOC ${name} LOCATION)
|
|
||||||
target_link_libraries(${name} blas)
|
target_link_libraries(${name} blas)
|
||||||
if(EXISTS "${TEST_INPUT}")
|
if(EXISTS "${TEST_INPUT}")
|
||||||
add_test(BLAS-${name} "${CMAKE_COMMAND}"
|
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}"
|
||||||
-DTEST=${TEST_LOC}
|
-DTEST=$<TARGET_FILE:${name}>
|
||||||
-DINPUT=${TEST_INPUT}
|
-DINPUT=${TEST_INPUT}
|
||||||
-DINTDIR=${CMAKE_CFG_INTDIR}
|
-DINTDIR=${CMAKE_CFG_INTDIR}
|
||||||
-P "${LAPACK_SOURCE_DIR}/TESTING/runtest.cmake")
|
-P "${LAPACK_SOURCE_DIR}/TESTING/runtest.cmake")
|
||||||
else()
|
else()
|
||||||
add_test(BLAS-${name} "${CMAKE_COMMAND}"
|
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}"
|
||||||
-DTEST=${TEST_LOC}
|
-DTEST=$<TARGET_FILE:${name}>
|
||||||
-DINTDIR=${CMAKE_CFG_INTDIR}
|
-DINTDIR=${CMAKE_CFG_INTDIR}
|
||||||
-P "${LAPACK_SOURCE_DIR}/TESTING/runtest.cmake")
|
-P "${LAPACK_SOURCE_DIR}/TESTING/runtest.cmake")
|
||||||
endif()
|
endif()
|
||||||
|
|
|
||||||
|
|
@ -120,7 +120,7 @@
|
||||||
REAL RZERO
|
REAL RZERO
|
||||||
PARAMETER ( RZERO = 0.0 )
|
PARAMETER ( RZERO = 0.0 )
|
||||||
INTEGER NMAX, INCMAX
|
INTEGER NMAX, INCMAX
|
||||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||||
$ NALMAX = 7, NBEMAX = 7 )
|
$ NALMAX = 7, NBEMAX = 7 )
|
||||||
|
|
|
||||||
|
|
@ -102,7 +102,7 @@
|
||||||
REAL RZERO
|
REAL RZERO
|
||||||
PARAMETER ( RZERO = 0.0 )
|
PARAMETER ( RZERO = 0.0 )
|
||||||
INTEGER NMAX
|
INTEGER NMAX
|
||||||
PARAMETER ( NMAX = 128 )
|
PARAMETER ( NMAX = 65 )
|
||||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||||
* .. Local Scalars ..
|
* .. Local Scalars ..
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,7 @@
|
||||||
DOUBLE PRECISION ZERO, ONE
|
DOUBLE PRECISION ZERO, ONE
|
||||||
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
|
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
|
||||||
INTEGER NMAX, INCMAX
|
INTEGER NMAX, INCMAX
|
||||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||||
$ NALMAX = 7, NBEMAX = 7 )
|
$ NALMAX = 7, NBEMAX = 7 )
|
||||||
|
|
|
||||||
|
|
@ -97,7 +97,7 @@
|
||||||
DOUBLE PRECISION ZERO, ONE
|
DOUBLE PRECISION ZERO, ONE
|
||||||
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
|
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
|
||||||
INTEGER NMAX
|
INTEGER NMAX
|
||||||
PARAMETER ( NMAX = 128 )
|
PARAMETER ( NMAX = 65 )
|
||||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||||
* .. Local Scalars ..
|
* .. Local Scalars ..
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,7 @@
|
||||||
REAL ZERO, ONE
|
REAL ZERO, ONE
|
||||||
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
|
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
|
||||||
INTEGER NMAX, INCMAX
|
INTEGER NMAX, INCMAX
|
||||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||||
$ NALMAX = 7, NBEMAX = 7 )
|
$ NALMAX = 7, NBEMAX = 7 )
|
||||||
|
|
|
||||||
|
|
@ -97,7 +97,7 @@
|
||||||
REAL ZERO, ONE
|
REAL ZERO, ONE
|
||||||
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
|
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
|
||||||
INTEGER NMAX
|
INTEGER NMAX
|
||||||
PARAMETER ( NMAX = 128 )
|
PARAMETER ( NMAX = 65 )
|
||||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||||
* .. Local Scalars ..
|
* .. Local Scalars ..
|
||||||
|
|
|
||||||
|
|
@ -121,7 +121,7 @@
|
||||||
DOUBLE PRECISION RZERO
|
DOUBLE PRECISION RZERO
|
||||||
PARAMETER ( RZERO = 0.0D0 )
|
PARAMETER ( RZERO = 0.0D0 )
|
||||||
INTEGER NMAX, INCMAX
|
INTEGER NMAX, INCMAX
|
||||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||||
$ NALMAX = 7, NBEMAX = 7 )
|
$ NALMAX = 7, NBEMAX = 7 )
|
||||||
|
|
|
||||||
|
|
@ -104,7 +104,7 @@
|
||||||
DOUBLE PRECISION RZERO
|
DOUBLE PRECISION RZERO
|
||||||
PARAMETER ( RZERO = 0.0D0 )
|
PARAMETER ( RZERO = 0.0D0 )
|
||||||
INTEGER NMAX
|
INTEGER NMAX
|
||||||
PARAMETER ( NMAX = 128 )
|
PARAMETER ( NMAX = 65 )
|
||||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||||
* .. Local Scalars ..
|
* .. Local Scalars ..
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,90 @@
|
||||||
|
message(STATUS "CBLAS enable")
|
||||||
|
enable_language(C)
|
||||||
|
|
||||||
|
set(LAPACK_INSTALL_EXPORT_NAME cblas-targets)
|
||||||
|
|
||||||
|
# Create a header file cblas.h for the routines called in my C programs
|
||||||
|
include(FortranCInterface)
|
||||||
|
FortranCInterface_HEADER( ${CMAKE_CURRENT_SOURCE_DIR}/include/cblas_mangling.h
|
||||||
|
MACRO_NAMESPACE "F77_"
|
||||||
|
SYMBOL_NAMESPACE "F77_" )
|
||||||
|
|
||||||
|
# Old way to detect mangling
|
||||||
|
#include(FortranMangling)
|
||||||
|
#FORTRAN_MANGLING(CDEFS)
|
||||||
|
#set(CDEFS ${CDEFS} CACHE STRING "Fortran Mangling" FORCE)
|
||||||
|
#MESSAGE(STATUS "=========")
|
||||||
|
|
||||||
|
# --------------------------------------------------
|
||||||
|
# Compiler Flags
|
||||||
|
#ADD_DEFINITIONS( "-D${CDEFS}")
|
||||||
|
|
||||||
|
|
||||||
|
include_directories( include )
|
||||||
|
add_subdirectory(include)
|
||||||
|
add_subdirectory(src)
|
||||||
|
|
||||||
|
macro(append_subdir_files variable dirname)
|
||||||
|
get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable})
|
||||||
|
foreach(depfile ${holder})
|
||||||
|
list(APPEND ${variable} "${dirname}/${depfile}")
|
||||||
|
endforeach()
|
||||||
|
endmacro()
|
||||||
|
|
||||||
|
append_subdir_files(CBLAS_INCLUDE "include")
|
||||||
|
INSTALL( FILES ${CBLAS_INCLUDE} DESTINATION include )
|
||||||
|
|
||||||
|
# --------------------------------------------------
|
||||||
|
if(BUILD_TESTING)
|
||||||
|
add_subdirectory(testing)
|
||||||
|
add_subdirectory(examples)
|
||||||
|
endif(BUILD_TESTING)
|
||||||
|
|
||||||
|
if(NOT BLAS_FOUND)
|
||||||
|
set(ALL_TARGETS ${ALL_TARGETS} blas)
|
||||||
|
endif(NOT BLAS_FOUND)
|
||||||
|
|
||||||
|
# Export cblas targets from the
|
||||||
|
# install tree, if any.
|
||||||
|
set(_cblas_config_install_guard_target "")
|
||||||
|
if(ALL_TARGETS)
|
||||||
|
install(EXPORT cblas-targets
|
||||||
|
DESTINATION lib/cmake/cblas-${LAPACK_VERSION})
|
||||||
|
# Choose one of the cblas targets to use as a guard for
|
||||||
|
# cblas-config.cmake to load targets from the install tree.
|
||||||
|
list(GET ALL_TARGETS 0 _cblas_config_install_guard_target)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Export cblas targets from the build tree, if any.
|
||||||
|
set(_cblas_config_build_guard_target "")
|
||||||
|
if(ALL_TARGETS)
|
||||||
|
export(TARGETS ${ALL_TARGETS} FILE cblas-targets.cmake)
|
||||||
|
|
||||||
|
# Choose one of the cblas targets to use as a guard
|
||||||
|
# for cblas-config.cmake to load targets from the build tree.
|
||||||
|
list(GET ALL_TARGETS 0 _cblas_config_build_guard_target)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CMAKE/cblas-config-version.cmake.in
|
||||||
|
${LAPACK_BINARY_DIR}/cblas-config-version.cmake @ONLY)
|
||||||
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CMAKE/cblas-config-build.cmake.in
|
||||||
|
${LAPACK_BINARY_DIR}/cblas-config.cmake @ONLY)
|
||||||
|
|
||||||
|
|
||||||
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc)
|
||||||
|
install(FILES
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/cblas.pc
|
||||||
|
DESTINATION ${PKG_CONFIG_DIR}
|
||||||
|
)
|
||||||
|
|
||||||
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake @ONLY)
|
||||||
|
install(FILES
|
||||||
|
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake
|
||||||
|
${LAPACK_BINARY_DIR}/cblas-config-version.cmake
|
||||||
|
DESTINATION lib/cmake/cblas-${LAPACK_VERSION}
|
||||||
|
)
|
||||||
|
|
||||||
|
#install(EXPORT cblas-targets
|
||||||
|
# DESTINATION lib/cmake/cblas-${LAPACK_VERSION})
|
||||||
|
|
||||||
|
|
@ -0,0 +1,27 @@
|
||||||
|
include ../make.inc
|
||||||
|
|
||||||
|
all:
|
||||||
|
cd include && cp cblas_mangling_with_flags.h cblas_mangling.h
|
||||||
|
cd src && $(MAKE) all
|
||||||
|
|
||||||
|
|
||||||
|
clean: cleanlib
|
||||||
|
|
||||||
|
cleanlib:
|
||||||
|
cd src && $(MAKE) clean
|
||||||
|
|
||||||
|
cleanexe:
|
||||||
|
cd testing && $(MAKE) cleanexe
|
||||||
|
|
||||||
|
cleanall: clean cleanexe
|
||||||
|
rm -f $(CBLASLIB)
|
||||||
|
cd examples && rm -f *.o cblas_ex1 cblas_ex2
|
||||||
|
|
||||||
|
cblas_testing:
|
||||||
|
cd testing && $(MAKE) all
|
||||||
|
|
||||||
|
runtst:
|
||||||
|
cd testing && $(MAKE) run
|
||||||
|
|
||||||
|
example: all
|
||||||
|
cd examples && make all
|
||||||
|
|
@ -0,0 +1,49 @@
|
||||||
|
#
|
||||||
|
# Makefile.LINUX
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# If you compile, change the name to Makefile.in.
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
# Shell
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
SHELL = /bin/sh
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
# Platform
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
PLAT = LINUX
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
# Libraries and includes
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
BLLIB = $(home)/lib/librefblas.a
|
||||||
|
CBLIB = ../lib/libcblas.a
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
# Compilers
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
CC = gcc
|
||||||
|
FC = gfortran
|
||||||
|
LOADER = $(FC)
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
# Flags for Compilers
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
CFLAGS = -O3 -DADD_
|
||||||
|
FFLAGS = -O3
|
||||||
|
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
# Archive programs and flags
|
||||||
|
#-----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
ARCH = ar
|
||||||
|
ARCHFLAGS = cr
|
||||||
|
RANLIB = ranlib
|
||||||
|
|
@ -0,0 +1,59 @@
|
||||||
|
INSTALLATION
|
||||||
|
|
||||||
|
Make sure to set these variables appropriately in your Make.inc in the LAPACK folder:
|
||||||
|
|
||||||
|
CBLASLIB is your CBLAS library
|
||||||
|
BLASLIB is your Legacy BLAS library (by default the Reference BLAS shipped within LAPACK)
|
||||||
|
|
||||||
|
Then type:
|
||||||
|
|
||||||
|
prompt> make
|
||||||
|
|
||||||
|
which will create the CBLAS library.
|
||||||
|
|
||||||
|
CREATING THE TESTERS
|
||||||
|
|
||||||
|
type:
|
||||||
|
|
||||||
|
prompt> make cblas_testing
|
||||||
|
|
||||||
|
This will create the BLAS library if necessary, then compile the CBLAS testings.
|
||||||
|
|
||||||
|
EXECUTING THE TESTERS
|
||||||
|
|
||||||
|
type:
|
||||||
|
|
||||||
|
prompt> make runtst
|
||||||
|
|
||||||
|
_______________________________________________________________________________
|
||||||
|
|
||||||
|
This package contains C interface to Legacy BLAS.
|
||||||
|
|
||||||
|
Written by Keita Teranishi (5/20/98)
|
||||||
|
_______________________________________________________________________________
|
||||||
|
|
||||||
|
This release updates an inconsistency between the BLAST document and
|
||||||
|
the interface. According to the document, the enumerated types for
|
||||||
|
the C interface to the BLAS are not typedef'ed.
|
||||||
|
|
||||||
|
It also updates the Level 2 and 3 testers which check for correct
|
||||||
|
exiting of routines when called with bad arguments. This is done by
|
||||||
|
overriding the Legacy BLAS library's implementation of xerbla(). If
|
||||||
|
this cannot be done ( for instance one cannot override some calls
|
||||||
|
to xerbla() in Sun's Performance library), then correct error
|
||||||
|
exiting cannot be checked.
|
||||||
|
|
||||||
|
Updated by Jeff Horner (3/15/99)
|
||||||
|
_______________________________________________________________________________
|
||||||
|
|
||||||
|
Updated by R. Clint Whaley (2/23/03):
|
||||||
|
|
||||||
|
Fixed the i?amax error that I reported three years ago: standard dictates
|
||||||
|
IAMAX return vals in range 0 <= iamax < N, but reference was mistakenly
|
||||||
|
returning like F77: 0 < iamax <= N.
|
||||||
|
_______________________________________________________________________________
|
||||||
|
|
||||||
|
Updated by Julie Langou (08/22/2014):
|
||||||
|
|
||||||
|
Integrate CBLAS package into LAPACK
|
||||||
|
Improve headers for mangling
|
||||||
|
|
@ -0,0 +1,9 @@
|
||||||
|
prefix=@prefix@
|
||||||
|
libdir=@libdir@
|
||||||
|
|
||||||
|
Name: lapacke
|
||||||
|
Description: C Standard Interface to BLAS Linear Algebra PACKage
|
||||||
|
Version: @LAPACK_VERSION@
|
||||||
|
URL: http://www.netlib.org/lapack/
|
||||||
|
Libs: -L${libdir} -lcblas
|
||||||
|
Requires: blas
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
# Load the LAPACK package with which we were built.
|
||||||
|
set(LAPACK_DIR "@LAPACK_BINARY_DIR@")
|
||||||
|
find_package(LAPACK NO_MODULE)
|
||||||
|
|
||||||
|
# Load lapack targets from the build tree, including lapacke targets.
|
||||||
|
if(NOT TARGET lapacke)
|
||||||
|
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Report lapacke header search locations.
|
||||||
|
set(CBLAS_INCLUDE_DIRS "@LAPACK_SOURCE_DIR@/cblas/include")
|
||||||
|
|
||||||
|
# Report lapacke libraries.
|
||||||
|
set(CBLAS_LIBRARIES cblas)
|
||||||
|
|
@ -0,0 +1,23 @@
|
||||||
|
# Compute locations from <prefix>/lib/cmake/lapacke-<v>/<self>.cmake
|
||||||
|
get_filename_component(_CBLAS_SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
||||||
|
get_filename_component(_CBLAS_PREFIX "${_CBLAS_SELF_DIR}" PATH)
|
||||||
|
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
|
||||||
|
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
|
||||||
|
|
||||||
|
# Load the LAPACK package with which we were built.
|
||||||
|
set(LAPACK_DIR "${_CBLAS_PREFIX}/lib/cmake/lapack-@LAPACK_VERSION@")
|
||||||
|
find_package(LAPACK NO_MODULE)
|
||||||
|
|
||||||
|
# Load lapacke targets from the install tree.
|
||||||
|
if(NOT TARGET cblas)
|
||||||
|
include(${_CBLAS_SELF_DIR}/cblas-targets.cmake)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
# Report lapacke header search locations.
|
||||||
|
set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include)
|
||||||
|
|
||||||
|
# Report lapacke libraries.
|
||||||
|
set(CBLAS_LIBRARIES cblas)
|
||||||
|
|
||||||
|
unset(_CBLAS_PREFIX)
|
||||||
|
unset(_CBLAS_SELF_DIR)
|
||||||
|
|
@ -0,0 +1,8 @@
|
||||||
|
add_executable(xexample1_CBLAS cblas_example1.c )
|
||||||
|
add_executable(xexample2_CBLAS cblas_example2.c )
|
||||||
|
|
||||||
|
target_link_libraries(xexample1_CBLAS cblas ${BLAS_LIBRARIES})
|
||||||
|
target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES})
|
||||||
|
|
||||||
|
add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS)
|
||||||
|
add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS)
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
include ../../make.inc
|
||||||
|
|
||||||
|
all: example1 example2
|
||||||
|
|
||||||
|
example1:
|
||||||
|
$(CC) -c $(CFLAGS) -I../include cblas_example1.c
|
||||||
|
$(LOADER) -o cblas_ex1 cblas_example1.o $(CBLASLIB) $(BLASLIB)
|
||||||
|
|
||||||
|
example2:
|
||||||
|
$(CC) -c $(CFLAGS) -I../include cblas_example2.c
|
||||||
|
$(LOADER) -o cblas_ex2 cblas_example2.o $(CBLASLIB) $(BLASLIB)
|
||||||
|
|
||||||
|
cleanall:
|
||||||
|
rm -f *.o cblas_ex1 cblas_ex2
|
||||||
|
|
@ -0,0 +1,69 @@
|
||||||
|
/* cblas_example.c */
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
|
||||||
|
int main ( )
|
||||||
|
{
|
||||||
|
CBLAS_LAYOUT Layout;
|
||||||
|
CBLAS_TRANSPOSE transa;
|
||||||
|
|
||||||
|
double *a, *x, *y;
|
||||||
|
double alpha, beta;
|
||||||
|
int m, n, lda, incx, incy, i;
|
||||||
|
|
||||||
|
Layout = CblasColMajor;
|
||||||
|
transa = CblasNoTrans;
|
||||||
|
|
||||||
|
m = 4; /* Size of Column ( the number of rows ) */
|
||||||
|
n = 4; /* Size of Row ( the number of columns ) */
|
||||||
|
lda = 4; /* Leading dimension of 5 * 4 matrix is 5 */
|
||||||
|
incx = 1;
|
||||||
|
incy = 1;
|
||||||
|
alpha = 1;
|
||||||
|
beta = 0;
|
||||||
|
|
||||||
|
a = (double *)malloc(sizeof(double)*m*n);
|
||||||
|
x = (double *)malloc(sizeof(double)*n);
|
||||||
|
y = (double *)malloc(sizeof(double)*n);
|
||||||
|
/* The elements of the first column */
|
||||||
|
a[0] = 1;
|
||||||
|
a[1] = 2;
|
||||||
|
a[2] = 3;
|
||||||
|
a[3] = 4;
|
||||||
|
/* The elements of the second column */
|
||||||
|
a[m] = 1;
|
||||||
|
a[m+1] = 1;
|
||||||
|
a[m+2] = 1;
|
||||||
|
a[m+3] = 1;
|
||||||
|
/* The elements of the third column */
|
||||||
|
a[m*2] = 3;
|
||||||
|
a[m*2+1] = 4;
|
||||||
|
a[m*2+2] = 5;
|
||||||
|
a[m*2+3] = 6;
|
||||||
|
/* The elements of the fourth column */
|
||||||
|
a[m*3] = 5;
|
||||||
|
a[m*3+1] = 6;
|
||||||
|
a[m*3+2] = 7;
|
||||||
|
a[m*3+3] = 8;
|
||||||
|
/* The elemetns of x and y */
|
||||||
|
x[0] = 1;
|
||||||
|
x[1] = 2;
|
||||||
|
x[2] = 1;
|
||||||
|
x[3] = 1;
|
||||||
|
y[0] = 0;
|
||||||
|
y[1] = 0;
|
||||||
|
y[2] = 0;
|
||||||
|
y[3] = 0;
|
||||||
|
|
||||||
|
cblas_dgemv( Layout, transa, m, n, alpha, a, lda, x, incx, beta,
|
||||||
|
y, incy );
|
||||||
|
/* Print y */
|
||||||
|
for( i = 0; i < n; i++ )
|
||||||
|
printf(" y%d = %f\n", i, y[i]);
|
||||||
|
free(a);
|
||||||
|
free(x);
|
||||||
|
free(y);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
/* cblas_example2.c */
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
|
||||||
|
#define INVALID -1
|
||||||
|
|
||||||
|
int main (int argc, char **argv )
|
||||||
|
{
|
||||||
|
int rout=-1,info=0,m,n,k,lda,ldb,ldc;
|
||||||
|
double A[2] = {0.0,0.0},
|
||||||
|
B[2] = {0.0,0.0},
|
||||||
|
C[2] = {0.0,0.0},
|
||||||
|
ALPHA=0.0, BETA=0.0;
|
||||||
|
|
||||||
|
if (argc > 2){
|
||||||
|
rout = atoi(argv[1]);
|
||||||
|
info = atoi(argv[2]);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rout == 1) {
|
||||||
|
if (info==0) {
|
||||||
|
printf("Checking if cblas_dgemm fails on parameter 4\n");
|
||||||
|
cblas_dgemm( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
|
||||||
|
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||||
|
}
|
||||||
|
if (info==1) {
|
||||||
|
printf("Checking if cblas_dgemm fails on parameter 5\n");
|
||||||
|
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
|
||||||
|
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||||
|
}
|
||||||
|
if (info==2) {
|
||||||
|
printf("Checking if cblas_dgemm fails on parameter 9\n");
|
||||||
|
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
|
||||||
|
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||||
|
}
|
||||||
|
if (info==3) {
|
||||||
|
printf("Checking if cblas_dgemm fails on parameter 11\n");
|
||||||
|
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 2,
|
||||||
|
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (info==0) {
|
||||||
|
printf("Checking if F77_dgemm fails on parameter 3\n");
|
||||||
|
m=INVALID; n=0; k=0; lda=1; ldb=1; ldc=1;
|
||||||
|
F77_dgemm( "T", "N", &m, &n, &k,
|
||||||
|
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
|
||||||
|
}
|
||||||
|
if (info==1) {
|
||||||
|
m=0; n=INVALID; k=0; lda=1; ldb=1; ldc=1;
|
||||||
|
printf("Checking if F77_dgemm fails on parameter 4\n");
|
||||||
|
F77_dgemm( "N", "T", &m, &n, &k,
|
||||||
|
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
|
||||||
|
}
|
||||||
|
if (info==2) {
|
||||||
|
printf("Checking if F77_dgemm fails on parameter 8\n");
|
||||||
|
m=2; n=0; k=0; lda=1; ldb=1; ldc=2;
|
||||||
|
F77_dgemm( "N", "N" , &m, &n, &k,
|
||||||
|
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
|
||||||
|
}
|
||||||
|
if (info==3) {
|
||||||
|
printf("Checking if F77_dgemm fails on parameter 10\n");
|
||||||
|
m=0; n=0; k=2; lda=1; ldb=1; ldc=1;
|
||||||
|
F77_dgemm( "N", "N" , &m, &n, &k,
|
||||||
|
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,3 @@
|
||||||
|
SET (CBLAS_INCLUDE cblas.h cblas_f77.h cblas_test.h cblas_mangling.h)
|
||||||
|
|
||||||
|
file(COPY ${CBLAS_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include)
|
||||||
|
|
@ -0,0 +1,588 @@
|
||||||
|
#ifndef CBLAS_H
|
||||||
|
#define CBLAS_H
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" { /* Assume C declarations for C++ */
|
||||||
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Enumerated and derived types
|
||||||
|
*/
|
||||||
|
#ifdef WeirdNEC
|
||||||
|
#define CBLAS_INDEX long
|
||||||
|
#else
|
||||||
|
#define CBLAS_INDEX int
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT;
|
||||||
|
typedef enum {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113} CBLAS_TRANSPOSE;
|
||||||
|
typedef enum {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||||
|
typedef enum {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||||
|
typedef enum {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||||
|
|
||||||
|
typedef CBLAS_LAYOUT CBLAS_ORDER; /* this for backward compatibility with CBLAS_ORDER */
|
||||||
|
|
||||||
|
#include "cblas_mangling.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ===========================================================================
|
||||||
|
* Prototypes for level 1 BLAS functions (complex are recast as routines)
|
||||||
|
* ===========================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
double cblas_dcabs1(const void *z);
|
||||||
|
float cblas_scabs1(const void *c);
|
||||||
|
|
||||||
|
float cblas_sdsdot(const int N, const float alpha, const float *X,
|
||||||
|
const int incX, const float *Y, const int incY);
|
||||||
|
double cblas_dsdot(const int N, const float *X, const int incX, const float *Y,
|
||||||
|
const int incY);
|
||||||
|
float cblas_sdot(const int N, const float *X, const int incX,
|
||||||
|
const float *Y, const int incY);
|
||||||
|
double cblas_ddot(const int N, const double *X, const int incX,
|
||||||
|
const double *Y, const int incY);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Functions having prefixes Z and C only
|
||||||
|
*/
|
||||||
|
void cblas_cdotu_sub(const int N, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *dotu);
|
||||||
|
void cblas_cdotc_sub(const int N, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *dotc);
|
||||||
|
|
||||||
|
void cblas_zdotu_sub(const int N, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *dotu);
|
||||||
|
void cblas_zdotc_sub(const int N, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *dotc);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Functions having prefixes S D SC DZ
|
||||||
|
*/
|
||||||
|
float cblas_snrm2(const int N, const float *X, const int incX);
|
||||||
|
float cblas_sasum(const int N, const float *X, const int incX);
|
||||||
|
|
||||||
|
double cblas_dnrm2(const int N, const double *X, const int incX);
|
||||||
|
double cblas_dasum(const int N, const double *X, const int incX);
|
||||||
|
|
||||||
|
float cblas_scnrm2(const int N, const void *X, const int incX);
|
||||||
|
float cblas_scasum(const int N, const void *X, const int incX);
|
||||||
|
|
||||||
|
double cblas_dznrm2(const int N, const void *X, const int incX);
|
||||||
|
double cblas_dzasum(const int N, const void *X, const int incX);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Functions having standard 4 prefixes (S D C Z)
|
||||||
|
*/
|
||||||
|
CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX);
|
||||||
|
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX);
|
||||||
|
CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX);
|
||||||
|
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ===========================================================================
|
||||||
|
* Prototypes for level 1 BLAS routines
|
||||||
|
* ===========================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Routines with standard 4 prefixes (s, d, c, z)
|
||||||
|
*/
|
||||||
|
void cblas_sswap(const int N, float *X, const int incX,
|
||||||
|
float *Y, const int incY);
|
||||||
|
void cblas_scopy(const int N, const float *X, const int incX,
|
||||||
|
float *Y, const int incY);
|
||||||
|
void cblas_saxpy(const int N, const float alpha, const float *X,
|
||||||
|
const int incX, float *Y, const int incY);
|
||||||
|
|
||||||
|
void cblas_dswap(const int N, double *X, const int incX,
|
||||||
|
double *Y, const int incY);
|
||||||
|
void cblas_dcopy(const int N, const double *X, const int incX,
|
||||||
|
double *Y, const int incY);
|
||||||
|
void cblas_daxpy(const int N, const double alpha, const double *X,
|
||||||
|
const int incX, double *Y, const int incY);
|
||||||
|
|
||||||
|
void cblas_cswap(const int N, void *X, const int incX,
|
||||||
|
void *Y, const int incY);
|
||||||
|
void cblas_ccopy(const int N, const void *X, const int incX,
|
||||||
|
void *Y, const int incY);
|
||||||
|
void cblas_caxpy(const int N, const void *alpha, const void *X,
|
||||||
|
const int incX, void *Y, const int incY);
|
||||||
|
|
||||||
|
void cblas_zswap(const int N, void *X, const int incX,
|
||||||
|
void *Y, const int incY);
|
||||||
|
void cblas_zcopy(const int N, const void *X, const int incX,
|
||||||
|
void *Y, const int incY);
|
||||||
|
void cblas_zaxpy(const int N, const void *alpha, const void *X,
|
||||||
|
const int incX, void *Y, const int incY);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Routines with S and D prefix only
|
||||||
|
*/
|
||||||
|
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||||
|
void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P);
|
||||||
|
void cblas_srot(const int N, float *X, const int incX,
|
||||||
|
float *Y, const int incY, const float c, const float s);
|
||||||
|
void cblas_srotm(const int N, float *X, const int incX,
|
||||||
|
float *Y, const int incY, const float *P);
|
||||||
|
|
||||||
|
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||||
|
void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P);
|
||||||
|
void cblas_drot(const int N, double *X, const int incX,
|
||||||
|
double *Y, const int incY, const double c, const double s);
|
||||||
|
void cblas_drotm(const int N, double *X, const int incX,
|
||||||
|
double *Y, const int incY, const double *P);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Routines with S D C Z CS and ZD prefixes
|
||||||
|
*/
|
||||||
|
void cblas_sscal(const int N, const float alpha, float *X, const int incX);
|
||||||
|
void cblas_dscal(const int N, const double alpha, double *X, const int incX);
|
||||||
|
void cblas_cscal(const int N, const void *alpha, void *X, const int incX);
|
||||||
|
void cblas_zscal(const int N, const void *alpha, void *X, const int incX);
|
||||||
|
void cblas_csscal(const int N, const float alpha, void *X, const int incX);
|
||||||
|
void cblas_zdscal(const int N, const double alpha, void *X, const int incX);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ===========================================================================
|
||||||
|
* Prototypes for level 2 BLAS
|
||||||
|
* ===========================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Routines with standard 4 prefixes (S, D, C, Z)
|
||||||
|
*/
|
||||||
|
void cblas_sgemv(const CBLAS_LAYOUT layout,
|
||||||
|
const CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||||
|
const float alpha, const float *A, const int lda,
|
||||||
|
const float *X, const int incX, const float beta,
|
||||||
|
float *Y, const int incY);
|
||||||
|
void cblas_sgbmv(CBLAS_LAYOUT layout,
|
||||||
|
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||||
|
const int KL, const int KU, const float alpha,
|
||||||
|
const float *A, const int lda, const float *X,
|
||||||
|
const int incX, const float beta, float *Y, const int incY);
|
||||||
|
void cblas_strmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const float *A, const int lda,
|
||||||
|
float *X, const int incX);
|
||||||
|
void cblas_stbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const int K, const float *A, const int lda,
|
||||||
|
float *X, const int incX);
|
||||||
|
void cblas_stpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const float *Ap, float *X, const int incX);
|
||||||
|
void cblas_strsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const float *A, const int lda, float *X,
|
||||||
|
const int incX);
|
||||||
|
void cblas_stbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const int K, const float *A, const int lda,
|
||||||
|
float *X, const int incX);
|
||||||
|
void cblas_stpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const float *Ap, float *X, const int incX);
|
||||||
|
|
||||||
|
void cblas_dgemv(CBLAS_LAYOUT layout,
|
||||||
|
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||||
|
const double alpha, const double *A, const int lda,
|
||||||
|
const double *X, const int incX, const double beta,
|
||||||
|
double *Y, const int incY);
|
||||||
|
void cblas_dgbmv(CBLAS_LAYOUT layout,
|
||||||
|
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||||
|
const int KL, const int KU, const double alpha,
|
||||||
|
const double *A, const int lda, const double *X,
|
||||||
|
const int incX, const double beta, double *Y, const int incY);
|
||||||
|
void cblas_dtrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const double *A, const int lda,
|
||||||
|
double *X, const int incX);
|
||||||
|
void cblas_dtbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const int K, const double *A, const int lda,
|
||||||
|
double *X, const int incX);
|
||||||
|
void cblas_dtpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const double *Ap, double *X, const int incX);
|
||||||
|
void cblas_dtrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const double *A, const int lda, double *X,
|
||||||
|
const int incX);
|
||||||
|
void cblas_dtbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const int K, const double *A, const int lda,
|
||||||
|
double *X, const int incX);
|
||||||
|
void cblas_dtpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const double *Ap, double *X, const int incX);
|
||||||
|
|
||||||
|
void cblas_cgemv(CBLAS_LAYOUT layout,
|
||||||
|
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *X, const int incX, const void *beta,
|
||||||
|
void *Y, const int incY);
|
||||||
|
void cblas_cgbmv(CBLAS_LAYOUT layout,
|
||||||
|
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||||
|
const int KL, const int KU, const void *alpha,
|
||||||
|
const void *A, const int lda, const void *X,
|
||||||
|
const int incX, const void *beta, void *Y, const int incY);
|
||||||
|
void cblas_ctrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const void *A, const int lda,
|
||||||
|
void *X, const int incX);
|
||||||
|
void cblas_ctbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const int K, const void *A, const int lda,
|
||||||
|
void *X, const int incX);
|
||||||
|
void cblas_ctpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const void *Ap, void *X, const int incX);
|
||||||
|
void cblas_ctrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const void *A, const int lda, void *X,
|
||||||
|
const int incX);
|
||||||
|
void cblas_ctbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const int K, const void *A, const int lda,
|
||||||
|
void *X, const int incX);
|
||||||
|
void cblas_ctpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const void *Ap, void *X, const int incX);
|
||||||
|
|
||||||
|
void cblas_zgemv(CBLAS_LAYOUT layout,
|
||||||
|
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *X, const int incX, const void *beta,
|
||||||
|
void *Y, const int incY);
|
||||||
|
void cblas_zgbmv(CBLAS_LAYOUT layout,
|
||||||
|
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||||
|
const int KL, const int KU, const void *alpha,
|
||||||
|
const void *A, const int lda, const void *X,
|
||||||
|
const int incX, const void *beta, void *Y, const int incY);
|
||||||
|
void cblas_ztrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const void *A, const int lda,
|
||||||
|
void *X, const int incX);
|
||||||
|
void cblas_ztbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const int K, const void *A, const int lda,
|
||||||
|
void *X, const int incX);
|
||||||
|
void cblas_ztpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const void *Ap, void *X, const int incX);
|
||||||
|
void cblas_ztrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const void *A, const int lda, void *X,
|
||||||
|
const int incX);
|
||||||
|
void cblas_ztbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const int K, const void *A, const int lda,
|
||||||
|
void *X, const int incX);
|
||||||
|
void cblas_ztpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||||
|
const int N, const void *Ap, void *X, const int incX);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Routines with S and D prefixes only
|
||||||
|
*/
|
||||||
|
void cblas_ssymv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const float alpha, const float *A,
|
||||||
|
const int lda, const float *X, const int incX,
|
||||||
|
const float beta, float *Y, const int incY);
|
||||||
|
void cblas_ssbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const int K, const float alpha, const float *A,
|
||||||
|
const int lda, const float *X, const int incX,
|
||||||
|
const float beta, float *Y, const int incY);
|
||||||
|
void cblas_sspmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const float alpha, const float *Ap,
|
||||||
|
const float *X, const int incX,
|
||||||
|
const float beta, float *Y, const int incY);
|
||||||
|
void cblas_sger(CBLAS_LAYOUT layout, const int M, const int N,
|
||||||
|
const float alpha, const float *X, const int incX,
|
||||||
|
const float *Y, const int incY, float *A, const int lda);
|
||||||
|
void cblas_ssyr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const float alpha, const float *X,
|
||||||
|
const int incX, float *A, const int lda);
|
||||||
|
void cblas_sspr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const float alpha, const float *X,
|
||||||
|
const int incX, float *Ap);
|
||||||
|
void cblas_ssyr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const float alpha, const float *X,
|
||||||
|
const int incX, const float *Y, const int incY, float *A,
|
||||||
|
const int lda);
|
||||||
|
void cblas_sspr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const float alpha, const float *X,
|
||||||
|
const int incX, const float *Y, const int incY, float *A);
|
||||||
|
|
||||||
|
void cblas_dsymv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const double alpha, const double *A,
|
||||||
|
const int lda, const double *X, const int incX,
|
||||||
|
const double beta, double *Y, const int incY);
|
||||||
|
void cblas_dsbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const int K, const double alpha, const double *A,
|
||||||
|
const int lda, const double *X, const int incX,
|
||||||
|
const double beta, double *Y, const int incY);
|
||||||
|
void cblas_dspmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const double alpha, const double *Ap,
|
||||||
|
const double *X, const int incX,
|
||||||
|
const double beta, double *Y, const int incY);
|
||||||
|
void cblas_dger(CBLAS_LAYOUT layout, const int M, const int N,
|
||||||
|
const double alpha, const double *X, const int incX,
|
||||||
|
const double *Y, const int incY, double *A, const int lda);
|
||||||
|
void cblas_dsyr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const double alpha, const double *X,
|
||||||
|
const int incX, double *A, const int lda);
|
||||||
|
void cblas_dspr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const double alpha, const double *X,
|
||||||
|
const int incX, double *Ap);
|
||||||
|
void cblas_dsyr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const double alpha, const double *X,
|
||||||
|
const int incX, const double *Y, const int incY, double *A,
|
||||||
|
const int lda);
|
||||||
|
void cblas_dspr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const double alpha, const double *X,
|
||||||
|
const int incX, const double *Y, const int incY, double *A);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Routines with C and Z prefixes only
|
||||||
|
*/
|
||||||
|
void cblas_chemv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const void *alpha, const void *A,
|
||||||
|
const int lda, const void *X, const int incX,
|
||||||
|
const void *beta, void *Y, const int incY);
|
||||||
|
void cblas_chbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const int K, const void *alpha, const void *A,
|
||||||
|
const int lda, const void *X, const int incX,
|
||||||
|
const void *beta, void *Y, const int incY);
|
||||||
|
void cblas_chpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const void *alpha, const void *Ap,
|
||||||
|
const void *X, const int incX,
|
||||||
|
const void *beta, void *Y, const int incY);
|
||||||
|
void cblas_cgeru(CBLAS_LAYOUT layout, const int M, const int N,
|
||||||
|
const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *A, const int lda);
|
||||||
|
void cblas_cgerc(CBLAS_LAYOUT layout, const int M, const int N,
|
||||||
|
const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *A, const int lda);
|
||||||
|
void cblas_cher(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const float alpha, const void *X, const int incX,
|
||||||
|
void *A, const int lda);
|
||||||
|
void cblas_chpr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const float alpha, const void *X,
|
||||||
|
const int incX, void *A);
|
||||||
|
void cblas_cher2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
|
||||||
|
const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *A, const int lda);
|
||||||
|
void cblas_chpr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
|
||||||
|
const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *Ap);
|
||||||
|
|
||||||
|
void cblas_zhemv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const void *alpha, const void *A,
|
||||||
|
const int lda, const void *X, const int incX,
|
||||||
|
const void *beta, void *Y, const int incY);
|
||||||
|
void cblas_zhbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const int K, const void *alpha, const void *A,
|
||||||
|
const int lda, const void *X, const int incX,
|
||||||
|
const void *beta, void *Y, const int incY);
|
||||||
|
void cblas_zhpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const void *alpha, const void *Ap,
|
||||||
|
const void *X, const int incX,
|
||||||
|
const void *beta, void *Y, const int incY);
|
||||||
|
void cblas_zgeru(CBLAS_LAYOUT layout, const int M, const int N,
|
||||||
|
const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *A, const int lda);
|
||||||
|
void cblas_zgerc(CBLAS_LAYOUT layout, const int M, const int N,
|
||||||
|
const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *A, const int lda);
|
||||||
|
void cblas_zher(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const double alpha, const void *X, const int incX,
|
||||||
|
void *A, const int lda);
|
||||||
|
void cblas_zhpr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
const int N, const double alpha, const void *X,
|
||||||
|
const int incX, void *A);
|
||||||
|
void cblas_zher2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
|
||||||
|
const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *A, const int lda);
|
||||||
|
void cblas_zhpr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
|
||||||
|
const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *Ap);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ===========================================================================
|
||||||
|
* Prototypes for level 3 BLAS
|
||||||
|
* ===========================================================================
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Routines with standard 4 prefixes (S, D, C, Z)
|
||||||
|
*/
|
||||||
|
void cblas_sgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||||
|
const int K, const float alpha, const float *A,
|
||||||
|
const int lda, const float *B, const int ldb,
|
||||||
|
const float beta, float *C, const int ldc);
|
||||||
|
void cblas_ssymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, const int M, const int N,
|
||||||
|
const float alpha, const float *A, const int lda,
|
||||||
|
const float *B, const int ldb, const float beta,
|
||||||
|
float *C, const int ldc);
|
||||||
|
void cblas_ssyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const float alpha, const float *A, const int lda,
|
||||||
|
const float beta, float *C, const int ldc);
|
||||||
|
void cblas_ssyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const float alpha, const float *A, const int lda,
|
||||||
|
const float *B, const int ldb, const float beta,
|
||||||
|
float *C, const int ldc);
|
||||||
|
void cblas_strmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_DIAG Diag, const int M, const int N,
|
||||||
|
const float alpha, const float *A, const int lda,
|
||||||
|
float *B, const int ldb);
|
||||||
|
void cblas_strsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_DIAG Diag, const int M, const int N,
|
||||||
|
const float alpha, const float *A, const int lda,
|
||||||
|
float *B, const int ldb);
|
||||||
|
|
||||||
|
void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||||
|
const int K, const double alpha, const double *A,
|
||||||
|
const int lda, const double *B, const int ldb,
|
||||||
|
const double beta, double *C, const int ldc);
|
||||||
|
void cblas_dsymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, const int M, const int N,
|
||||||
|
const double alpha, const double *A, const int lda,
|
||||||
|
const double *B, const int ldb, const double beta,
|
||||||
|
double *C, const int ldc);
|
||||||
|
void cblas_dsyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const double alpha, const double *A, const int lda,
|
||||||
|
const double beta, double *C, const int ldc);
|
||||||
|
void cblas_dsyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const double alpha, const double *A, const int lda,
|
||||||
|
const double *B, const int ldb, const double beta,
|
||||||
|
double *C, const int ldc);
|
||||||
|
void cblas_dtrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_DIAG Diag, const int M, const int N,
|
||||||
|
const double alpha, const double *A, const int lda,
|
||||||
|
double *B, const int ldb);
|
||||||
|
void cblas_dtrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_DIAG Diag, const int M, const int N,
|
||||||
|
const double alpha, const double *A, const int lda,
|
||||||
|
double *B, const int ldb);
|
||||||
|
|
||||||
|
void cblas_cgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||||
|
const int K, const void *alpha, const void *A,
|
||||||
|
const int lda, const void *B, const int ldb,
|
||||||
|
const void *beta, void *C, const int ldc);
|
||||||
|
void cblas_csymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const void *beta,
|
||||||
|
void *C, const int ldc);
|
||||||
|
void cblas_csyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *beta, void *C, const int ldc);
|
||||||
|
void cblas_csyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const void *beta,
|
||||||
|
void *C, const int ldc);
|
||||||
|
void cblas_ctrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_DIAG Diag, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
void *B, const int ldb);
|
||||||
|
void cblas_ctrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_DIAG Diag, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
void *B, const int ldb);
|
||||||
|
|
||||||
|
void cblas_zgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||||
|
const int K, const void *alpha, const void *A,
|
||||||
|
const int lda, const void *B, const int ldb,
|
||||||
|
const void *beta, void *C, const int ldc);
|
||||||
|
void cblas_zsymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const void *beta,
|
||||||
|
void *C, const int ldc);
|
||||||
|
void cblas_zsyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *beta, void *C, const int ldc);
|
||||||
|
void cblas_zsyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const void *beta,
|
||||||
|
void *C, const int ldc);
|
||||||
|
void cblas_ztrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_DIAG Diag, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
void *B, const int ldb);
|
||||||
|
void cblas_ztrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||||
|
CBLAS_DIAG Diag, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
void *B, const int ldb);
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Routines with prefixes C and Z only
|
||||||
|
*/
|
||||||
|
void cblas_chemm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const void *beta,
|
||||||
|
void *C, const int ldc);
|
||||||
|
void cblas_cherk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const float alpha, const void *A, const int lda,
|
||||||
|
const float beta, void *C, const int ldc);
|
||||||
|
void cblas_cher2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const float beta,
|
||||||
|
void *C, const int ldc);
|
||||||
|
|
||||||
|
void cblas_zhemm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||||
|
CBLAS_UPLO Uplo, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const void *beta,
|
||||||
|
void *C, const int ldc);
|
||||||
|
void cblas_zherk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const double alpha, const void *A, const int lda,
|
||||||
|
const double beta, void *C, const int ldc);
|
||||||
|
void cblas_zher2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||||
|
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const double beta,
|
||||||
|
void *C, const int ldc);
|
||||||
|
|
||||||
|
void cblas_xerbla(int p, const char *rout, const char *form, ...);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
@ -0,0 +1,394 @@
|
||||||
|
/*
|
||||||
|
* cblas_f77.h
|
||||||
|
* Written by Keita Teranishi
|
||||||
|
*
|
||||||
|
* Updated by Jeff Horner
|
||||||
|
* Merged cblas_f77.h and cblas_fortran_header.h
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef CBLAS_F77_H
|
||||||
|
#define CBLAS_F77_H
|
||||||
|
|
||||||
|
#ifdef CRAY
|
||||||
|
#include <fortran.h>
|
||||||
|
#define F77_CHAR _fcd
|
||||||
|
#define C2F_CHAR(a) ( _cptofcd( (a), 1 ) )
|
||||||
|
#define C2F_STR(a, i) ( _cptofcd( (a), (i) ) )
|
||||||
|
#define F77_STRLEN(a) (_fcdlen)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef WeirdNEC
|
||||||
|
#define F77_INT long
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
#define FCHAR F77_CHAR
|
||||||
|
#else
|
||||||
|
#define FCHAR char *
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
#define FINT const F77_INT *
|
||||||
|
#define FINT2 F77_INT *
|
||||||
|
#else
|
||||||
|
#define FINT const int *
|
||||||
|
#define FINT2 int *
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Level 1 BLAS
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define F77_xerbla F77_GLOBAL(xerbla,XERBLA)
|
||||||
|
#define F77_srotg F77_GLOBAL(srotg,SROTG)
|
||||||
|
#define F77_srotmg F77_GLOBAL(srotmg,SROTMG)
|
||||||
|
#define F77_srot F77_GLOBAL(srot,SROT)
|
||||||
|
#define F77_srotm F77_GLOBAL(srotm,SROTM)
|
||||||
|
#define F77_drotg F77_GLOBAL(drotg,DROTG)
|
||||||
|
#define F77_drotmg F77_GLOBAL(drotmg,DROTMG)
|
||||||
|
#define F77_drot F77_GLOBAL(drot,DROT)
|
||||||
|
#define F77_drotm F77_GLOBAL(drotm,DROTM)
|
||||||
|
#define F77_sswap F77_GLOBAL(sswap,SSWAP)
|
||||||
|
#define F77_scopy F77_GLOBAL(scopy,SCOPY)
|
||||||
|
#define F77_saxpy F77_GLOBAL(saxpy,SAXPY)
|
||||||
|
#define F77_isamax_sub F77_GLOBAL(isamaxsub,ISAMAXSUB)
|
||||||
|
#define F77_dswap F77_GLOBAL(dswap,DSWAP)
|
||||||
|
#define F77_dcopy F77_GLOBAL(dcopy,DCOPY)
|
||||||
|
#define F77_daxpy F77_GLOBAL(daxpy,DAXPY)
|
||||||
|
#define F77_idamax_sub F77_GLOBAL(idamaxsub,IDAMAXSUB)
|
||||||
|
#define F77_cswap F77_GLOBAL(cswap,CSWAP)
|
||||||
|
#define F77_ccopy F77_GLOBAL(ccopy,CCOPY)
|
||||||
|
#define F77_caxpy F77_GLOBAL(caxpy,CAXPY)
|
||||||
|
#define F77_icamax_sub F77_GLOBAL(icamaxsub,ICAMAXSUB)
|
||||||
|
#define F77_zswap F77_GLOBAL(zswap,ZSWAP)
|
||||||
|
#define F77_zcopy F77_GLOBAL(zcopy,ZCOPY)
|
||||||
|
#define F77_zaxpy F77_GLOBAL(zaxpy,ZAXPY)
|
||||||
|
#define F77_izamax_sub F77_GLOBAL(izamaxsub,IZAMAXSUB)
|
||||||
|
#define F77_sdot_sub F77_GLOBAL(sdotsub,SDOTSUB)
|
||||||
|
#define F77_ddot_sub F77_GLOBAL(ddotsub,DDOTSUB)
|
||||||
|
#define F77_dsdot_sub F77_GLOBAL(dsdotsub,DSDOTSUB)
|
||||||
|
#define F77_sscal F77_GLOBAL(sscal,SSCAL)
|
||||||
|
#define F77_dscal F77_GLOBAL(dscal,DSCAL)
|
||||||
|
#define F77_cscal F77_GLOBAL(cscal,CSCAL)
|
||||||
|
#define F77_zscal F77_GLOBAL(zscal,ZSCAL)
|
||||||
|
#define F77_csscal F77_GLOBAL(csscal,CSSCAL)
|
||||||
|
#define F77_zdscal F77_GLOBAL(zdscal,ZDSCAL)
|
||||||
|
#define F77_cdotu_sub F77_GLOBAL(cdotusub,CDOTUSUB)
|
||||||
|
#define F77_cdotc_sub F77_GLOBAL(cdotcsub,CDOTCSUB)
|
||||||
|
#define F77_zdotu_sub F77_GLOBAL(zdotusub,ZDOTUSUB)
|
||||||
|
#define F77_zdotc_sub F77_GLOBAL(zdotcsub,ZDOTCSUB)
|
||||||
|
#define F77_snrm2_sub F77_GLOBAL(snrm2sub,SNRM2SUB)
|
||||||
|
#define F77_sasum_sub F77_GLOBAL(sasumsub,SASUMSUB)
|
||||||
|
#define F77_dnrm2_sub F77_GLOBAL(dnrm2sub,DNRM2SUB)
|
||||||
|
#define F77_dasum_sub F77_GLOBAL(dasumsub,DASUMSUB)
|
||||||
|
#define F77_scnrm2_sub F77_GLOBAL(scnrm2sub,SCNRM2SUB)
|
||||||
|
#define F77_scasum_sub F77_GLOBAL(scasumsub,SCASUMSUB)
|
||||||
|
#define F77_dznrm2_sub F77_GLOBAL(dznrm2sub,DZNRM2SUB)
|
||||||
|
#define F77_dzasum_sub F77_GLOBAL(dzasumsub,DZASUMSUB)
|
||||||
|
#define F77_sdsdot_sub F77_GLOBAL(sdsdotsub,SDSDOTSUB)
|
||||||
|
/*
|
||||||
|
* Level 2 BLAS
|
||||||
|
*/
|
||||||
|
#define F77_ssymv F77_GLOBAL(ssymv,SSYMY)
|
||||||
|
#define F77_ssbmv F77_GLOBAL(ssbmv,SSMBV)
|
||||||
|
#define F77_sspmv F77_GLOBAL(sspmv,SSPMV)
|
||||||
|
#define F77_sger F77_GLOBAL(sger,SGER)
|
||||||
|
#define F77_ssyr F77_GLOBAL(ssyr,SSYR)
|
||||||
|
#define F77_sspr F77_GLOBAL(sspr,SSPR)
|
||||||
|
#define F77_ssyr2 F77_GLOBAL(ssyr2,SSYR2)
|
||||||
|
#define F77_sspr2 F77_GLOBAL(sspr2,SSPR2)
|
||||||
|
#define F77_dsymv F77_GLOBAL(dsymv,DSYMV)
|
||||||
|
#define F77_dsbmv F77_GLOBAL(dsbmv,DSBMV)
|
||||||
|
#define F77_dspmv F77_GLOBAL(dspmv,DSPMV)
|
||||||
|
#define F77_dger F77_GLOBAL(dger,DGER)
|
||||||
|
#define F77_dsyr F77_GLOBAL(dsyr,DSYR)
|
||||||
|
#define F77_dspr F77_GLOBAL(dspr,DSPR)
|
||||||
|
#define F77_dsyr2 F77_GLOBAL(dsyr2,DSYR2)
|
||||||
|
#define F77_dspr2 F77_GLOBAL(dspr2,DSPR2)
|
||||||
|
#define F77_chemv F77_GLOBAL(chemv,CHEMV)
|
||||||
|
#define F77_chbmv F77_GLOBAL(chbmv,CHBMV)
|
||||||
|
#define F77_chpmv F77_GLOBAL(chpmv,CHPMV)
|
||||||
|
#define F77_cgeru F77_GLOBAL(cgeru,CGERU)
|
||||||
|
#define F77_cgerc F77_GLOBAL(cgerc,CGERC)
|
||||||
|
#define F77_cher F77_GLOBAL(cher,CHER)
|
||||||
|
#define F77_chpr F77_GLOBAL(chpr,CHPR)
|
||||||
|
#define F77_cher2 F77_GLOBAL(cher2,CHER2)
|
||||||
|
#define F77_chpr2 F77_GLOBAL(chpr2,CHPR2)
|
||||||
|
#define F77_zhemv F77_GLOBAL(zhemv,ZHEMV)
|
||||||
|
#define F77_zhbmv F77_GLOBAL(zhbmv,ZHBMV)
|
||||||
|
#define F77_zhpmv F77_GLOBAL(zhpmv,ZHPMV)
|
||||||
|
#define F77_zgeru F77_GLOBAL(zgeru,ZGERU)
|
||||||
|
#define F77_zgerc F77_GLOBAL(zgerc,ZGERC)
|
||||||
|
#define F77_zher F77_GLOBAL(zher,ZHER)
|
||||||
|
#define F77_zhpr F77_GLOBAL(zhpr,ZHPR)
|
||||||
|
#define F77_zher2 F77_GLOBAL(zher2,ZHER2)
|
||||||
|
#define F77_zhpr2 F77_GLOBAL(zhpr2,ZHPR2)
|
||||||
|
#define F77_sgemv F77_GLOBAL(sgemv,SGEMV)
|
||||||
|
#define F77_sgbmv F77_GLOBAL(sgbmv,SGBMV)
|
||||||
|
#define F77_strmv F77_GLOBAL(strmv,STRMV)
|
||||||
|
#define F77_stbmv F77_GLOBAL(stbmv,STBMV)
|
||||||
|
#define F77_stpmv F77_GLOBAL(stpmv,STPMV)
|
||||||
|
#define F77_strsv F77_GLOBAL(strsv,STRSV)
|
||||||
|
#define F77_stbsv F77_GLOBAL(stbsv,STBSV)
|
||||||
|
#define F77_stpsv F77_GLOBAL(stpsv,STPSV)
|
||||||
|
#define F77_dgemv F77_GLOBAL(dgemv,DGEMV)
|
||||||
|
#define F77_dgbmv F77_GLOBAL(dgbmv,DGBMV)
|
||||||
|
#define F77_dtrmv F77_GLOBAL(dtrmv,DTRMV)
|
||||||
|
#define F77_dtbmv F77_GLOBAL(dtbmv,DTBMV)
|
||||||
|
#define F77_dtpmv F77_GLOBAL(dtpmv,DTRMV)
|
||||||
|
#define F77_dtrsv F77_GLOBAL(dtrsv,DTRSV)
|
||||||
|
#define F77_dtbsv F77_GLOBAL(dtbsv,DTBSV)
|
||||||
|
#define F77_dtpsv F77_GLOBAL(dtpsv,DTPSV)
|
||||||
|
#define F77_cgemv F77_GLOBAL(cgemv,CGEMV)
|
||||||
|
#define F77_cgbmv F77_GLOBAL(cgbmv,CGBMV)
|
||||||
|
#define F77_ctrmv F77_GLOBAL(ctrmv,CTRMV)
|
||||||
|
#define F77_ctbmv F77_GLOBAL(ctbmv,CTBMV)
|
||||||
|
#define F77_ctpmv F77_GLOBAL(ctpmv,CTPMV)
|
||||||
|
#define F77_ctrsv F77_GLOBAL(ctrsv,CTRSV)
|
||||||
|
#define F77_ctbsv F77_GLOBAL(ctbsv,CTBSV)
|
||||||
|
#define F77_ctpsv F77_GLOBAL(ctpsv,CTPSV)
|
||||||
|
#define F77_zgemv F77_GLOBAL(zgemv,ZGEMV)
|
||||||
|
#define F77_zgbmv F77_GLOBAL(zgbmv,ZGBMV)
|
||||||
|
#define F77_ztrmv F77_GLOBAL(ztrmv,ZTRMV)
|
||||||
|
#define F77_ztbmv F77_GLOBAL(ztbmv,ZTBMV)
|
||||||
|
#define F77_ztpmv F77_GLOBAL(ztpmv,ZTPMV)
|
||||||
|
#define F77_ztrsv F77_GLOBAL(ztrsv,ZTRSV)
|
||||||
|
#define F77_ztbsv F77_GLOBAL(ztbsv,ZTBSV)
|
||||||
|
#define F77_ztpsv F77_GLOBAL(ztpsv,ZTPSV)
|
||||||
|
/*
|
||||||
|
* Level 3 BLAS
|
||||||
|
*/
|
||||||
|
#define F77_chemm F77_GLOBAL(chemm,CHEMM)
|
||||||
|
#define F77_cherk F77_GLOBAL(cherk,CHERK)
|
||||||
|
#define F77_cher2k F77_GLOBAL(cher2k,CHER2K)
|
||||||
|
#define F77_zhemm F77_GLOBAL(zhemm,ZHEMM)
|
||||||
|
#define F77_zherk F77_GLOBAL(zherk,ZHERK)
|
||||||
|
#define F77_zher2k F77_GLOBAL(zher2k,ZHER2K)
|
||||||
|
#define F77_sgemm F77_GLOBAL(sgemm,SGEMM)
|
||||||
|
#define F77_ssymm F77_GLOBAL(ssymm,SSYMM)
|
||||||
|
#define F77_ssyrk F77_GLOBAL(ssyrk,SSYRK)
|
||||||
|
#define F77_ssyr2k F77_GLOBAL(ssyr2k,SSYR2K)
|
||||||
|
#define F77_strmm F77_GLOBAL(strmm,STRMM)
|
||||||
|
#define F77_strsm F77_GLOBAL(strsm,STRSM)
|
||||||
|
#define F77_dgemm F77_GLOBAL(dgemm,DGEMM)
|
||||||
|
#define F77_dsymm F77_GLOBAL(dsymm,DSYMM)
|
||||||
|
#define F77_dsyrk F77_GLOBAL(dsyrk,DSYRK)
|
||||||
|
#define F77_dsyr2k F77_GLOBAL(dsyr2k,DSYR2K)
|
||||||
|
#define F77_dtrmm F77_GLOBAL(dtrmm,DTRMM)
|
||||||
|
#define F77_dtrsm F77_GLOBAL(dtrsm,DTRSM)
|
||||||
|
#define F77_cgemm F77_GLOBAL(cgemm,CGEMM)
|
||||||
|
#define F77_csymm F77_GLOBAL(csymm,CSYMM)
|
||||||
|
#define F77_csyrk F77_GLOBAL(csyrk,CSYRK)
|
||||||
|
#define F77_csyr2k F77_GLOBAL(csyr2k,CSYR2K)
|
||||||
|
#define F77_ctrmm F77_GLOBAL(ctrmm,CTRMM)
|
||||||
|
#define F77_ctrsm F77_GLOBAL(ctrsm,CTRSM)
|
||||||
|
#define F77_zgemm F77_GLOBAL(zgemm,ZGEMM)
|
||||||
|
#define F77_zsymm F77_GLOBAL(zsymm,ZSYMM)
|
||||||
|
#define F77_zsyrk F77_GLOBAL(zsyrk,ZSYRK)
|
||||||
|
#define F77_zsyr2k F77_GLOBAL(zsyr2k,ZSYR2K)
|
||||||
|
#define F77_ztrmm F77_GLOBAL(ztrmm,ZTRMM)
|
||||||
|
#define F77_ztrsm F77_GLOBAL(ztrsm,ZTRSM)
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void F77_xerbla(FCHAR, void *);
|
||||||
|
/*
|
||||||
|
* Level 1 Fortran Prototypes
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Single Precision */
|
||||||
|
|
||||||
|
void F77_srot(FINT, float *, FINT, float *, FINT, const float *, const float *);
|
||||||
|
void F77_srotg(float *,float *,float *,float *);
|
||||||
|
void F77_srotm( FINT, float *, FINT, float *, FINT, const float *);
|
||||||
|
void F77_srotmg(float *,float *,float *,const float *, float *);
|
||||||
|
void F77_sswap( FINT, float *, FINT, float *, FINT);
|
||||||
|
void F77_scopy( FINT, const float *, FINT, float *, FINT);
|
||||||
|
void F77_saxpy( FINT, const float *, const float *, FINT, float *, FINT);
|
||||||
|
void F77_sdot_sub(FINT, const float *, FINT, const float *, FINT, float *);
|
||||||
|
void F77_sdsdot_sub( FINT, const float *, const float *, FINT, const float *, FINT, float *);
|
||||||
|
void F77_sscal( FINT, const float *, float *, FINT);
|
||||||
|
void F77_snrm2_sub( FINT, const float *, FINT, float *);
|
||||||
|
void F77_sasum_sub( FINT, const float *, FINT, float *);
|
||||||
|
void F77_isamax_sub( FINT, const float * , FINT, FINT2);
|
||||||
|
|
||||||
|
/* Double Precision */
|
||||||
|
|
||||||
|
void F77_drot(FINT, double *, FINT, double *, FINT, const double *, const double *);
|
||||||
|
void F77_drotg(double *,double *,double *,double *);
|
||||||
|
void F77_drotm( FINT, double *, FINT, double *, FINT, const double *);
|
||||||
|
void F77_drotmg(double *,double *,double *,const double *, double *);
|
||||||
|
void F77_dswap( FINT, double *, FINT, double *, FINT);
|
||||||
|
void F77_dcopy( FINT, const double *, FINT, double *, FINT);
|
||||||
|
void F77_daxpy( FINT, const double *, const double *, FINT, double *, FINT);
|
||||||
|
void F77_dswap( FINT, double *, FINT, double *, FINT);
|
||||||
|
void F77_dsdot_sub(FINT, const float *, FINT, const float *, FINT, double *);
|
||||||
|
void F77_ddot_sub( FINT, const double *, FINT, const double *, FINT, double *);
|
||||||
|
void F77_dscal( FINT, const double *, double *, FINT);
|
||||||
|
void F77_dnrm2_sub( FINT, const double *, FINT, double *);
|
||||||
|
void F77_dasum_sub( FINT, const double *, FINT, double *);
|
||||||
|
void F77_idamax_sub( FINT, const double * , FINT, FINT2);
|
||||||
|
|
||||||
|
/* Single Complex Precision */
|
||||||
|
|
||||||
|
void F77_cswap( FINT, void *, FINT, void *, FINT);
|
||||||
|
void F77_ccopy( FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_caxpy( FINT, const void *, const void *, FINT, void *, FINT);
|
||||||
|
void F77_cswap( FINT, void *, FINT, void *, FINT);
|
||||||
|
void F77_cdotc_sub( FINT, const void *, FINT, const void *, FINT, void *);
|
||||||
|
void F77_cdotu_sub( FINT, const void *, FINT, const void *, FINT, void *);
|
||||||
|
void F77_cscal( FINT, const void *, void *, FINT);
|
||||||
|
void F77_icamax_sub( FINT, const void *, FINT, FINT2);
|
||||||
|
void F77_csscal( FINT, const float *, void *, FINT);
|
||||||
|
void F77_scnrm2_sub( FINT, const void *, FINT, float *);
|
||||||
|
void F77_scasum_sub( FINT, const void *, FINT, float *);
|
||||||
|
|
||||||
|
/* Double Complex Precision */
|
||||||
|
|
||||||
|
void F77_zswap( FINT, void *, FINT, void *, FINT);
|
||||||
|
void F77_zcopy( FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_zaxpy( FINT, const void *, const void *, FINT, void *, FINT);
|
||||||
|
void F77_zswap( FINT, void *, FINT, void *, FINT);
|
||||||
|
void F77_zdotc_sub( FINT, const void *, FINT, const void *, FINT, void *);
|
||||||
|
void F77_zdotu_sub( FINT, const void *, FINT, const void *, FINT, void *);
|
||||||
|
void F77_zdscal( FINT, const double *, void *, FINT);
|
||||||
|
void F77_zscal( FINT, const void *, void *, FINT);
|
||||||
|
void F77_dznrm2_sub( FINT, const void *, FINT, double *);
|
||||||
|
void F77_dzasum_sub( FINT, const void *, FINT, double *);
|
||||||
|
void F77_izamax_sub( FINT, const void *, FINT, FINT2);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Level 2 Fortran Prototypes
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Single Precision */
|
||||||
|
|
||||||
|
void F77_sgemv(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_sgbmv(FCHAR, FINT, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_ssymv(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_ssbmv(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_sspmv(FCHAR, FINT, const float *, const float *, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_strmv( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT);
|
||||||
|
void F77_stbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT);
|
||||||
|
void F77_strsv( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT);
|
||||||
|
void F77_stbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT);
|
||||||
|
void F77_stpmv( FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT);
|
||||||
|
void F77_stpsv( FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT);
|
||||||
|
void F77_sger( FINT, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT);
|
||||||
|
void F77_ssyr(FCHAR, FINT, const float *, const float *, FINT, float *, FINT);
|
||||||
|
void F77_sspr(FCHAR, FINT, const float *, const float *, FINT, float *);
|
||||||
|
void F77_sspr2(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *);
|
||||||
|
void F77_ssyr2(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT);
|
||||||
|
|
||||||
|
/* Double Precision */
|
||||||
|
|
||||||
|
void F77_dgemv(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dgbmv(FCHAR, FINT, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dsymv(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dsbmv(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dspmv(FCHAR, FINT, const double *, const double *, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dtrmv( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT);
|
||||||
|
void F77_dtbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT);
|
||||||
|
void F77_dtrsv( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT);
|
||||||
|
void F77_dtbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT);
|
||||||
|
void F77_dtpmv( FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dtpsv( FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dger( FINT, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT);
|
||||||
|
void F77_dsyr(FCHAR, FINT, const double *, const double *, FINT, double *, FINT);
|
||||||
|
void F77_dspr(FCHAR, FINT, const double *, const double *, FINT, double *);
|
||||||
|
void F77_dspr2(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *);
|
||||||
|
void F77_dsyr2(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT);
|
||||||
|
|
||||||
|
/* Single Complex Precision */
|
||||||
|
|
||||||
|
void F77_cgemv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||||
|
void F77_cgbmv(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||||
|
void F77_chemv(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||||
|
void F77_chbmv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||||
|
void F77_chpmv(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT);
|
||||||
|
void F77_ctrmv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_ctbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_ctpmv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT);
|
||||||
|
void F77_ctrsv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_ctbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_ctpsv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT);
|
||||||
|
void F77_cgerc( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_cgeru( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_cher(FCHAR, FINT, const float *, const void *, FINT, void *, FINT);
|
||||||
|
void F77_cher2(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_chpr(FCHAR, FINT, const float *, const void *, FINT, void *);
|
||||||
|
void F77_chpr2(FCHAR, FINT, const float *, const void *, FINT, const void *, FINT, void *);
|
||||||
|
|
||||||
|
/* Double Complex Precision */
|
||||||
|
|
||||||
|
void F77_zgemv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||||
|
void F77_zgbmv(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||||
|
void F77_zhemv(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||||
|
void F77_zhbmv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||||
|
void F77_zhpmv(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT);
|
||||||
|
void F77_ztrmv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_ztbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_ztpmv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT);
|
||||||
|
void F77_ztrsv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_ztbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_ztpsv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT);
|
||||||
|
void F77_zgerc( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_zgeru( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_zher(FCHAR, FINT, const double *, const void *, FINT, void *, FINT);
|
||||||
|
void F77_zher2(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||||
|
void F77_zhpr(FCHAR, FINT, const double *, const void *, FINT, void *);
|
||||||
|
void F77_zhpr2(FCHAR, FINT, const double *, const void *, FINT, const void *, FINT, void *);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Level 3 Fortran Prototypes
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Single Precision */
|
||||||
|
|
||||||
|
void F77_sgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_ssymm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_ssyrk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_ssyr2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_strmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
|
||||||
|
void F77_strsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
|
||||||
|
|
||||||
|
/* Double Precision */
|
||||||
|
|
||||||
|
void F77_dgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dsymm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dsyrk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dsyr2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_dtrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
|
||||||
|
void F77_dtrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
|
||||||
|
|
||||||
|
/* Single Complex Precision */
|
||||||
|
|
||||||
|
void F77_cgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_csymm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_chemm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_csyrk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_cherk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_csyr2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_cher2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||||
|
void F77_ctrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
|
||||||
|
void F77_ctrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
|
||||||
|
|
||||||
|
/* Double Complex Precision */
|
||||||
|
|
||||||
|
void F77_zgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_zsymm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_zhemm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_zsyrk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_zherk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_zsyr2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_zher2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||||
|
void F77_ztrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
|
||||||
|
void F77_ztrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* CBLAS_F77_H */
|
||||||
|
|
@ -0,0 +1,17 @@
|
||||||
|
#ifndef F77_HEADER_INCLUDED
|
||||||
|
#define F77_HEADER_INCLUDED
|
||||||
|
|
||||||
|
#ifndef F77_GLOBAL
|
||||||
|
#if defined(F77_GLOBAL_PATTERN_LC) || defined(ADD_)
|
||||||
|
#define F77_GLOBAL(lcname,UCNAME) lcname##_
|
||||||
|
#elif defined(F77_GLOBAL_PATTERN_UC) || defined(UPPER)
|
||||||
|
#define F77_GLOBAL(lcname,UCNAME) UCNAME
|
||||||
|
#elif defined(F77_GLOBAL_PATTERN_MC) || defined(NOCHANGE)
|
||||||
|
#define F77_GLOBAL(lcname,UCNAME) lcname
|
||||||
|
#else
|
||||||
|
#define F77_GLOBAL(lcname,UCNAME) lcname##_
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
@ -0,0 +1,190 @@
|
||||||
|
/*
|
||||||
|
* cblas_test.h
|
||||||
|
* Written by Keita Teranishi
|
||||||
|
*/
|
||||||
|
#ifndef CBLAS_TEST_H
|
||||||
|
#define CBLAS_TEST_H
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_mangling.h"
|
||||||
|
|
||||||
|
#define TRUE 1
|
||||||
|
#define PASSED 1
|
||||||
|
#define TEST_ROW_MJR 1
|
||||||
|
|
||||||
|
#define FALSE 0
|
||||||
|
#define FAILED 0
|
||||||
|
#define TEST_COL_MJR 0
|
||||||
|
|
||||||
|
#define INVALID -1
|
||||||
|
#define UNDEFINED -1
|
||||||
|
|
||||||
|
typedef struct { float real; float imag; } CBLAS_TEST_COMPLEX;
|
||||||
|
typedef struct { double real; double imag; } CBLAS_TEST_ZOMPLEX;
|
||||||
|
|
||||||
|
#define F77_xerbla F77_GLOBAL(xerbla,XERBLA)
|
||||||
|
/*
|
||||||
|
* Level 1 BLAS
|
||||||
|
*/
|
||||||
|
#define F77_srotg F77_GLOBAL(srotgtest,SROTGTEST)
|
||||||
|
#define F77_srotmg F77_GLOBAL(srotmgtest,SROTMGTEST)
|
||||||
|
#define F77_srot F77_GLOBAL(srottest,SROTTEST)
|
||||||
|
#define F77_srotm F77_GLOBAL(srotmtest,SROTMTEST)
|
||||||
|
#define F77_drotg F77_GLOBAL(drotgtest,DROTGTEST)
|
||||||
|
#define F77_drotmg F77_GLOBAL(drotmgtest,DROTMGTEST)
|
||||||
|
#define F77_drot F77_GLOBAL(drottest,DROTTEST)
|
||||||
|
#define F77_drotm F77_GLOBAL(drotmtest,DROTMTEST)
|
||||||
|
#define F77_sswap F77_GLOBAL(sswaptest,SSWAPTEST)
|
||||||
|
#define F77_scopy F77_GLOBAL(scopytest,SCOPYTEST)
|
||||||
|
#define F77_saxpy F77_GLOBAL(saxpytest,SAXPYTEST)
|
||||||
|
#define F77_isamax F77_GLOBAL(isamaxtest,ISAMAXTEST)
|
||||||
|
#define F77_dswap F77_GLOBAL(dswaptest,DSWAPTEST)
|
||||||
|
#define F77_dcopy F77_GLOBAL(dcopytest,DCOPYTEST)
|
||||||
|
#define F77_daxpy F77_GLOBAL(daxpytest,DAXPYTEST)
|
||||||
|
#define F77_idamax F77_GLOBAL(idamaxtest,IDAMAXTEST)
|
||||||
|
#define F77_cswap F77_GLOBAL(cswaptest,CSWAPTEST)
|
||||||
|
#define F77_ccopy F77_GLOBAL(ccopytest,CCOPYTEST)
|
||||||
|
#define F77_caxpy F77_GLOBAL(caxpytest,CAXPYTEST)
|
||||||
|
#define F77_icamax F77_GLOBAL(icamaxtest,ICAMAXTEST)
|
||||||
|
#define F77_zswap F77_GLOBAL(zswaptest,ZSWAPTEST)
|
||||||
|
#define F77_zcopy F77_GLOBAL(zcopytest,ZCOPYTEST)
|
||||||
|
#define F77_zaxpy F77_GLOBAL(zaxpytest,ZAXPYTEST)
|
||||||
|
#define F77_izamax F77_GLOBAL(izamaxtest,IZAMAXTEST)
|
||||||
|
#define F77_sdot F77_GLOBAL(sdottest,SDOTTEST)
|
||||||
|
#define F77_ddot F77_GLOBAL(ddottest,DDOTTEST)
|
||||||
|
#define F77_dsdot F77_GLOBAL(dsdottest,DSDOTTEST)
|
||||||
|
#define F77_sscal F77_GLOBAL(sscaltest,SSCALTEST)
|
||||||
|
#define F77_dscal F77_GLOBAL(dscaltest,DSCALTEST)
|
||||||
|
#define F77_cscal F77_GLOBAL(cscaltest,CSCALTEST)
|
||||||
|
#define F77_zscal F77_GLOBAL(zscaltest,ZSCALTEST)
|
||||||
|
#define F77_csscal F77_GLOBAL(csscaltest,CSSCALTEST)
|
||||||
|
#define F77_zdscal F77_GLOBAL(zdscaltest,ZDSCALTEST)
|
||||||
|
#define F77_cdotu F77_GLOBAL(cdotutest,CDOTUTEST)
|
||||||
|
#define F77_cdotc F77_GLOBAL(cdotctest,CDOTCTEST)
|
||||||
|
#define F77_zdotu F77_GLOBAL(zdotutest,ZDOTUTEST)
|
||||||
|
#define F77_zdotc F77_GLOBAL(zdotctest,ZDOTCTEST)
|
||||||
|
#define F77_snrm2 F77_GLOBAL(snrm2test,SNRM2TEST)
|
||||||
|
#define F77_sasum F77_GLOBAL(sasumtest,SASUMTEST)
|
||||||
|
#define F77_dnrm2 F77_GLOBAL(dnrm2test,DNRM2TEST)
|
||||||
|
#define F77_dasum F77_GLOBAL(dasumtest,DASUMTEST)
|
||||||
|
#define F77_scnrm2 F77_GLOBAL(scnrm2test,SCNRM2TEST)
|
||||||
|
#define F77_scasum F77_GLOBAL(scasumtest,SCASUMTEST)
|
||||||
|
#define F77_dznrm2 F77_GLOBAL(dznrm2test,DZNRM2TEST)
|
||||||
|
#define F77_dzasum F77_GLOBAL(dzasumtest,DZASUMTEST)
|
||||||
|
#define F77_sdsdot F77_GLOBAL(sdsdottest, SDSDOTTEST)
|
||||||
|
/*
|
||||||
|
* Level 2 BLAS
|
||||||
|
*/
|
||||||
|
#define F77_s2chke F77_GLOBAL(cs2chke,CS2CHKE)
|
||||||
|
#define F77_d2chke F77_GLOBAL(cd2chke,CD2CHKE)
|
||||||
|
#define F77_c2chke F77_GLOBAL(cc2chke,CC2CHKE)
|
||||||
|
#define F77_z2chke F77_GLOBAL(cz2chke,CZ2CHKE)
|
||||||
|
#define F77_ssymv F77_GLOBAL(cssymv,CSSYMV)
|
||||||
|
#define F77_ssbmv F77_GLOBAL(cssbmv,CSSBMV)
|
||||||
|
#define F77_sspmv F77_GLOBAL(csspmv,CSSPMV)
|
||||||
|
#define F77_sger F77_GLOBAL(csger,CSGER)
|
||||||
|
#define F77_ssyr F77_GLOBAL(cssyr,CSSYR)
|
||||||
|
#define F77_sspr F77_GLOBAL(csspr,CSSPR)
|
||||||
|
#define F77_ssyr2 F77_GLOBAL(cssyr2,CSSYR2)
|
||||||
|
#define F77_sspr2 F77_GLOBAL(csspr2,CSSPR2)
|
||||||
|
#define F77_dsymv F77_GLOBAL(cdsymv,CDSYMV)
|
||||||
|
#define F77_dsbmv F77_GLOBAL(cdsbmv,CDSBMV)
|
||||||
|
#define F77_dspmv F77_GLOBAL(cdspmv,CDSPMV)
|
||||||
|
#define F77_dger F77_GLOBAL(cdger,CDGER)
|
||||||
|
#define F77_dsyr F77_GLOBAL(cdsyr,CDSYR)
|
||||||
|
#define F77_dspr F77_GLOBAL(cdspr,CDSPR)
|
||||||
|
#define F77_dsyr2 F77_GLOBAL(cdsyr2,CDSYR2)
|
||||||
|
#define F77_dspr2 F77_GLOBAL(cdspr2,CDSPR2)
|
||||||
|
#define F77_chemv F77_GLOBAL(cchemv,CCHEMV)
|
||||||
|
#define F77_chbmv F77_GLOBAL(cchbmv,CCHBMV)
|
||||||
|
#define F77_chpmv F77_GLOBAL(cchpmv,CCHPMV)
|
||||||
|
#define F77_cgeru F77_GLOBAL(ccgeru,CCGERU)
|
||||||
|
#define F77_cgerc F77_GLOBAL(ccgerc,CCGERC)
|
||||||
|
#define F77_cher F77_GLOBAL(ccher,CCHER)
|
||||||
|
#define F77_chpr F77_GLOBAL(cchpr,CCHPR)
|
||||||
|
#define F77_cher2 F77_GLOBAL(ccher2,CCHER2)
|
||||||
|
#define F77_chpr2 F77_GLOBAL(cchpr2,CCHPR2)
|
||||||
|
#define F77_zhemv F77_GLOBAL(czhemv,CZHEMV)
|
||||||
|
#define F77_zhbmv F77_GLOBAL(czhbmv,CZHBMV)
|
||||||
|
#define F77_zhpmv F77_GLOBAL(czhpmv,CZHPMV)
|
||||||
|
#define F77_zgeru F77_GLOBAL(czgeru,CZGERU)
|
||||||
|
#define F77_zgerc F77_GLOBAL(czgerc,CZGERC)
|
||||||
|
#define F77_zher F77_GLOBAL(czher,CZHER)
|
||||||
|
#define F77_zhpr F77_GLOBAL(czhpr,CZHPR)
|
||||||
|
#define F77_zher2 F77_GLOBAL(czher2,CZHER2)
|
||||||
|
#define F77_zhpr2 F77_GLOBAL(czhpr2,CZHPR2)
|
||||||
|
#define F77_sgemv F77_GLOBAL(csgemv,CSGEMV)
|
||||||
|
#define F77_sgbmv F77_GLOBAL(csgbmv,CSGBMV)
|
||||||
|
#define F77_strmv F77_GLOBAL(cstrmv,CSTRMV)
|
||||||
|
#define F77_stbmv F77_GLOBAL(cstbmv,CSTBMV)
|
||||||
|
#define F77_stpmv F77_GLOBAL(cstpmv,CSTPMV)
|
||||||
|
#define F77_strsv F77_GLOBAL(cstrsv,CSTRSV)
|
||||||
|
#define F77_stbsv F77_GLOBAL(cstbsv,CSTBSV)
|
||||||
|
#define F77_stpsv F77_GLOBAL(cstpsv,CSTPSV)
|
||||||
|
#define F77_dgemv F77_GLOBAL(cdgemv,CDGEMV)
|
||||||
|
#define F77_dgbmv F77_GLOBAL(cdgbmv,CDGBMV)
|
||||||
|
#define F77_dtrmv F77_GLOBAL(cdtrmv,CDTRMV)
|
||||||
|
#define F77_dtbmv F77_GLOBAL(cdtbmv,CDTBMV)
|
||||||
|
#define F77_dtpmv F77_GLOBAL(cdtpmv,CDTPMV)
|
||||||
|
#define F77_dtrsv F77_GLOBAL(cdtrsv,CDTRSV)
|
||||||
|
#define F77_dtbsv F77_GLOBAL(cdtbsv,CDTBSV)
|
||||||
|
#define F77_dtpsv F77_GLOBAL(cdtpsv,CDTPSV)
|
||||||
|
#define F77_cgemv F77_GLOBAL(ccgemv,CCGEMV)
|
||||||
|
#define F77_cgbmv F77_GLOBAL(ccgbmv,CCGBMV)
|
||||||
|
#define F77_ctrmv F77_GLOBAL(cctrmv,CCTRMV)
|
||||||
|
#define F77_ctbmv F77_GLOBAL(cctbmv,CCTPMV)
|
||||||
|
#define F77_ctpmv F77_GLOBAL(cctpmv,CCTPMV)
|
||||||
|
#define F77_ctrsv F77_GLOBAL(cctrsv,CCTRSV)
|
||||||
|
#define F77_ctbsv F77_GLOBAL(cctbsv,CCTBSV)
|
||||||
|
#define F77_ctpsv F77_GLOBAL(cctpsv,CCTPSV)
|
||||||
|
#define F77_zgemv F77_GLOBAL(czgemv,CZGEMV)
|
||||||
|
#define F77_zgbmv F77_GLOBAL(czgbmv,CZGBMV)
|
||||||
|
#define F77_ztrmv F77_GLOBAL(cztrmv,CZTRMV)
|
||||||
|
#define F77_ztbmv F77_GLOBAL(cztbmv,CZTBMV)
|
||||||
|
#define F77_ztpmv F77_GLOBAL(cztpmv,CZTPMV)
|
||||||
|
#define F77_ztrsv F77_GLOBAL(cztrsv,CZTRSV)
|
||||||
|
#define F77_ztbsv F77_GLOBAL(cztbsv,CZTBSV)
|
||||||
|
#define F77_ztpsv F77_GLOBAL(cztpsv,CZTPSV)
|
||||||
|
/*
|
||||||
|
* Level 3 BLAS
|
||||||
|
*/
|
||||||
|
#define F77_s3chke F77_GLOBAL(cs3chke,CS3CHKE)
|
||||||
|
#define F77_d3chke F77_GLOBAL(cd3chke,CD3CHKE)
|
||||||
|
#define F77_c3chke F77_GLOBAL(cc3chke,CC3CHKE)
|
||||||
|
#define F77_z3chke F77_GLOBAL(cz3chke,CZ3CHKE)
|
||||||
|
#define F77_chemm F77_GLOBAL(cchemm,CCHEMM)
|
||||||
|
#define F77_cherk F77_GLOBAL(ccherk,CCHERK)
|
||||||
|
#define F77_cher2k F77_GLOBAL(ccher2k,CCHER2K)
|
||||||
|
#define F77_zhemm F77_GLOBAL(czhemm,CZHEMM)
|
||||||
|
#define F77_zherk F77_GLOBAL(czherk,CZHERK)
|
||||||
|
#define F77_zher2k F77_GLOBAL(czher2k,CZHER2K)
|
||||||
|
#define F77_sgemm F77_GLOBAL(csgemm,CSGEMM)
|
||||||
|
#define F77_ssymm F77_GLOBAL(cssymm,CSSYMM)
|
||||||
|
#define F77_ssyrk F77_GLOBAL(cssyrk,CSSYRK)
|
||||||
|
#define F77_ssyr2k F77_GLOBAL(cssyr2k,CSSYR2K)
|
||||||
|
#define F77_strmm F77_GLOBAL(cstrmm,CSTRMM)
|
||||||
|
#define F77_strsm F77_GLOBAL(cstrsm,CSTRSM)
|
||||||
|
#define F77_dgemm F77_GLOBAL(cdgemm,CDGEMM)
|
||||||
|
#define F77_dsymm F77_GLOBAL(cdsymm,CDSYMM)
|
||||||
|
#define F77_dsyrk F77_GLOBAL(cdsyrk,CDSYRK)
|
||||||
|
#define F77_dsyr2k F77_GLOBAL(cdsyr2k,CDSYR2K)
|
||||||
|
#define F77_dtrmm F77_GLOBAL(cdtrmm,CDTRMM)
|
||||||
|
#define F77_dtrsm F77_GLOBAL(cdtrsm,CDTRSM)
|
||||||
|
#define F77_cgemm F77_GLOBAL(ccgemm,CCGEMM)
|
||||||
|
#define F77_csymm F77_GLOBAL(ccsymm,CCSYMM)
|
||||||
|
#define F77_csyrk F77_GLOBAL(ccsyrk,CCSYRK)
|
||||||
|
#define F77_csyr2k F77_GLOBAL(ccsyr2k,CCSYR2K)
|
||||||
|
#define F77_ctrmm F77_GLOBAL(cctrmm,CCTRMM)
|
||||||
|
#define F77_ctrsm F77_GLOBAL(cctrsm,CCTRSM)
|
||||||
|
#define F77_zgemm F77_GLOBAL(czgemm,CZGEMM)
|
||||||
|
#define F77_zsymm F77_GLOBAL(czsymm,CZSYMM)
|
||||||
|
#define F77_zsyrk F77_GLOBAL(czsyrk,CZSYRK)
|
||||||
|
#define F77_zsyr2k F77_GLOBAL(czsyr2k,CZSYR2K)
|
||||||
|
#define F77_ztrmm F77_GLOBAL(cztrmm,CZTRMM)
|
||||||
|
#define F77_ztrsm F77_GLOBAL(cztrsm, CZTRSM)
|
||||||
|
|
||||||
|
void get_transpose_type(char *type, CBLAS_TRANSPOSE *trans);
|
||||||
|
void get_uplo_type(char *type, CBLAS_UPLO *uplo);
|
||||||
|
void get_diag_type(char *type, CBLAS_DIAG *diag);
|
||||||
|
void get_side_type(char *type, CBLAS_SIDE *side);
|
||||||
|
|
||||||
|
#endif /* CBLAS_TEST_H */
|
||||||
|
|
@ -0,0 +1,168 @@
|
||||||
|
# This Makefile compiles the CBLAS routines
|
||||||
|
#
|
||||||
|
# Error handling routines for level 2 & 3
|
||||||
|
|
||||||
|
set (ERRHAND cblas_globals.c cblas_xerbla.c xerbla.c)
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# CBLAS routines
|
||||||
|
#
|
||||||
|
# Level 1
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single real precision
|
||||||
|
#
|
||||||
|
set (SLEV1 cblas_srotg.c cblas_srotmg.c cblas_srot.c cblas_srotm.c
|
||||||
|
cblas_sswap.c cblas_sscal.c cblas_scopy.c cblas_saxpy.c
|
||||||
|
cblas_sdot.c cblas_sdsdot.c cblas_snrm2.c cblas_sasum.c
|
||||||
|
cblas_isamax.c sdotsub.f sdsdotsub.f snrm2sub.f sasumsub.f
|
||||||
|
isamaxsub.f)
|
||||||
|
#
|
||||||
|
# All object files for double real precision
|
||||||
|
#
|
||||||
|
set (DLEV1 cblas_drotg.c cblas_drotmg.c cblas_drot.c cblas_drotm.c
|
||||||
|
cblas_dswap.c cblas_dscal.c cblas_dcopy.c cblas_daxpy.c
|
||||||
|
cblas_ddot.c cblas_dsdot.c cblas_dnrm2.c cblas_dasum.c
|
||||||
|
cblas_idamax.c ddotsub.f dsdotsub.f dnrm2sub.f
|
||||||
|
dasumsub.f idamaxsub.f)
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single complex precision
|
||||||
|
#
|
||||||
|
set (CLEV1 cblas_cswap.c cblas_cscal.c cblas_csscal.c cblas_ccopy.c
|
||||||
|
cblas_caxpy.c cblas_cdotu_sub.c cblas_cdotc_sub.c
|
||||||
|
cblas_icamax.c cdotcsub.f cdotusub.f icamaxsub.f)
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for double complex precision
|
||||||
|
#
|
||||||
|
set (ZLEV1 cblas_zswap.c cblas_zscal.c cblas_zdscal.c cblas_zcopy.c
|
||||||
|
cblas_zaxpy.c cblas_zdotu_sub.c cblas_zdotc_sub.c cblas_dznrm2.c
|
||||||
|
cblas_dzasum.c cblas_izamax.c zdotcsub.f zdotusub.f
|
||||||
|
dzasumsub.f dznrm2sub.f izamaxsub.f)
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Common files for single complex precision
|
||||||
|
#
|
||||||
|
set (SCLEV1 cblas_scasum.c scasumsub.f cblas_scnrm2.c scnrm2sub.f)
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files
|
||||||
|
#
|
||||||
|
set (ALEV1 ${slev1} ${dlev1} ${clev1} ${zlev1} ${sclev1})
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# CBLAS routines
|
||||||
|
#
|
||||||
|
# Level 2
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single real precision
|
||||||
|
#
|
||||||
|
set (SLEV2 cblas_sgemv.c cblas_sgbmv.c cblas_sger.c cblas_ssbmv.c cblas_sspmv.c
|
||||||
|
cblas_sspr.c cblas_sspr2.c cblas_ssymv.c cblas_ssyr.c cblas_ssyr2.c
|
||||||
|
cblas_stbmv.c cblas_stbsv.c cblas_stpmv.c cblas_stpsv.c cblas_strmv.c
|
||||||
|
cblas_strsv.c)
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for double real precision
|
||||||
|
#
|
||||||
|
set (DLEV2 cblas_dgemv.c cblas_dgbmv.c cblas_dger.c cblas_dsbmv.c cblas_dspmv.c
|
||||||
|
cblas_dspr.c cblas_dspr2.c cblas_dsymv.c cblas_dsyr.c cblas_dsyr2.c
|
||||||
|
cblas_dtbmv.c cblas_dtbsv.c cblas_dtpmv.c cblas_dtpsv.c cblas_dtrmv.c
|
||||||
|
cblas_dtrsv.c)
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single complex precision
|
||||||
|
#
|
||||||
|
set (CLEV2 cblas_cgemv.c cblas_cgbmv.c cblas_chemv.c cblas_chbmv.c cblas_chpmv.c
|
||||||
|
cblas_ctrmv.c cblas_ctbmv.c cblas_ctpmv.c cblas_ctrsv.c cblas_ctbsv.c
|
||||||
|
cblas_ctpsv.c cblas_cgeru.c cblas_cgerc.c cblas_cher.c cblas_cher2.c
|
||||||
|
cblas_chpr.c cblas_chpr2.c)
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for double complex precision
|
||||||
|
#
|
||||||
|
set (ZLEV2 cblas_zgemv.c cblas_zgbmv.c cblas_zhemv.c cblas_zhbmv.c cblas_zhpmv.c
|
||||||
|
cblas_ztrmv.c cblas_ztbmv.c cblas_ztpmv.c cblas_ztrsv.c cblas_ztbsv.c
|
||||||
|
cblas_ztpsv.c cblas_zgeru.c cblas_zgerc.c cblas_zher.c cblas_zher2.c
|
||||||
|
cblas_zhpr.c cblas_zhpr2.c)
|
||||||
|
#
|
||||||
|
# All object files
|
||||||
|
#
|
||||||
|
set (AVEL2 ${slev2} ${dlev2} ${clev2} ${zlev2})
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# CBLAS routines
|
||||||
|
#
|
||||||
|
# Level 3
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single real precision
|
||||||
|
#
|
||||||
|
set (SLEV3 cblas_sgemm.c cblas_ssymm.c cblas_ssyrk.c cblas_ssyr2k.c cblas_strmm.c
|
||||||
|
cblas_strsm.c)
|
||||||
|
#
|
||||||
|
# All object files for double real precision
|
||||||
|
#
|
||||||
|
set (DLEV3 cblas_dgemm.c cblas_dsymm.c cblas_dsyrk.c cblas_dsyr2k.c cblas_dtrmm.c
|
||||||
|
cblas_dtrsm.c)
|
||||||
|
#
|
||||||
|
# All object files for single complex precision
|
||||||
|
#
|
||||||
|
set (CLEV3 cblas_cgemm.c cblas_csymm.c cblas_chemm.c cblas_cherk.c
|
||||||
|
cblas_cher2k.c cblas_ctrmm.c cblas_ctrsm.c cblas_csyrk.c
|
||||||
|
cblas_csyr2k.c)
|
||||||
|
#
|
||||||
|
# All object files for double complex precision
|
||||||
|
#
|
||||||
|
set (ZLEV3 cblas_zgemm.c cblas_zsymm.c cblas_zhemm.c cblas_zherk.c
|
||||||
|
cblas_zher2k.c cblas_ztrmm.c cblas_ztrsm.c cblas_zsyrk.c
|
||||||
|
cblas_zsyr2k.c)
|
||||||
|
#
|
||||||
|
# All object files
|
||||||
|
#
|
||||||
|
set (ALEV3 ${slev3} ${dlev3} ${clev3} ${zlev3})
|
||||||
|
|
||||||
|
# default build all of it
|
||||||
|
set(ALLOBJ ${SCLEV1} ${SLEV1} ${SLEV2} ${SLEV3} ${ERRHAND}
|
||||||
|
${DLEV1} ${DLEV2} ${DLEV3}
|
||||||
|
${CLEV1} ${CLEV2} ${CLEV3}
|
||||||
|
${ZLEV1} ${ZLEV2} ${ZLEV3} )
|
||||||
|
|
||||||
|
# Single real precision
|
||||||
|
if(CBLAS_SINGLE)
|
||||||
|
set(ALLOBJ ${SCLEV1} ${SLEV1} ${SLEV2} ${SLEV3} ${ERRHAND})
|
||||||
|
endif(CBLAS_SINGLE)
|
||||||
|
|
||||||
|
# Double real precision
|
||||||
|
if(CBLAS_DOUBLE)
|
||||||
|
set(ALLOBJ ${DLEV1} ${DLEV2} ${DLEV3} ${ERRHAND})
|
||||||
|
endif(CBLAS_DOUBLE)
|
||||||
|
|
||||||
|
# Single complex precision
|
||||||
|
if (CBLAS_COMPLEX)
|
||||||
|
set(ALLOBJ ${CLEV1} ${SCLEV1} ${CLEV2} ${CLEV3} ${ERRHAND})
|
||||||
|
endif(CBLAS_COMPLEX)
|
||||||
|
|
||||||
|
# Double complex precision
|
||||||
|
if (CBLAS_COMPLEX16)
|
||||||
|
set(ALLOBJ ${ZLEV1} ${ZLEV2} ${ZLEV3} ${ERRHAND})
|
||||||
|
endif(CBLAS_COMPLEX16)
|
||||||
|
|
||||||
|
add_library(cblas ${ALLOBJ})
|
||||||
|
target_link_libraries(cblas ${BLAS_LIBRARIES} )
|
||||||
|
lapack_install_library(cblas)
|
||||||
|
|
@ -0,0 +1,249 @@
|
||||||
|
# This Makefile compiles the CBLAS routines
|
||||||
|
#
|
||||||
|
include ../../make.inc
|
||||||
|
|
||||||
|
#
|
||||||
|
# Erase all object and archive files
|
||||||
|
#
|
||||||
|
all: cblaslib
|
||||||
|
|
||||||
|
clean:
|
||||||
|
rm -f *.o a.out core
|
||||||
|
|
||||||
|
# Error handling routines for level 2 & 3
|
||||||
|
|
||||||
|
errhand = cblas_globals.o cblas_xerbla.o xerbla.o
|
||||||
|
|
||||||
|
# Object files of all routines
|
||||||
|
alev = $(alev1) $(alev2) $(alev3) $(errhand)
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# CBLAS routines
|
||||||
|
#
|
||||||
|
# Level 1
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single real precision
|
||||||
|
#
|
||||||
|
slev1 = cblas_srotg.o cblas_srotmg.o cblas_srot.o cblas_srotm.o \
|
||||||
|
cblas_sswap.o cblas_sscal.o cblas_scopy.o cblas_saxpy.o \
|
||||||
|
cblas_sdot.o cblas_sdsdot.o cblas_snrm2.o cblas_sasum.o \
|
||||||
|
cblas_isamax.o sdotsub.o sdsdotsub.o snrm2sub.o sasumsub.o \
|
||||||
|
isamaxsub.o
|
||||||
|
#
|
||||||
|
# All object files for double real precision
|
||||||
|
#
|
||||||
|
dlev1 = cblas_drotg.o cblas_drotmg.o cblas_drot.o cblas_drotm.o \
|
||||||
|
cblas_dswap.o cblas_dscal.o cblas_dcopy.o cblas_daxpy.o \
|
||||||
|
cblas_ddot.o cblas_dsdot.o cblas_dnrm2.o cblas_dasum.o \
|
||||||
|
cblas_idamax.o ddotsub.o dsdotsub.o dnrm2sub.o \
|
||||||
|
dasumsub.o idamaxsub.o
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single complex precision
|
||||||
|
#
|
||||||
|
clev1 = cblas_cswap.o cblas_cscal.o cblas_csscal.o cblas_ccopy.o \
|
||||||
|
cblas_caxpy.o cblas_cdotu_sub.o cblas_cdotc_sub.o \
|
||||||
|
cblas_icamax.o cdotcsub.o cdotusub.o icamaxsub.o
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for double complex precision
|
||||||
|
#
|
||||||
|
zlev1 = cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \
|
||||||
|
cblas_zaxpy.o cblas_zdotu_sub.o cblas_zdotc_sub.o cblas_dznrm2.o \
|
||||||
|
cblas_dzasum.o cblas_izamax.o zdotcsub.o zdotusub.o \
|
||||||
|
dzasumsub.o dznrm2sub.o izamaxsub.o
|
||||||
|
|
||||||
|
#
|
||||||
|
# Common files for single / complex precision
|
||||||
|
#
|
||||||
|
sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files
|
||||||
|
#
|
||||||
|
alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1)
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Make an archive file
|
||||||
|
#
|
||||||
|
|
||||||
|
# Single real precision
|
||||||
|
slib1: $(slev1) $(sclev1)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev1) $(sclev1)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# Double real precision
|
||||||
|
dlib1: $(dlev1)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev1)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# Single complex precision
|
||||||
|
clib1: $(clev1) $(sclev1)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev1) $(sclev1)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# Double complex precision
|
||||||
|
zlib1: $(zlev1)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev1)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# All precisions
|
||||||
|
all1: $(alev1)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev1)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# CBLAS routines
|
||||||
|
#
|
||||||
|
# Level 2
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single real precision
|
||||||
|
#
|
||||||
|
slev2 = cblas_sgemv.o cblas_sgbmv.o cblas_sger.o cblas_ssbmv.o cblas_sspmv.o \
|
||||||
|
cblas_sspr.o cblas_sspr2.o cblas_ssymv.o cblas_ssyr.o cblas_ssyr2.o \
|
||||||
|
cblas_stbmv.o cblas_stbsv.o cblas_stpmv.o cblas_stpsv.o cblas_strmv.o \
|
||||||
|
cblas_strsv.o
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for double real precision
|
||||||
|
#
|
||||||
|
dlev2 = cblas_dgemv.o cblas_dgbmv.o cblas_dger.o cblas_dsbmv.o cblas_dspmv.o \
|
||||||
|
cblas_dspr.o cblas_dspr2.o cblas_dsymv.o cblas_dsyr.o cblas_dsyr2.o \
|
||||||
|
cblas_dtbmv.o cblas_dtbsv.o cblas_dtpmv.o cblas_dtpsv.o cblas_dtrmv.o \
|
||||||
|
cblas_dtrsv.o
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single complex precision
|
||||||
|
#
|
||||||
|
clev2 = cblas_cgemv.o cblas_cgbmv.o cblas_chemv.o cblas_chbmv.o cblas_chpmv.o \
|
||||||
|
cblas_ctrmv.o cblas_ctbmv.o cblas_ctpmv.o cblas_ctrsv.o cblas_ctbsv.o \
|
||||||
|
cblas_ctpsv.o cblas_cgeru.o cblas_cgerc.o cblas_cher.o cblas_cher2.o \
|
||||||
|
cblas_chpr.o cblas_chpr2.o
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for double complex precision
|
||||||
|
#
|
||||||
|
zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \
|
||||||
|
cblas_ztrmv.o cblas_ztbmv.o cblas_ztpmv.o cblas_ztrsv.o cblas_ztbsv.o \
|
||||||
|
cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \
|
||||||
|
cblas_zhpr.o cblas_zhpr2.o
|
||||||
|
#
|
||||||
|
# All object files
|
||||||
|
#
|
||||||
|
alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Make an archive file
|
||||||
|
#
|
||||||
|
|
||||||
|
# Single real precision
|
||||||
|
slib2: $(slev2) $(errhand)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev2) $(errhand)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# Double real precision
|
||||||
|
dlib2: $(dlev2) $(errhand)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev2) $(errhand)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# Single complex precision
|
||||||
|
clib2: $(clev2) $(errhand)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev2) $(errhand)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# Double complex precision
|
||||||
|
zlib2: $(zlev2) $(errhand)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev2) $(errhand)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# All precisions
|
||||||
|
all2: $(alev2) $(errhand)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev2) $(errhand)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# CBLAS routines
|
||||||
|
#
|
||||||
|
# Level 3
|
||||||
|
#
|
||||||
|
#
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single real precision
|
||||||
|
#
|
||||||
|
slev3 = cblas_sgemm.o cblas_ssymm.o cblas_ssyrk.o cblas_ssyr2k.o cblas_strmm.o\
|
||||||
|
cblas_strsm.o
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for double real precision
|
||||||
|
#
|
||||||
|
dlev3 = cblas_dgemm.o cblas_dsymm.o cblas_dsyrk.o cblas_dsyr2k.o cblas_dtrmm.o\
|
||||||
|
cblas_dtrsm.o
|
||||||
|
|
||||||
|
#
|
||||||
|
# All object files for single complex precision
|
||||||
|
#
|
||||||
|
clev3 = cblas_cgemm.o cblas_csymm.o cblas_chemm.o cblas_cherk.o\
|
||||||
|
cblas_cher2k.o cblas_ctrmm.o cblas_ctrsm.o cblas_csyrk.o\
|
||||||
|
cblas_csyr2k.o
|
||||||
|
#
|
||||||
|
# All object files for double complex precision
|
||||||
|
#
|
||||||
|
zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o\
|
||||||
|
cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o\
|
||||||
|
cblas_zsyr2k.o
|
||||||
|
#
|
||||||
|
# All object files
|
||||||
|
#
|
||||||
|
alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3)
|
||||||
|
|
||||||
|
#
|
||||||
|
# Make an archive file
|
||||||
|
#
|
||||||
|
|
||||||
|
# Single real precision
|
||||||
|
slib3: $(slev3) $(errhand)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev3) $(errhand)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# Double real precision
|
||||||
|
dlib3: $(dlev3) $(errhand)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev3) $(errhand)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# Single complex precision
|
||||||
|
clib3: $(clev3) $(errhand)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev3) $(errhand)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# Single complex precision
|
||||||
|
zlib3: $(zlev3) $(errhand)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev3) $(errhand)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# All precisions
|
||||||
|
all3: $(alev3) $(errhand)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev3)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
# All levels and precisions
|
||||||
|
cblaslib: $(alev)
|
||||||
|
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev)
|
||||||
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
FRC:
|
||||||
|
@FRC=$(FRC)
|
||||||
|
|
||||||
|
.c.o:
|
||||||
|
$(CC) -c $(CFLAGS) -I ../include -o $@ $<
|
||||||
|
|
||||||
|
.f.o:
|
||||||
|
$(FORTRAN) $(OPTS) -c $< -o $@
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
/*
|
||||||
|
* cblas_caxpy.c
|
||||||
|
*
|
||||||
|
* The program is a C interface to caxpy.
|
||||||
|
*
|
||||||
|
* Written by Keita Teranishi. 2/11/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_caxpy( const int N, const void *alpha, const void *X,
|
||||||
|
const int incX, void *Y, const int incY)
|
||||||
|
{
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incX
|
||||||
|
#define F77_incY incY
|
||||||
|
#endif
|
||||||
|
F77_caxpy( &F77_N, alpha, X, &F77_incX, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
/*
|
||||||
|
* cblas_ccopy.c
|
||||||
|
*
|
||||||
|
* The program is a C interface to ccopy.
|
||||||
|
*
|
||||||
|
* Written by Keita Teranishi. 2/11/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_ccopy( const int N, const void *X,
|
||||||
|
const int incX, void *Y, const int incY)
|
||||||
|
{
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incX
|
||||||
|
#define F77_incY incY
|
||||||
|
#endif
|
||||||
|
F77_ccopy( &F77_N, X, &F77_incX, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,24 @@
|
||||||
|
/*
|
||||||
|
* cblas_cdotc_sub.c
|
||||||
|
*
|
||||||
|
* The program is a C interface to cdotc.
|
||||||
|
* It calls the fortran wrapper before calling cdotc.
|
||||||
|
*
|
||||||
|
* Written by Keita Teranishi. 2/11/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cdotc_sub( const int N, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *dotc)
|
||||||
|
{
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incX
|
||||||
|
#define F77_incY incY
|
||||||
|
#endif
|
||||||
|
F77_cdotc_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotc);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,24 @@
|
||||||
|
/*
|
||||||
|
* cblas_cdotu_sub.c
|
||||||
|
*
|
||||||
|
* The program is a C interface to cdotu.
|
||||||
|
* It calls the fortran wrapper before calling cdotu.
|
||||||
|
*
|
||||||
|
* Written by Keita Teranishi. 2/11/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cdotu_sub( const int N, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *dotu)
|
||||||
|
{
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incX
|
||||||
|
#define F77_incY incY
|
||||||
|
#endif
|
||||||
|
F77_cdotu_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotu);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,165 @@
|
||||||
|
/*
|
||||||
|
* cblas_cgbmv.c
|
||||||
|
* The program is a C interface of cgbmv
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/20/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cgbmv(const CBLAS_LAYOUT layout,
|
||||||
|
const CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||||
|
const int KL, const int KU,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *X, const int incX, const void *beta,
|
||||||
|
void *Y, const int incY)
|
||||||
|
{
|
||||||
|
char TA;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_TA;
|
||||||
|
#else
|
||||||
|
#define F77_TA &TA
|
||||||
|
#endif
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||||
|
F77_INT F77_KL=KL,F77_KU=KU;
|
||||||
|
#else
|
||||||
|
#define F77_M M
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_KL KL
|
||||||
|
#define F77_KU KU
|
||||||
|
#define F77_incX incx
|
||||||
|
#define F77_incY incY
|
||||||
|
#endif
|
||||||
|
int n=0, i=0, incx=incX;
|
||||||
|
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
|
||||||
|
float ALPHA[2],BETA[2];
|
||||||
|
int tincY, tincx;
|
||||||
|
float *x=(float *)X, *y=(float *)Y, *st=0, *tx=0;
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (TransA == CblasNoTrans) TA = 'N';
|
||||||
|
else if (TransA == CblasTrans) TA = 'T';
|
||||||
|
else if (TransA == CblasConjTrans) TA = 'C';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cgbmv","Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
#endif
|
||||||
|
F77_cgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, alpha,
|
||||||
|
A, &F77_lda, X, &F77_incX, beta, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if (TransA == CblasNoTrans) TA = 'T';
|
||||||
|
else if (TransA == CblasTrans) TA = 'N';
|
||||||
|
else if (TransA == CblasConjTrans)
|
||||||
|
{
|
||||||
|
ALPHA[0]= *alp;
|
||||||
|
ALPHA[1]= -alp[1];
|
||||||
|
BETA[0]= *bet;
|
||||||
|
BETA[1]= -bet[1];
|
||||||
|
TA = 'N';
|
||||||
|
if (M > 0)
|
||||||
|
{
|
||||||
|
n = M << 1;
|
||||||
|
x = malloc(n*sizeof(float));
|
||||||
|
tx = x;
|
||||||
|
|
||||||
|
if( incX > 0 ) {
|
||||||
|
i = incX << 1 ;
|
||||||
|
tincx = 2;
|
||||||
|
st= x+n;
|
||||||
|
} else {
|
||||||
|
i = incX *(-2);
|
||||||
|
tincx = -2;
|
||||||
|
st = x-2;
|
||||||
|
x +=(n-2);
|
||||||
|
}
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = *xx;
|
||||||
|
x[1] = -xx[1];
|
||||||
|
x += tincx ;
|
||||||
|
xx += i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
x=tx;
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_incX = 1;
|
||||||
|
#else
|
||||||
|
incx = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if( incY > 0 )
|
||||||
|
tincY = incY;
|
||||||
|
else
|
||||||
|
tincY = -incY;
|
||||||
|
|
||||||
|
y++;
|
||||||
|
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
i = tincY << 1;
|
||||||
|
n = i * N ;
|
||||||
|
st = y + n;
|
||||||
|
do {
|
||||||
|
*y = -(*y);
|
||||||
|
y += i;
|
||||||
|
} while(y != st);
|
||||||
|
y -= n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else x = (float *) X;
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cgbmv","Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
#endif
|
||||||
|
if (TransA == CblasConjTrans)
|
||||||
|
F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, ALPHA,
|
||||||
|
A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY);
|
||||||
|
else
|
||||||
|
F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, alpha,
|
||||||
|
A ,&F77_lda, x,&F77_incX, beta, Y, &F77_incY);
|
||||||
|
if (TransA == CblasConjTrans)
|
||||||
|
{
|
||||||
|
if (x != X) free(x);
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*y = -(*y);
|
||||||
|
y += i;
|
||||||
|
}
|
||||||
|
while (y != st);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_cgbmv", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,109 @@
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
* cblas_cgemm.c
|
||||||
|
* This program is a C interface to cgemm.
|
||||||
|
* Written by Keita Teranishi
|
||||||
|
* 4/8/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA,
|
||||||
|
const CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||||
|
const int K, const void *alpha, const void *A,
|
||||||
|
const int lda, const void *B, const int ldb,
|
||||||
|
const void *beta, void *C, const int ldc)
|
||||||
|
{
|
||||||
|
char TA, TB;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_TA, F77_TB;
|
||||||
|
#else
|
||||||
|
#define F77_TA &TA
|
||||||
|
#define F77_TB &TB
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_M=M, F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb;
|
||||||
|
F77_INT F77_ldc=ldc;
|
||||||
|
#else
|
||||||
|
#define F77_M M
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_K K
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_ldb ldb
|
||||||
|
#define F77_ldc ldc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
|
||||||
|
if( layout == CblasColMajor )
|
||||||
|
{
|
||||||
|
if(TransA == CblasTrans) TA='T';
|
||||||
|
else if ( TransA == CblasConjTrans ) TA='C';
|
||||||
|
else if ( TransA == CblasNoTrans ) TA='N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cgemm", "Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(TransB == CblasTrans) TB='T';
|
||||||
|
else if ( TransB == CblasConjTrans ) TB='C';
|
||||||
|
else if ( TransB == CblasNoTrans ) TB='N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
F77_TB = C2F_CHAR(&TB);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_cgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, alpha, A,
|
||||||
|
&F77_lda, B, &F77_ldb, beta, C, &F77_ldc);
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if(TransA == CblasTrans) TB='T';
|
||||||
|
else if ( TransA == CblasConjTrans ) TB='C';
|
||||||
|
else if ( TransA == CblasNoTrans ) TB='N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cgemm", "Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(TransB == CblasTrans) TA='T';
|
||||||
|
else if ( TransB == CblasConjTrans ) TA='C';
|
||||||
|
else if ( TransB == CblasNoTrans ) TA='N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
F77_TB = C2F_CHAR(&TB);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_cgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, alpha, B,
|
||||||
|
&F77_ldb, A, &F77_lda, beta, C, &F77_ldc);
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_cgemm", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,162 @@
|
||||||
|
/*
|
||||||
|
* cblas_cgemv.c
|
||||||
|
* The program is a C interface of cgemv
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/20/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cgemv(const CBLAS_LAYOUT layout,
|
||||||
|
const CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *X, const int incX, const void *beta,
|
||||||
|
void *Y, const int incY)
|
||||||
|
{
|
||||||
|
char TA;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_TA;
|
||||||
|
#else
|
||||||
|
#define F77_TA &TA
|
||||||
|
#endif
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_M M
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_incX incx
|
||||||
|
#define F77_incY incY
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int n=0, i=0, incx=incX;
|
||||||
|
const float *xx= (const float *)X;
|
||||||
|
float ALPHA[2],BETA[2];
|
||||||
|
int tincY, tincx;
|
||||||
|
float *x=(float *)X, *y=(float *)Y, *st=0, *tx=0;
|
||||||
|
const float *stx = x;
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (TransA == CblasNoTrans) TA = 'N';
|
||||||
|
else if (TransA == CblasTrans) TA = 'T';
|
||||||
|
else if (TransA == CblasConjTrans) TA = 'C';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cgemv","Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
#endif
|
||||||
|
F77_cgemv(F77_TA, &F77_M, &F77_N, alpha, A, &F77_lda, X, &F77_incX,
|
||||||
|
beta, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
|
||||||
|
if (TransA == CblasNoTrans) TA = 'T';
|
||||||
|
else if (TransA == CblasTrans) TA = 'N';
|
||||||
|
else if (TransA == CblasConjTrans)
|
||||||
|
{
|
||||||
|
ALPHA[0]= *( (const float *) alpha );
|
||||||
|
ALPHA[1]= -( *( (const float *) alpha+1) );
|
||||||
|
BETA[0]= *( (const float *) beta );
|
||||||
|
BETA[1]= -( *( (const float *) beta+1 ) );
|
||||||
|
TA = 'N';
|
||||||
|
if (M > 0)
|
||||||
|
{
|
||||||
|
n = M << 1;
|
||||||
|
x = malloc(n*sizeof(float));
|
||||||
|
tx = x;
|
||||||
|
if( incX > 0 ) {
|
||||||
|
i = incX << 1 ;
|
||||||
|
tincx = 2;
|
||||||
|
st= x+n;
|
||||||
|
} else {
|
||||||
|
i = incX *(-2);
|
||||||
|
tincx = -2;
|
||||||
|
st = x-2;
|
||||||
|
x +=(n-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = *xx;
|
||||||
|
x[1] = -xx[1];
|
||||||
|
x += tincx ;
|
||||||
|
xx += i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
x=tx;
|
||||||
|
|
||||||
|
F77_incX = 1;
|
||||||
|
|
||||||
|
if(incY > 0)
|
||||||
|
tincY = incY;
|
||||||
|
else
|
||||||
|
tincY = -incY;
|
||||||
|
|
||||||
|
y++;
|
||||||
|
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
i = tincY << 1;
|
||||||
|
n = i * N ;
|
||||||
|
st = y + n;
|
||||||
|
do {
|
||||||
|
*y = -(*y);
|
||||||
|
y += i;
|
||||||
|
} while(y != st);
|
||||||
|
y -= n;
|
||||||
|
}
|
||||||
|
stx = x;
|
||||||
|
}
|
||||||
|
else stx = (const float *)X;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cgemv","Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
#endif
|
||||||
|
if (TransA == CblasConjTrans)
|
||||||
|
F77_cgemv(F77_TA, &F77_N, &F77_M, ALPHA, A, &F77_lda, stx,
|
||||||
|
&F77_incX, BETA, Y, &F77_incY);
|
||||||
|
else
|
||||||
|
F77_cgemv(F77_TA, &F77_N, &F77_M, alpha, A, &F77_lda, x,
|
||||||
|
&F77_incX, beta, Y, &F77_incY);
|
||||||
|
|
||||||
|
if (TransA == CblasConjTrans)
|
||||||
|
{
|
||||||
|
if (x != (const float *)X) free(x);
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*y = -(*y);
|
||||||
|
y += i;
|
||||||
|
}
|
||||||
|
while (y != st);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_cgemv", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,84 @@
|
||||||
|
/*
|
||||||
|
* cblas_cgerc.c
|
||||||
|
* The program is a C interface to cgerc.
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/20/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cgerc(const CBLAS_LAYOUT layout, const int M, const int N,
|
||||||
|
const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *A, const int lda)
|
||||||
|
{
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_M M
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incX
|
||||||
|
#define F77_incY incy
|
||||||
|
#define F77_lda lda
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int n, i, tincy, incy=incY;
|
||||||
|
float *y=(float *)Y, *yy=(float *)Y, *ty, *st;
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
F77_cgerc( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A,
|
||||||
|
&F77_lda);
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
n = N << 1;
|
||||||
|
y = malloc(n*sizeof(float));
|
||||||
|
|
||||||
|
ty = y;
|
||||||
|
if( incY > 0 ) {
|
||||||
|
i = incY << 1;
|
||||||
|
tincy = 2;
|
||||||
|
st= y+n;
|
||||||
|
} else {
|
||||||
|
i = incY *(-2);
|
||||||
|
tincy = -2;
|
||||||
|
st = y-2;
|
||||||
|
y +=(n-2);
|
||||||
|
}
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*y = *yy;
|
||||||
|
y[1] = -yy[1];
|
||||||
|
y += tincy ;
|
||||||
|
yy += i;
|
||||||
|
}
|
||||||
|
while (y != st);
|
||||||
|
y = ty;
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_incY = 1;
|
||||||
|
#else
|
||||||
|
incy = 1;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else y = (float *) Y;
|
||||||
|
|
||||||
|
F77_cgeru( &F77_N, &F77_M, alpha, y, &F77_incY, X, &F77_incX, A,
|
||||||
|
&F77_lda);
|
||||||
|
if(Y!=y)
|
||||||
|
free(y);
|
||||||
|
|
||||||
|
} else cblas_xerbla(1, "cblas_cgerc", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,45 @@
|
||||||
|
/*
|
||||||
|
* cblas_cgeru.c
|
||||||
|
* The program is a C interface to cgeru.
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/20/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cgeru(const CBLAS_LAYOUT layout, const int M, const int N,
|
||||||
|
const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *A, const int lda)
|
||||||
|
{
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_M M
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incX
|
||||||
|
#define F77_incY incY
|
||||||
|
#define F77_lda lda
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
F77_cgeru( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A,
|
||||||
|
&F77_lda);
|
||||||
|
}
|
||||||
|
else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
F77_cgeru( &F77_N, &F77_M, alpha, Y, &F77_incY, X, &F77_incX, A,
|
||||||
|
&F77_lda);
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_cgeru","Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,159 @@
|
||||||
|
/*
|
||||||
|
* cblas_chbmv.c
|
||||||
|
* The program is a C interface to chbmv
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/18/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
void cblas_chbmv(const CBLAS_LAYOUT layout,
|
||||||
|
const CBLAS_UPLO Uplo,const int N,const int K,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *X, const int incX, const void *beta,
|
||||||
|
void *Y, const int incY)
|
||||||
|
{
|
||||||
|
char UL;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_K K
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_incX incx
|
||||||
|
#define F77_incY incY
|
||||||
|
#endif
|
||||||
|
int n, i=0, incx=incX;
|
||||||
|
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
|
||||||
|
float ALPHA[2],BETA[2];
|
||||||
|
int tincY, tincx;
|
||||||
|
float *x=(float *)X, *y=(float *)Y, *st=0, *tx;
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (Uplo == CblasLower) UL = 'L';
|
||||||
|
else if (Uplo == CblasUpper) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chbmv","Illegal Uplo setting, %d\n",Uplo );
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
F77_chbmv(F77_UL, &F77_N, &F77_K, alpha, A, &F77_lda, X,
|
||||||
|
&F77_incX, beta, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
ALPHA[0]= *alp;
|
||||||
|
ALPHA[1]= -alp[1];
|
||||||
|
BETA[0]= *bet;
|
||||||
|
BETA[1]= -bet[1];
|
||||||
|
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
n = N << 1;
|
||||||
|
x = malloc(n*sizeof(float));
|
||||||
|
|
||||||
|
tx = x;
|
||||||
|
if( incX > 0 ) {
|
||||||
|
i = incX << 1 ;
|
||||||
|
tincx = 2;
|
||||||
|
st= x+n;
|
||||||
|
} else {
|
||||||
|
i = incX *(-2);
|
||||||
|
tincx = -2;
|
||||||
|
st = x-2;
|
||||||
|
x +=(n-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = *xx;
|
||||||
|
x[1] = -xx[1];
|
||||||
|
x += tincx ;
|
||||||
|
xx += i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
x=tx;
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_incX = 1;
|
||||||
|
#else
|
||||||
|
incx = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(incY > 0)
|
||||||
|
tincY = incY;
|
||||||
|
else
|
||||||
|
tincY = -incY;
|
||||||
|
y++;
|
||||||
|
|
||||||
|
i = tincY << 1;
|
||||||
|
n = i * N ;
|
||||||
|
st = y + n;
|
||||||
|
do {
|
||||||
|
*y = -(*y);
|
||||||
|
y += i;
|
||||||
|
} while(y != st);
|
||||||
|
y -= n;
|
||||||
|
} else
|
||||||
|
x = (float *) X;
|
||||||
|
|
||||||
|
if (Uplo == CblasUpper) UL = 'L';
|
||||||
|
else if (Uplo == CblasLower) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chbmv","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
F77_chbmv(F77_UL, &F77_N, &F77_K, ALPHA,
|
||||||
|
A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(1, "cblas_chbmv","Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ( layout == CblasRowMajor )
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if(X!=x)
|
||||||
|
free(x);
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*y = -(*y);
|
||||||
|
y += i;
|
||||||
|
}
|
||||||
|
while (y != st);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
* cblas_chemm.c
|
||||||
|
* This program is a C interface to chemm.
|
||||||
|
* Written by Keita Teranishi
|
||||||
|
* 4/8/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_chemm(const CBLAS_LAYOUT layout, const CBLAS_SIDE Side,
|
||||||
|
const CBLAS_UPLO Uplo, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const void *beta,
|
||||||
|
void *C, const int ldc)
|
||||||
|
{
|
||||||
|
char SD, UL;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_SD, F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_SD &SD
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb;
|
||||||
|
F77_INT F77_ldc=ldc;
|
||||||
|
#else
|
||||||
|
#define F77_M M
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_ldb ldb
|
||||||
|
#define F77_ldc ldc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
|
||||||
|
if( layout == CblasColMajor )
|
||||||
|
{
|
||||||
|
if( Side == CblasRight) SD='R';
|
||||||
|
else if ( Side == CblasLeft ) SD='L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chemm", "Illegal Side setting, %d\n", Side);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( Uplo == CblasUpper) UL='U';
|
||||||
|
else if ( Uplo == CblasLower ) UL='L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_chemm", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_SD = C2F_CHAR(&SD);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_chemm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda,
|
||||||
|
B, &F77_ldb, beta, C, &F77_ldc);
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if( Side == CblasRight) SD='L';
|
||||||
|
else if ( Side == CblasLeft ) SD='R';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chemm", "Illegal Side setting, %d\n", Side);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( Uplo == CblasUpper) UL='L';
|
||||||
|
else if ( Uplo == CblasLower ) UL='U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_chemm", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_SD = C2F_CHAR(&SD);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_chemm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A,
|
||||||
|
&F77_lda, B, &F77_ldb, beta, C, &F77_ldc);
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_chemm", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,160 @@
|
||||||
|
/*
|
||||||
|
* cblas_chemv.c
|
||||||
|
* The program is a C interface to chemv
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/18/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_chemv(const CBLAS_LAYOUT layout,
|
||||||
|
const CBLAS_UPLO Uplo, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *X, const int incX, const void *beta,
|
||||||
|
void *Y, const int incY)
|
||||||
|
{
|
||||||
|
char UL;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_incX incx
|
||||||
|
#define F77_incY incY
|
||||||
|
#endif
|
||||||
|
int n=0, i=0, incx=incX;
|
||||||
|
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
|
||||||
|
float ALPHA[2],BETA[2];
|
||||||
|
int tincY, tincx;
|
||||||
|
float *x=(float *)X, *y=(float *)Y, *st=0, *tx;
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (Uplo == CblasUpper) UL = 'U';
|
||||||
|
else if (Uplo == CblasLower) UL = 'L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chemv","Illegal Uplo setting, %d\n",Uplo );
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
F77_chemv(F77_UL, &F77_N, alpha, A, &F77_lda, X, &F77_incX,
|
||||||
|
beta, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
ALPHA[0]= *alp;
|
||||||
|
ALPHA[1]= -alp[1];
|
||||||
|
BETA[0]= *bet;
|
||||||
|
BETA[1]= -bet[1];
|
||||||
|
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
n = N << 1;
|
||||||
|
x = malloc(n*sizeof(float));
|
||||||
|
|
||||||
|
tx = x;
|
||||||
|
if( incX > 0 ) {
|
||||||
|
i = incX << 1 ;
|
||||||
|
tincx = 2;
|
||||||
|
st= x+n;
|
||||||
|
} else {
|
||||||
|
i = incX *(-2);
|
||||||
|
tincx = -2;
|
||||||
|
st = x-2;
|
||||||
|
x +=(n-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = *xx;
|
||||||
|
x[1] = -xx[1];
|
||||||
|
x += tincx ;
|
||||||
|
xx += i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
x=tx;
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_incX = 1;
|
||||||
|
#else
|
||||||
|
incx = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(incY > 0)
|
||||||
|
tincY = incY;
|
||||||
|
else
|
||||||
|
tincY = -incY;
|
||||||
|
y++;
|
||||||
|
|
||||||
|
i = tincY << 1;
|
||||||
|
n = i * N ;
|
||||||
|
st = y + n;
|
||||||
|
do {
|
||||||
|
*y = -(*y);
|
||||||
|
y += i;
|
||||||
|
} while(y != st);
|
||||||
|
y -= n;
|
||||||
|
} else
|
||||||
|
x = (float *) X;
|
||||||
|
|
||||||
|
|
||||||
|
if (Uplo == CblasUpper) UL = 'L';
|
||||||
|
else if (Uplo == CblasLower) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chemv","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
F77_chemv(F77_UL, &F77_N, ALPHA, A, &F77_lda, x, &F77_incX,
|
||||||
|
BETA, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(1, "cblas_chemv","Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ( layout == CblasRowMajor )
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if ( X != x )
|
||||||
|
free(x);
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*y = -(*y);
|
||||||
|
y += i;
|
||||||
|
}
|
||||||
|
while (y != st);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,116 @@
|
||||||
|
/*
|
||||||
|
* cblas_cher.c
|
||||||
|
* The program is a C interface to cher.
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/20/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cher(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const int N, const float alpha, const void *X, const int incX
|
||||||
|
,void *A, const int lda)
|
||||||
|
{
|
||||||
|
char UL;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_lda=lda, F77_incX=incX;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_incX incx
|
||||||
|
#endif
|
||||||
|
int n, i, tincx, incx=incX;
|
||||||
|
float *x=(float *)X, *xx=(float *)X, *tx, *st;
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (Uplo == CblasLower) UL = 'L';
|
||||||
|
else if (Uplo == CblasUpper) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cher","Illegal Uplo setting, %d\n",Uplo );
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_cher(F77_UL, &F77_N, &alpha, X, &F77_incX, A, &F77_lda);
|
||||||
|
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if (Uplo == CblasUpper) UL = 'L';
|
||||||
|
else if (Uplo == CblasLower) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cher","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
n = N << 1;
|
||||||
|
x = malloc(n*sizeof(float));
|
||||||
|
tx = x;
|
||||||
|
if( incX > 0 ) {
|
||||||
|
i = incX << 1 ;
|
||||||
|
tincx = 2;
|
||||||
|
st= x+n;
|
||||||
|
} else {
|
||||||
|
i = incX *(-2);
|
||||||
|
tincx = -2;
|
||||||
|
st = x-2;
|
||||||
|
x +=(n-2);
|
||||||
|
}
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = *xx;
|
||||||
|
x[1] = -xx[1];
|
||||||
|
x += tincx ;
|
||||||
|
xx += i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
x=tx;
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_incX = 1;
|
||||||
|
#else
|
||||||
|
incx = 1;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else x = (float *) X;
|
||||||
|
F77_cher(F77_UL, &F77_N, &alpha, x, &F77_incX, A, &F77_lda);
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
cblas_xerbla(1, "cblas_cher","Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(X!=x)
|
||||||
|
free(x);
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,152 @@
|
||||||
|
/*
|
||||||
|
* cblas_cher2.c
|
||||||
|
* The program is a C interface to cher2.
|
||||||
|
*
|
||||||
|
* Keita Teranishi 3/23/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cher2(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const int N, const void *alpha, const void *X, const int incX,
|
||||||
|
const void *Y, const int incY, void *A, const int lda)
|
||||||
|
{
|
||||||
|
char UL;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_incX incx
|
||||||
|
#define F77_incY incy
|
||||||
|
#endif
|
||||||
|
int n, i, j, tincx, tincy, incx=incX, incy=incY;
|
||||||
|
float *x=(float *)X, *xx=(float *)X, *y=(float *)Y,
|
||||||
|
*yy=(float *)Y, *tx, *ty, *stx, *sty;
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (Uplo == CblasLower) UL = 'L';
|
||||||
|
else if (Uplo == CblasUpper) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cher2","Illegal Uplo setting, %d\n",Uplo );
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_cher2(F77_UL, &F77_N, alpha, X, &F77_incX,
|
||||||
|
Y, &F77_incY, A, &F77_lda);
|
||||||
|
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if (Uplo == CblasUpper) UL = 'L';
|
||||||
|
else if (Uplo == CblasLower) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cher2","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
n = N << 1;
|
||||||
|
x = malloc(n*sizeof(float));
|
||||||
|
y = malloc(n*sizeof(float));
|
||||||
|
tx = x;
|
||||||
|
ty = y;
|
||||||
|
if( incX > 0 ) {
|
||||||
|
i = incX << 1 ;
|
||||||
|
tincx = 2;
|
||||||
|
stx= x+n;
|
||||||
|
} else {
|
||||||
|
i = incX *(-2);
|
||||||
|
tincx = -2;
|
||||||
|
stx = x-2;
|
||||||
|
x +=(n-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( incY > 0 ) {
|
||||||
|
j = incY << 1;
|
||||||
|
tincy = 2;
|
||||||
|
sty= y+n;
|
||||||
|
} else {
|
||||||
|
j = incY *(-2);
|
||||||
|
tincy = -2;
|
||||||
|
sty = y-2;
|
||||||
|
y +=(n-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = *xx;
|
||||||
|
x[1] = -xx[1];
|
||||||
|
x += tincx ;
|
||||||
|
xx += i;
|
||||||
|
}
|
||||||
|
while (x != stx);
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*y = *yy;
|
||||||
|
y[1] = -yy[1];
|
||||||
|
y += tincy ;
|
||||||
|
yy += j;
|
||||||
|
}
|
||||||
|
while (y != sty);
|
||||||
|
|
||||||
|
x=tx;
|
||||||
|
y=ty;
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_incX = 1;
|
||||||
|
F77_incY = 1;
|
||||||
|
#else
|
||||||
|
incx = 1;
|
||||||
|
incy = 1;
|
||||||
|
#endif
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
x = (float *) X;
|
||||||
|
y = (float *) Y;
|
||||||
|
}
|
||||||
|
F77_cher2(F77_UL, &F77_N, alpha, y, &F77_incY, x,
|
||||||
|
&F77_incX, A, &F77_lda);
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
cblas_xerbla(1, "cblas_cher2","Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(X!=x)
|
||||||
|
free(x);
|
||||||
|
if(Y!=y)
|
||||||
|
free(y);
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,111 @@
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
* cblas_cher2k.c
|
||||||
|
* This program is a C interface to cher2k.
|
||||||
|
* Written by Keita Teranishi
|
||||||
|
* 4/8/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cher2k(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const float beta,
|
||||||
|
void *C, const int ldc)
|
||||||
|
{
|
||||||
|
char UL, TR;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_TR, F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_TR &TR
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb;
|
||||||
|
F77_INT F77_ldc=ldc;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_K K
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_ldb ldb
|
||||||
|
#define F77_ldc ldc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
float ALPHA[2];
|
||||||
|
const float *alp=(float *)alpha;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
if( layout == CblasColMajor )
|
||||||
|
{
|
||||||
|
|
||||||
|
if( Uplo == CblasUpper) UL='U';
|
||||||
|
else if ( Uplo == CblasLower ) UL='L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cher2k", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( Trans == CblasTrans) TR ='T';
|
||||||
|
else if ( Trans == CblasConjTrans ) TR='C';
|
||||||
|
else if ( Trans == CblasNoTrans ) TR='N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_cher2k", "Illegal Trans setting, %d\n", Trans);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TR = C2F_CHAR(&TR);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_cher2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc);
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
|
||||||
|
if( Uplo == CblasUpper) UL='L';
|
||||||
|
else if ( Uplo == CblasLower ) UL='U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cher2k", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if( Trans == CblasTrans) TR ='N';
|
||||||
|
else if ( Trans == CblasConjTrans ) TR='N';
|
||||||
|
else if ( Trans == CblasNoTrans ) TR='C';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_cher2k", "Illegal Trans setting, %d\n", Trans);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TR = C2F_CHAR(&TR);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ALPHA[0]= *alp;
|
||||||
|
ALPHA[1]= -alp[1];
|
||||||
|
F77_cher2k(F77_UL,F77_TR, &F77_N, &F77_K, ALPHA, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc);
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_cher2k", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,105 @@
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
* cblas_cherk.c
|
||||||
|
* This program is a C interface to cherk.
|
||||||
|
* Written by Keita Teranishi
|
||||||
|
* 4/8/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cherk(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const float alpha, const void *A, const int lda,
|
||||||
|
const float beta, void *C, const int ldc)
|
||||||
|
{
|
||||||
|
char UL, TR;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_TR, F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_TR &TR
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_K=K, F77_lda=lda;
|
||||||
|
F77_INT F77_ldc=ldc;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_K K
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_ldc ldc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
|
||||||
|
if( layout == CblasColMajor )
|
||||||
|
{
|
||||||
|
if( Uplo == CblasUpper) UL='U';
|
||||||
|
else if ( Uplo == CblasLower ) UL='L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_cherk", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( Trans == CblasTrans) TR ='T';
|
||||||
|
else if ( Trans == CblasConjTrans ) TR='C';
|
||||||
|
else if ( Trans == CblasNoTrans ) TR='N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_cherk", "Illegal Trans setting, %d\n", Trans);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TR = C2F_CHAR(&TR);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda,
|
||||||
|
&beta, C, &F77_ldc);
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if( Uplo == CblasUpper) UL='L';
|
||||||
|
else if ( Uplo == CblasLower ) UL='U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_cherk", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if( Trans == CblasTrans) TR ='N';
|
||||||
|
else if ( Trans == CblasConjTrans ) TR='N';
|
||||||
|
else if ( Trans == CblasNoTrans ) TR='C';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_cherk", "Illegal Trans setting, %d\n", Trans);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_SD = C2F_CHAR(&SD);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda,
|
||||||
|
&beta, C, &F77_ldc);
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_cherk", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,160 @@
|
||||||
|
/*
|
||||||
|
* cblas_chpmv.c
|
||||||
|
* The program is a C interface of chpmv
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/18/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_chpmv(const CBLAS_LAYOUT layout,
|
||||||
|
const CBLAS_UPLO Uplo,const int N,
|
||||||
|
const void *alpha, const void *AP,
|
||||||
|
const void *X, const int incX, const void *beta,
|
||||||
|
void *Y, const int incY)
|
||||||
|
{
|
||||||
|
char UL;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incx
|
||||||
|
#define F77_incY incY
|
||||||
|
#endif
|
||||||
|
int n, i=0, incx=incX;
|
||||||
|
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
|
||||||
|
float ALPHA[2],BETA[2];
|
||||||
|
int tincY, tincx;
|
||||||
|
float *x=(float *)X, *y=(float *)Y, *st=0, *tx;
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (Uplo == CblasLower) UL = 'L';
|
||||||
|
else if (Uplo == CblasUpper) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chpmv","Illegal Uplo setting, %d\n",Uplo );
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
F77_chpmv(F77_UL, &F77_N, alpha, AP, X,
|
||||||
|
&F77_incX, beta, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
ALPHA[0]= *alp;
|
||||||
|
ALPHA[1]= -alp[1];
|
||||||
|
BETA[0]= *bet;
|
||||||
|
BETA[1]= -bet[1];
|
||||||
|
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
n = N << 1;
|
||||||
|
x = malloc(n*sizeof(float));
|
||||||
|
|
||||||
|
tx = x;
|
||||||
|
if( incX > 0 ) {
|
||||||
|
i = incX << 1;
|
||||||
|
tincx = 2;
|
||||||
|
st= x+n;
|
||||||
|
} else {
|
||||||
|
i = incX *(-2);
|
||||||
|
tincx = -2;
|
||||||
|
st = x-2;
|
||||||
|
x +=(n-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = *xx;
|
||||||
|
x[1] = -xx[1];
|
||||||
|
x += tincx ;
|
||||||
|
xx += i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
x=tx;
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_incX = 1;
|
||||||
|
#else
|
||||||
|
incx = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
if(incY > 0)
|
||||||
|
tincY = incY;
|
||||||
|
else
|
||||||
|
tincY = -incY;
|
||||||
|
y++;
|
||||||
|
|
||||||
|
i = tincY << 1;
|
||||||
|
n = i * N ;
|
||||||
|
st = y + n;
|
||||||
|
do {
|
||||||
|
*y = -(*y);
|
||||||
|
y += i;
|
||||||
|
} while(y != st);
|
||||||
|
y -= n;
|
||||||
|
} else
|
||||||
|
x = (float *) X;
|
||||||
|
|
||||||
|
|
||||||
|
if (Uplo == CblasUpper) UL = 'L';
|
||||||
|
else if (Uplo == CblasLower) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chpmv","Illegal Uplo setting, %d\n", Uplo );
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_chpmv(F77_UL, &F77_N, ALPHA,
|
||||||
|
AP, x, &F77_incX, BETA, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(1, "cblas_chpmv","Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if ( layout == CblasRowMajor )
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if(X!=x)
|
||||||
|
free(x);
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*y = -(*y);
|
||||||
|
y += i;
|
||||||
|
}
|
||||||
|
while (y != st);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,115 @@
|
||||||
|
/*
|
||||||
|
* cblas_chpr.c
|
||||||
|
* The program is a C interface to chpr.
|
||||||
|
*
|
||||||
|
* Keita Teranishi 3/23/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_chpr(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const int N, const float alpha, const void *X,
|
||||||
|
const int incX, void *A)
|
||||||
|
{
|
||||||
|
char UL;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incx
|
||||||
|
#endif
|
||||||
|
int n, i, tincx, incx=incX;
|
||||||
|
float *x=(float *)X, *xx=(float *)X, *tx, *st;
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (Uplo == CblasLower) UL = 'L';
|
||||||
|
else if (Uplo == CblasUpper) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chpr","Illegal Uplo setting, %d\n",Uplo );
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_chpr(F77_UL, &F77_N, &alpha, X, &F77_incX, A);
|
||||||
|
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if (Uplo == CblasUpper) UL = 'L';
|
||||||
|
else if (Uplo == CblasLower) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chpr","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
n = N << 1;
|
||||||
|
x = malloc(n*sizeof(float));
|
||||||
|
tx = x;
|
||||||
|
if( incX > 0 ) {
|
||||||
|
i = incX << 1;
|
||||||
|
tincx = 2;
|
||||||
|
st= x+n;
|
||||||
|
} else {
|
||||||
|
i = incX *(-2);
|
||||||
|
tincx = -2;
|
||||||
|
st = x-2;
|
||||||
|
x +=(n-2);
|
||||||
|
}
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = *xx;
|
||||||
|
x[1] = -xx[1];
|
||||||
|
x += tincx ;
|
||||||
|
xx += i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
x=tx;
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_incX = 1;
|
||||||
|
#else
|
||||||
|
incx = 1;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else x = (float *) X;
|
||||||
|
|
||||||
|
F77_chpr(F77_UL, &F77_N, &alpha, x, &F77_incX, A);
|
||||||
|
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
cblas_xerbla(1, "cblas_chpr","Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(X!=x)
|
||||||
|
free(x);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,149 @@
|
||||||
|
/*
|
||||||
|
* cblas_chpr2.c
|
||||||
|
* The program is a C interface to chpr2.
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/20/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_chpr2(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const int N,const void *alpha, const void *X,
|
||||||
|
const int incX,const void *Y, const int incY, void *Ap)
|
||||||
|
|
||||||
|
{
|
||||||
|
char UL;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incx
|
||||||
|
#define F77_incY incy
|
||||||
|
#endif
|
||||||
|
int n, i, j, tincx, tincy, incx=incX, incy=incY;
|
||||||
|
float *x=(float *)X, *xx=(float *)X, *y=(float *)Y,
|
||||||
|
*yy=(float *)Y, *tx, *ty, *stx, *sty;
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (Uplo == CblasLower) UL = 'L';
|
||||||
|
else if (Uplo == CblasUpper) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chpr2","Illegal Uplo setting, %d\n",Uplo );
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_chpr2(F77_UL, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, Ap);
|
||||||
|
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if (Uplo == CblasUpper) UL = 'L';
|
||||||
|
else if (Uplo == CblasLower) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_chpr2","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
#endif
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
n = N << 1;
|
||||||
|
x = malloc(n*sizeof(float));
|
||||||
|
y = malloc(n*sizeof(float));
|
||||||
|
tx = x;
|
||||||
|
ty = y;
|
||||||
|
if( incX > 0 ) {
|
||||||
|
i = incX << 1 ;
|
||||||
|
tincx = 2;
|
||||||
|
stx= x+n;
|
||||||
|
} else {
|
||||||
|
i = incX *(-2);
|
||||||
|
tincx = -2;
|
||||||
|
stx = x-2;
|
||||||
|
x +=(n-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
if( incY > 0 ) {
|
||||||
|
j = incY << 1;
|
||||||
|
tincy = 2;
|
||||||
|
sty= y+n;
|
||||||
|
} else {
|
||||||
|
j = incY *(-2);
|
||||||
|
tincy = -2;
|
||||||
|
sty = y-2;
|
||||||
|
y +=(n-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = *xx;
|
||||||
|
x[1] = -xx[1];
|
||||||
|
x += tincx ;
|
||||||
|
xx += i;
|
||||||
|
}
|
||||||
|
while (x != stx);
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*y = *yy;
|
||||||
|
y[1] = -yy[1];
|
||||||
|
y += tincy ;
|
||||||
|
yy += j;
|
||||||
|
}
|
||||||
|
while (y != sty);
|
||||||
|
|
||||||
|
x=tx;
|
||||||
|
y=ty;
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_incX = 1;
|
||||||
|
F77_incY = 1;
|
||||||
|
#else
|
||||||
|
incx = 1;
|
||||||
|
incy = 1;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
x = (float *) X;
|
||||||
|
y = (void *) Y;
|
||||||
|
}
|
||||||
|
F77_chpr2(F77_UL, &F77_N, alpha, y, &F77_incY, x, &F77_incX, Ap);
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
cblas_xerbla(1, "cblas_chpr2","Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if(X!=x)
|
||||||
|
free(x);
|
||||||
|
if(Y!=y)
|
||||||
|
free(y);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
/*
|
||||||
|
* cblas_cscal.c
|
||||||
|
*
|
||||||
|
* The program is a C interface to cscal.
|
||||||
|
*
|
||||||
|
* Written by Keita Teranishi. 2/11/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cscal( const int N, const void *alpha, void *X,
|
||||||
|
const int incX)
|
||||||
|
{
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incX
|
||||||
|
#endif
|
||||||
|
F77_cscal( &F77_N, alpha, X, &F77_incX);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,21 @@
|
||||||
|
/*
|
||||||
|
* cblas_csscal.c
|
||||||
|
*
|
||||||
|
* The program is a C interface to csscal.
|
||||||
|
*
|
||||||
|
* Written by Keita Teranishi. 2/11/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_csscal( const int N, const float alpha, void *X,
|
||||||
|
const int incX)
|
||||||
|
{
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incX
|
||||||
|
#endif
|
||||||
|
F77_csscal( &F77_N, &alpha, X, &F77_incX);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
/*
|
||||||
|
* cblas_cswap.c
|
||||||
|
*
|
||||||
|
* The program is a C interface to cswap.
|
||||||
|
*
|
||||||
|
* Written by Keita Teranishi. 2/11/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_cswap( const int N, void *X, const int incX, void *Y,
|
||||||
|
const int incY)
|
||||||
|
{
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incX
|
||||||
|
#define F77_incY incY
|
||||||
|
#endif
|
||||||
|
F77_cswap( &F77_N, X, &F77_incX, Y, &F77_incY);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
* cblas_csymm.c
|
||||||
|
* This program is a C interface to csymm.
|
||||||
|
* Written by Keita Teranishi
|
||||||
|
* 4/8/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_csymm(const CBLAS_LAYOUT layout, const CBLAS_SIDE Side,
|
||||||
|
const CBLAS_UPLO Uplo, const int M, const int N,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const void *beta,
|
||||||
|
void *C, const int ldc)
|
||||||
|
{
|
||||||
|
char SD, UL;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_SD, F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_SD &SD
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb;
|
||||||
|
F77_INT F77_ldc=ldc;
|
||||||
|
#else
|
||||||
|
#define F77_M M
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_ldb ldb
|
||||||
|
#define F77_ldc ldc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
|
||||||
|
if( layout == CblasColMajor )
|
||||||
|
{
|
||||||
|
if( Side == CblasRight) SD='R';
|
||||||
|
else if ( Side == CblasLeft ) SD='L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_csymm", "Illegal Side setting, %d\n", Side);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( Uplo == CblasUpper) UL='U';
|
||||||
|
else if ( Uplo == CblasLower ) UL='L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_csymm", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_SD = C2F_CHAR(&SD);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_csymm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda,
|
||||||
|
B, &F77_ldb, beta, C, &F77_ldc);
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if( Side == CblasRight) SD='L';
|
||||||
|
else if ( Side == CblasLeft ) SD='R';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_csymm", "Illegal Side setting, %d\n", Side);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( Uplo == CblasUpper) UL='L';
|
||||||
|
else if ( Uplo == CblasLower ) UL='U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_csymm", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_SD = C2F_CHAR(&SD);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_csymm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A, &F77_lda,
|
||||||
|
B, &F77_ldb, beta, C, &F77_ldc);
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_csymm", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,108 @@
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
* cblas_csyr2k.c
|
||||||
|
* This program is a C interface to csyr2k.
|
||||||
|
* Written by Keita Teranishi
|
||||||
|
* 4/8/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_csyr2k(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *B, const int ldb, const void *beta,
|
||||||
|
void *C, const int ldc)
|
||||||
|
{
|
||||||
|
char UL, TR;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_TR, F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_TR &TR
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb;
|
||||||
|
F77_INT F77_ldc=ldc;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_K K
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_ldb ldb
|
||||||
|
#define F77_ldc ldc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
|
||||||
|
if( layout == CblasColMajor )
|
||||||
|
{
|
||||||
|
|
||||||
|
if( Uplo == CblasUpper) UL='U';
|
||||||
|
else if ( Uplo == CblasLower ) UL='L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_csyr2k", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( Trans == CblasTrans) TR ='T';
|
||||||
|
else if ( Trans == CblasConjTrans ) TR='C';
|
||||||
|
else if ( Trans == CblasNoTrans ) TR='N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_csyr2k", "Illegal Trans setting, %d\n", Trans);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TR = C2F_CHAR(&TR);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda,
|
||||||
|
B, &F77_ldb, beta, C, &F77_ldc);
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if( Uplo == CblasUpper) UL='L';
|
||||||
|
else if ( Uplo == CblasLower ) UL='U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_csyr2k", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if( Trans == CblasTrans) TR ='N';
|
||||||
|
else if ( Trans == CblasConjTrans ) TR='N';
|
||||||
|
else if ( Trans == CblasNoTrans ) TR='T';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_csyr2k", "Illegal Trans setting, %d\n", Trans);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TR = C2F_CHAR(&TR);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, beta, C, &F77_ldc);
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_csyr2k", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,108 @@
|
||||||
|
/*
|
||||||
|
*
|
||||||
|
* cblas_csyrk.c
|
||||||
|
* This program is a C interface to csyrk.
|
||||||
|
* Written by Keita Teranishi
|
||||||
|
* 4/8/1998
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_csyrk(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||||
|
const void *alpha, const void *A, const int lda,
|
||||||
|
const void *beta, void *C, const int ldc)
|
||||||
|
{
|
||||||
|
char UL, TR;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_TR, F77_UL;
|
||||||
|
#else
|
||||||
|
#define F77_TR &TR
|
||||||
|
#define F77_UL &UL
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_K=K, F77_lda=lda;
|
||||||
|
F77_INT F77_ldc=ldc;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_K K
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_ldc ldc
|
||||||
|
#endif
|
||||||
|
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
|
||||||
|
if( layout == CblasColMajor )
|
||||||
|
{
|
||||||
|
|
||||||
|
if( Uplo == CblasUpper) UL='U';
|
||||||
|
else if ( Uplo == CblasLower ) UL='L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_csyrk", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( Trans == CblasTrans) TR ='T';
|
||||||
|
else if ( Trans == CblasConjTrans ) TR='C';
|
||||||
|
else if ( Trans == CblasNoTrans ) TR='N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_csyrk", "Illegal Trans setting, %d\n", Trans);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TR = C2F_CHAR(&TR);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda,
|
||||||
|
beta, C, &F77_ldc);
|
||||||
|
} else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if( Uplo == CblasUpper) UL='L';
|
||||||
|
else if ( Uplo == CblasLower ) UL='U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_csyrk", "Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if( Trans == CblasTrans) TR ='N';
|
||||||
|
else if ( Trans == CblasConjTrans ) TR='N';
|
||||||
|
else if ( Trans == CblasNoTrans ) TR='T';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_csyrk", "Illegal Trans setting, %d\n", Trans);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TR = C2F_CHAR(&TR);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda,
|
||||||
|
beta, C, &F77_ldc);
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_csyrk", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -0,0 +1,158 @@
|
||||||
|
/*
|
||||||
|
* cblas_ctbmv.c
|
||||||
|
* The program is a C interface to ctbmv.
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/20/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_ctbmv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag,
|
||||||
|
const int N, const int K, const void *A, const int lda,
|
||||||
|
void *X, const int incX)
|
||||||
|
{
|
||||||
|
char TA;
|
||||||
|
char UL;
|
||||||
|
char DI;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_TA, F77_UL, F77_DI;
|
||||||
|
#else
|
||||||
|
#define F77_TA &TA
|
||||||
|
#define F77_UL &UL
|
||||||
|
#define F77_DI &DI
|
||||||
|
#endif
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_K K
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_incX incX
|
||||||
|
#endif
|
||||||
|
int n, i=0, tincX;
|
||||||
|
float *st=0, *x=(float *)X;
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (Uplo == CblasUpper) UL = 'U';
|
||||||
|
else if (Uplo == CblasLower) UL = 'L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (TransA == CblasNoTrans) TA = 'N';
|
||||||
|
else if (TransA == CblasTrans) TA = 'T';
|
||||||
|
else if (TransA == CblasConjTrans) TA = 'C';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_ctbmv","Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (Diag == CblasUnit) DI = 'U';
|
||||||
|
else if (Diag == CblasNonUnit) DI = 'N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(4, "cblas_ctbmv","Illegal Diag setting, %d\n", Diag);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
F77_DI = C2F_CHAR(&DI);
|
||||||
|
#endif
|
||||||
|
F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
|
||||||
|
&F77_incX);
|
||||||
|
}
|
||||||
|
else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if (Uplo == CblasUpper) UL = 'L';
|
||||||
|
else if (Uplo == CblasLower) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TransA == CblasNoTrans) TA = 'T';
|
||||||
|
else if (TransA == CblasTrans) TA = 'N';
|
||||||
|
else if (TransA == CblasConjTrans)
|
||||||
|
{
|
||||||
|
TA = 'N';
|
||||||
|
if ( N > 0)
|
||||||
|
{
|
||||||
|
if(incX > 0)
|
||||||
|
tincX = incX;
|
||||||
|
else
|
||||||
|
tincX = -incX;
|
||||||
|
i = tincX << 1;
|
||||||
|
n = i * N;
|
||||||
|
x++;
|
||||||
|
st = x + n;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = -(*x);
|
||||||
|
x+= i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
x -= n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_ctbmv","Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Diag == CblasUnit) DI = 'U';
|
||||||
|
else if (Diag == CblasNonUnit) DI = 'N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(4, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
F77_DI = C2F_CHAR(&DI);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
|
||||||
|
&F77_incX);
|
||||||
|
|
||||||
|
if (TransA == CblasConjTrans)
|
||||||
|
{
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = -(*x);
|
||||||
|
x += i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_ctbmv", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,162 @@
|
||||||
|
/*
|
||||||
|
* cblas_ctbsv.c
|
||||||
|
* The program is a C interface to ctbsv.
|
||||||
|
*
|
||||||
|
* Keita Teranishi 3/23/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_ctbsv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag,
|
||||||
|
const int N, const int K, const void *A, const int lda,
|
||||||
|
void *X, const int incX)
|
||||||
|
{
|
||||||
|
char TA;
|
||||||
|
char UL;
|
||||||
|
char DI;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_TA, F77_UL, F77_DI;
|
||||||
|
#else
|
||||||
|
#define F77_TA &TA
|
||||||
|
#define F77_UL &UL
|
||||||
|
#define F77_DI &DI
|
||||||
|
#endif
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_K K
|
||||||
|
#define F77_lda lda
|
||||||
|
#define F77_incX incX
|
||||||
|
#endif
|
||||||
|
int n, i=0, tincX;
|
||||||
|
float *st=0,*x=(float *)X;
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (Uplo == CblasUpper) UL = 'U';
|
||||||
|
else if (Uplo == CblasLower) UL = 'L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_ctbsv","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (TransA == CblasNoTrans) TA = 'N';
|
||||||
|
else if (TransA == CblasTrans) TA = 'T';
|
||||||
|
else if (TransA == CblasConjTrans) TA = 'C';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_ctbsv","Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (Diag == CblasUnit) DI = 'U';
|
||||||
|
else if (Diag == CblasNonUnit) DI = 'N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(4, "cblas_ctbsv","Illegal Diag setting, %d\n", Diag);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
F77_DI = C2F_CHAR(&DI);
|
||||||
|
#endif
|
||||||
|
F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
|
||||||
|
&F77_incX);
|
||||||
|
}
|
||||||
|
else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if (Uplo == CblasUpper) UL = 'L';
|
||||||
|
else if (Uplo == CblasLower) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_ctbsv","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TransA == CblasNoTrans) TA = 'T';
|
||||||
|
else if (TransA == CblasTrans) TA = 'N';
|
||||||
|
else if (TransA == CblasConjTrans)
|
||||||
|
{
|
||||||
|
TA = 'N';
|
||||||
|
if ( N > 0)
|
||||||
|
{
|
||||||
|
if ( incX > 0 )
|
||||||
|
tincX = incX;
|
||||||
|
else
|
||||||
|
tincX = -incX;
|
||||||
|
|
||||||
|
n = N*2*(tincX);
|
||||||
|
|
||||||
|
x++;
|
||||||
|
|
||||||
|
st=x+n;
|
||||||
|
|
||||||
|
i = tincX << 1;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = -(*x);
|
||||||
|
x+=i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
x -= n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_ctbsv","Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Diag == CblasUnit) DI = 'U';
|
||||||
|
else if (Diag == CblasNonUnit) DI = 'N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(4, "cblas_ctbsv","Illegal Diag setting, %d\n", Diag);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
F77_DI = C2F_CHAR(&DI);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
|
||||||
|
&F77_incX);
|
||||||
|
|
||||||
|
if (TransA == CblasConjTrans)
|
||||||
|
{
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = -(*x);
|
||||||
|
x+= i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_ctbsv", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,152 @@
|
||||||
|
/*
|
||||||
|
* cblas_ctpmv.c
|
||||||
|
* The program is a C interface to ctpmv.
|
||||||
|
*
|
||||||
|
* Keita Teranishi 5/20/98
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#include "cblas.h"
|
||||||
|
#include "cblas_f77.h"
|
||||||
|
void cblas_ctpmv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||||
|
const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag,
|
||||||
|
const int N, const void *Ap, void *X, const int incX)
|
||||||
|
{
|
||||||
|
char TA;
|
||||||
|
char UL;
|
||||||
|
char DI;
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_CHAR F77_TA, F77_UL, F77_DI;
|
||||||
|
#else
|
||||||
|
#define F77_TA &TA
|
||||||
|
#define F77_UL &UL
|
||||||
|
#define F77_DI &DI
|
||||||
|
#endif
|
||||||
|
#ifdef F77_INT
|
||||||
|
F77_INT F77_N=N, F77_incX=incX;
|
||||||
|
#else
|
||||||
|
#define F77_N N
|
||||||
|
#define F77_incX incX
|
||||||
|
#endif
|
||||||
|
int n, i=0, tincX;
|
||||||
|
float *st=0,*x=(float *)X;
|
||||||
|
extern int CBLAS_CallFromC;
|
||||||
|
extern int RowMajorStrg;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
|
||||||
|
CBLAS_CallFromC = 1;
|
||||||
|
if (layout == CblasColMajor)
|
||||||
|
{
|
||||||
|
if (Uplo == CblasUpper) UL = 'U';
|
||||||
|
else if (Uplo == CblasLower) UL = 'L';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_ctpmv","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (TransA == CblasNoTrans) TA = 'N';
|
||||||
|
else if (TransA == CblasTrans) TA = 'T';
|
||||||
|
else if (TransA == CblasConjTrans) TA = 'C';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_ctpmv","Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (Diag == CblasUnit) DI = 'U';
|
||||||
|
else if (Diag == CblasNonUnit) DI = 'N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(4, "cblas_ctpmv","Illegal Diag setting, %d\n", Diag);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
F77_DI = C2F_CHAR(&DI);
|
||||||
|
#endif
|
||||||
|
F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX);
|
||||||
|
}
|
||||||
|
else if (layout == CblasRowMajor)
|
||||||
|
{
|
||||||
|
RowMajorStrg = 1;
|
||||||
|
if (Uplo == CblasUpper) UL = 'L';
|
||||||
|
else if (Uplo == CblasLower) UL = 'U';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(2, "cblas_ctpmv","Illegal Uplo setting, %d\n", Uplo);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (TransA == CblasNoTrans) TA = 'T';
|
||||||
|
else if (TransA == CblasTrans) TA = 'N';
|
||||||
|
else if (TransA == CblasConjTrans)
|
||||||
|
{
|
||||||
|
TA = 'N';
|
||||||
|
if ( N > 0)
|
||||||
|
{
|
||||||
|
if(incX > 0)
|
||||||
|
tincX = incX;
|
||||||
|
else
|
||||||
|
tincX = -incX;
|
||||||
|
i = tincX << 1;
|
||||||
|
n = i * N;
|
||||||
|
x++;
|
||||||
|
st = x + n;
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = -(*x);
|
||||||
|
x += i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
x -= n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(3, "cblas_ctpmv","Illegal TransA setting, %d\n", TransA);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Diag == CblasUnit) DI = 'U';
|
||||||
|
else if (Diag == CblasNonUnit) DI = 'N';
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cblas_xerbla(4, "cblas_ctpmv","Illegal Diag setting, %d\n", Diag);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#ifdef F77_CHAR
|
||||||
|
F77_UL = C2F_CHAR(&UL);
|
||||||
|
F77_TA = C2F_CHAR(&TA);
|
||||||
|
F77_DI = C2F_CHAR(&DI);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX);
|
||||||
|
if (TransA == CblasConjTrans)
|
||||||
|
{
|
||||||
|
if (N > 0)
|
||||||
|
{
|
||||||
|
do
|
||||||
|
{
|
||||||
|
*x = -(*x);
|
||||||
|
x += i;
|
||||||
|
}
|
||||||
|
while (x != st);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else cblas_xerbla(1, "cblas_ctpmv", "Illegal layout setting, %d\n", layout);
|
||||||
|
CBLAS_CallFromC = 0;
|
||||||
|
RowMajorStrg = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue