Merge pull request #696 from ashwinyes/develop_20151120_lapack_test_fixes
Cortex A57 fixes and Lapack 3.6.0
This commit is contained in:
commit
b4380acf77
16
Makefile
16
Makefile
|
@ -249,10 +249,14 @@ ifndef NOFORTRAN
|
|||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(FC), gfortran)
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifdef SMP
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
else
|
||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
|
@ -288,7 +292,17 @@ endif
|
|||
lapack-test :
|
||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
|
||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
|
||||
ifneq ($(CROSS), 1)
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
endif
|
||||
|
||||
lapack-runtest:
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
|
||||
|
||||
blas-test:
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)
|
||||
|
|
|
@ -971,16 +971,29 @@ ifeq ($(DEBUG), 1)
|
|||
COMMON_OPT += -g
|
||||
endif
|
||||
|
||||
ifeq ($(DEBUG), 1)
|
||||
FCOMMON_OPT += -g
|
||||
endif
|
||||
|
||||
ifndef COMMON_OPT
|
||||
COMMON_OPT = -O2
|
||||
endif
|
||||
|
||||
ifndef FCOMMON_OPT
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
FCOMMON_OPT = -O0
|
||||
else
|
||||
FCOMMON_OPT = -O2 -frecursive
|
||||
endif
|
||||
endif
|
||||
|
||||
|
||||
|
||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
|
||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
|
||||
|
||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
|
||||
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
override FFLAGS += $(FCOMMON_OPT)
|
||||
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
|
||||
#MAKEOVERRIDES =
|
||||
|
||||
#For LAPACK Fortran codes.
|
||||
|
|
|
@ -43,28 +43,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#ifndef ASSEMBLER
|
||||
|
||||
|
||||
static void __inline blas_lock(volatile BLASULONG *address){
|
||||
|
||||
long register ret;
|
||||
BLASULONG ret;
|
||||
|
||||
do {
|
||||
while (*address) {YIELDING;};
|
||||
|
||||
__asm__ __volatile__(
|
||||
"ldaxr %0, [%1] \n\t"
|
||||
"stlxr w2, %2, [%1] \n\t"
|
||||
"orr %0, %0, x2 \n\t"
|
||||
: "=r"(ret)
|
||||
: "r"(address), "r"(1l)
|
||||
: "memory", "x2"
|
||||
"mov x4, #1 \n\t"
|
||||
"1: \n\t"
|
||||
"ldaxr x2, [%1] \n\t"
|
||||
"cbnz x2, 1b \n\t"
|
||||
"2: \n\t"
|
||||
"stxr w3, x4, [%1] \n\t"
|
||||
"cbnz w3, 1b \n\t"
|
||||
"mov %0, #0 \n\t"
|
||||
: "=r"(ret), "=r"(address)
|
||||
: "1"(address)
|
||||
: "memory", "x2" , "x3", "x4"
|
||||
|
||||
|
||||
);
|
||||
|
||||
|
||||
} while (ret);
|
||||
MB;
|
||||
|
||||
}
|
||||
|
||||
#define BLAS_LOCK_DEFINED
|
||||
|
||||
|
||||
|
||||
static inline int blas_quickdivide(blasint x, blasint y){
|
||||
return x / y;
|
||||
}
|
||||
|
@ -110,7 +121,7 @@ REALNAME:
|
|||
#define HUGE_PAGESIZE ( 4 << 20)
|
||||
|
||||
#if defined(CORTEXA57)
|
||||
#define BUFFER_SIZE (40 << 20)
|
||||
#define BUFFER_SIZE (20 << 20)
|
||||
#else
|
||||
#define BUFFER_SIZE (16 << 20)
|
||||
#endif
|
||||
|
|
|
@ -104,6 +104,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#include <errno.h>
|
||||
#include <linux/unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
#endif
|
||||
|
||||
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
|
||||
|
@ -1361,6 +1363,18 @@ void CONSTRUCTOR gotoblas_init(void) {
|
|||
gotoblas_memory_init();
|
||||
#endif
|
||||
|
||||
#if defined(OS_LINUX)
|
||||
struct rlimit curlimit;
|
||||
if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 )
|
||||
{
|
||||
if ( curlimit.rlim_cur != curlimit.rlim_max )
|
||||
{
|
||||
curlimit.rlim_cur = curlimit.rlim_max;
|
||||
setrlimit(RLIMIT_STACK, &curlimit);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef SMP
|
||||
if (blas_cpu_number == 0) blas_get_cpu_number();
|
||||
#ifdef SMP_SERVER
|
||||
|
|
|
@ -173,18 +173,18 @@
|
|||
sgbbrd, sgbcon, sgbequ, sgbrfs, sgbsv,
|
||||
sgbsvx, sgbtf2, sgbtrf, sgbtrs, sgebak, sgebal, sgebd2,
|
||||
sgebrd, sgecon, sgeequ, sgees, sgeesx, sgeev, sgeevx,
|
||||
sgegs, sgegv, sgehd2, sgehrd, sgelq2, sgelqf,
|
||||
sgels, sgelsd, sgelss, sgelsx, sgelsy, sgeql2, sgeqlf,
|
||||
sgeqp3, sgeqpf, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs,
|
||||
sgehd2, sgehrd, sgelq2, sgelqf,
|
||||
sgels, sgelsd, sgelss, sgelsy, sgeql2, sgeqlf,
|
||||
sgeqp3, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs,
|
||||
sgerq2, sgerqf, sgesc2, sgesdd, sgesvd, sgesvx,
|
||||
sgetc2, sgetri,
|
||||
sggbak, sggbal, sgges, sggesx, sggev, sggevx,
|
||||
sggglm, sgghrd, sgglse, sggqrf,
|
||||
sggrqf, sggsvd, sggsvp, sgtcon, sgtrfs, sgtsv,
|
||||
sggrqf, sgtcon, sgtrfs, sgtsv,
|
||||
sgtsvx, sgttrf, sgttrs, sgtts2, shgeqz,
|
||||
shsein, shseqr, slabrd, slacon, slacn2,
|
||||
slaein, slaexc, slag2, slags2, slagtm, slagv2, slahqr,
|
||||
slahrd, slahr2, slaic1, slaln2, slals0, slalsa, slalsd,
|
||||
slahr2, slaic1, slaln2, slals0, slalsa, slalsd,
|
||||
slangb, slange, slangt, slanhs, slansb, slansp,
|
||||
slansy, slantb, slantp, slantr, slanv2,
|
||||
slapll, slapmt,
|
||||
|
@ -194,7 +194,7 @@
|
|||
slarf, slarfb, slarfg, slarfgp, slarft, slarfx, slargv,
|
||||
slarrv, slartv,
|
||||
slarz, slarzb, slarzt, slasy2, slasyf,
|
||||
slatbs, slatdf, slatps, slatrd, slatrs, slatrz, slatzm,
|
||||
slatbs, slatdf, slatps, slatrd, slatrs, slatrz,
|
||||
sopgtr, sopmtr, sorg2l, sorg2r,
|
||||
sorgbr, sorghr, sorgl2, sorglq, sorgql, sorgqr, sorgr2,
|
||||
sorgrq, sorgtr, sorm2l, sorm2r,
|
||||
|
@ -220,7 +220,7 @@
|
|||
stgsja, stgsna, stgsy2, stgsyl, stpcon, stprfs, stptri,
|
||||
stptrs,
|
||||
strcon, strevc, strexc, strrfs, strsen, strsna, strsyl,
|
||||
strtrs, stzrqf, stzrzf, sstemr,
|
||||
strtrs, stzrzf, sstemr,
|
||||
slansf, spftrf, spftri, spftrs, ssfrk, stfsm, stftri, stfttp,
|
||||
stfttr, stpttf, stpttr, strttf, strttp,
|
||||
sgejsv, sgesvj, sgsvj0, sgsvj1,
|
||||
|
@ -245,14 +245,13 @@
|
|||
cbdsqr, cgbbrd, cgbcon, cgbequ, cgbrfs, cgbsv, cgbsvx,
|
||||
cgbtf2, cgbtrf, cgbtrs, cgebak, cgebal, cgebd2, cgebrd,
|
||||
cgecon, cgeequ, cgees, cgeesx, cgeev, cgeevx,
|
||||
cgegs, cgegv, cgehd2, cgehrd, cgelq2, cgelqf,
|
||||
cgels, cgelsd, cgelss, cgelsx, cgelsy, cgeql2, cgeqlf, cgeqp3,
|
||||
cgeqpf, cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs,
|
||||
cgehd2, cgehrd, cgelq2, cgelqf,
|
||||
cgels, cgelsd, cgelss, cgelsy, cgeql2, cgeqlf, cgeqp3,
|
||||
cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs,
|
||||
cgerq2, cgerqf, cgesc2, cgesdd, cgesvd,
|
||||
cgesvx, cgetc2, cgetri,
|
||||
cggbak, cggbal, cgges, cggesx, cggev, cggevx, cggglm,
|
||||
cgghrd, cgglse, cggqrf, cggrqf,
|
||||
cggsvd, cggsvp,
|
||||
cgtcon, cgtrfs, cgtsv, cgtsvx, cgttrf, cgttrs, cgtts2, chbev,
|
||||
chbevd, chbevx, chbgst, chbgv, chbgvd, chbgvx, chbtrd,
|
||||
checon, cheev, cheevd, cheevr, cheevx, chegs2, chegst,
|
||||
|
@ -267,7 +266,7 @@
|
|||
claed0, claed7, claed8,
|
||||
claein, claesy, claev2, clags2, clagtm,
|
||||
clahef, clahqr,
|
||||
clahrd, clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt,
|
||||
clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt,
|
||||
clanhb, clanhe,
|
||||
clanhp, clanhs, clanht, clansb, clansp, clansy, clantb,
|
||||
clantp, clantr, clapll, clapmt, clarcm, claqgb, claqge,
|
||||
|
@ -278,7 +277,7 @@
|
|||
clarfx, clargv, clarnv, clarrv, clartg, clartv,
|
||||
clarz, clarzb, clarzt, clascl, claset, clasr, classq,
|
||||
clasyf, clatbs, clatdf, clatps, clatrd, clatrs, clatrz,
|
||||
clatzm, cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv,
|
||||
cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv,
|
||||
cpbsvx, cpbtf2, cpbtrf, cpbtrs, cpocon, cpoequ, cporfs,
|
||||
cposv, cposvx, cpstrf, cpstf2,
|
||||
cppcon, cppequ, cpprfs, cppsv, cppsvx, cpptrf, cpptri, cpptrs,
|
||||
|
@ -293,7 +292,7 @@
|
|||
ctgexc, ctgsen, ctgsja, ctgsna, ctgsy2, ctgsyl, ctpcon,
|
||||
ctprfs, ctptri,
|
||||
ctptrs, ctrcon, ctrevc, ctrexc, ctrrfs, ctrsen, ctrsna,
|
||||
ctrsyl, ctrtrs, ctzrqf, ctzrzf, cung2l, cung2r,
|
||||
ctrsyl, ctrtrs, ctzrzf, cung2l, cung2r,
|
||||
cungbr, cunghr, cungl2, cunglq, cungql, cungqr, cungr2,
|
||||
cungrq, cungtr, cunm2l, cunm2r, cunmbr, cunmhr, cunml2,
|
||||
cunmlq, cunmql, cunmqr, cunmr2, cunmr3, cunmrq, cunmrz,
|
||||
|
@ -321,18 +320,18 @@
|
|||
dgbbrd, dgbcon, dgbequ, dgbrfs, dgbsv,
|
||||
dgbsvx, dgbtf2, dgbtrf, dgbtrs, dgebak, dgebal, dgebd2,
|
||||
dgebrd, dgecon, dgeequ, dgees, dgeesx, dgeev, dgeevx,
|
||||
dgegs, dgegv, dgehd2, dgehrd, dgelq2, dgelqf,
|
||||
dgels, dgelsd, dgelss, dgelsx, dgelsy, dgeql2, dgeqlf,
|
||||
dgeqp3, dgeqpf, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs,
|
||||
dgehd2, dgehrd, dgelq2, dgelqf,
|
||||
dgels, dgelsd, dgelss, dgelsy, dgeql2, dgeqlf,
|
||||
dgeqp3, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs,
|
||||
dgerq2, dgerqf, dgesc2, dgesdd, dgesvd, dgesvx,
|
||||
dgetc2, dgetri,
|
||||
dggbak, dggbal, dgges, dggesx, dggev, dggevx,
|
||||
dggglm, dgghrd, dgglse, dggqrf,
|
||||
dggrqf, dggsvd, dggsvp, dgtcon, dgtrfs, dgtsv,
|
||||
dggrqf, dgtcon, dgtrfs, dgtsv,
|
||||
dgtsvx, dgttrf, dgttrs, dgtts2, dhgeqz,
|
||||
dhsein, dhseqr, dlabrd, dlacon, dlacn2,
|
||||
dlaein, dlaexc, dlag2, dlags2, dlagtm, dlagv2, dlahqr,
|
||||
dlahrd, dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd,
|
||||
dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd,
|
||||
dlangb, dlange, dlangt, dlanhs, dlansb, dlansp,
|
||||
dlansy, dlantb, dlantp, dlantr, dlanv2,
|
||||
dlapll, dlapmt,
|
||||
|
@ -342,7 +341,7 @@
|
|||
dlarf, dlarfb, dlarfg, dlarfgp, dlarft, dlarfx,
|
||||
dlargv, dlarrv, dlartv,
|
||||
dlarz, dlarzb, dlarzt, dlasy2, dlasyf,
|
||||
dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz, dlatzm,
|
||||
dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz,
|
||||
dopgtr, dopmtr, dorg2l, dorg2r,
|
||||
dorgbr, dorghr, dorgl2, dorglq, dorgql, dorgqr, dorgr2,
|
||||
dorgrq, dorgtr, dorm2l, dorm2r,
|
||||
|
@ -368,7 +367,7 @@
|
|||
dtgsja, dtgsna, dtgsy2, dtgsyl, dtpcon, dtprfs, dtptri,
|
||||
dtptrs,
|
||||
dtrcon, dtrevc, dtrexc, dtrrfs, dtrsen, dtrsna, dtrsyl,
|
||||
dtrtrs, dtzrqf, dtzrzf, dstemr,
|
||||
dtrtrs, dtzrzf, dstemr,
|
||||
dsgesv, dsposv, dlag2s, slag2d, dlat2s,
|
||||
dlansf, dpftrf, dpftri, dpftrs, dsfrk, dtfsm, dtftri, dtfttp,
|
||||
dtfttr, dtpttf, dtpttr, dtrttf, dtrttp,
|
||||
|
@ -387,14 +386,13 @@
|
|||
zbdsqr, zgbbrd, zgbcon, zgbequ, zgbrfs, zgbsv, zgbsvx,
|
||||
zgbtf2, zgbtrf, zgbtrs, zgebak, zgebal, zgebd2, zgebrd,
|
||||
zgecon, zgeequ, zgees, zgeesx, zgeev, zgeevx,
|
||||
zgegs, zgegv, zgehd2, zgehrd, zgelq2, zgelqf,
|
||||
zgels, zgelsd, zgelss, zgelsx, zgelsy, zgeql2, zgeqlf, zgeqp3,
|
||||
zgeqpf, zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf,
|
||||
zgehd2, zgehrd, zgelq2, zgelqf,
|
||||
zgels, zgelsd, zgelss, zgelsy, zgeql2, zgeqlf, zgeqp3,
|
||||
zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf,
|
||||
zgesc2, zgesdd, zgesvd, zgesvx, zgetc2,
|
||||
zgetri,
|
||||
zggbak, zggbal, zgges, zggesx, zggev, zggevx, zggglm,
|
||||
zgghrd, zgglse, zggqrf, zggrqf,
|
||||
zggsvd, zggsvp,
|
||||
zgtcon, zgtrfs, zgtsv, zgtsvx, zgttrf, zgttrs, zgtts2, zhbev,
|
||||
zhbevd, zhbevx, zhbgst, zhbgv, zhbgvd, zhbgvx, zhbtrd,
|
||||
zhecon, zheev, zheevd, zheevr, zheevx, zhegs2, zhegst,
|
||||
|
@ -409,7 +407,7 @@
|
|||
zlaed0, zlaed7, zlaed8,
|
||||
zlaein, zlaesy, zlaev2, zlags2, zlagtm,
|
||||
zlahef, zlahqr,
|
||||
zlahrd, zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange,
|
||||
zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange,
|
||||
zlangt, zlanhb,
|
||||
zlanhe,
|
||||
zlanhp, zlanhs, zlanht, zlansb, zlansp, zlansy, zlantb,
|
||||
|
@ -422,7 +420,7 @@
|
|||
zlarfx, zlargv, zlarnv, zlarrv, zlartg, zlartv,
|
||||
zlarz, zlarzb, zlarzt, zlascl, zlaset, zlasr,
|
||||
zlassq, zlasyf,
|
||||
zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz, zlatzm,
|
||||
zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz,
|
||||
zpbcon, zpbequ, zpbrfs, zpbstf, zpbsv,
|
||||
zpbsvx, zpbtf2, zpbtrf, zpbtrs, zpocon, zpoequ, zporfs,
|
||||
zposv, zposvx, zpotrs, zpstrf, zpstf2,
|
||||
|
@ -438,7 +436,7 @@
|
|||
ztgexc, ztgsen, ztgsja, ztgsna, ztgsy2, ztgsyl, ztpcon,
|
||||
ztprfs, ztptri,
|
||||
ztptrs, ztrcon, ztrevc, ztrexc, ztrrfs, ztrsen, ztrsna,
|
||||
ztrsyl, ztrtrs, ztzrqf, ztzrzf, zung2l,
|
||||
ztrsyl, ztrtrs, ztzrzf, zung2l,
|
||||
zung2r, zungbr, zunghr, zungl2, zunglq, zungql, zungqr, zungr2,
|
||||
zungrq, zungtr, zunm2l, zunm2r, zunmbr, zunmhr, zunml2,
|
||||
zunmlq, zunmql, zunmqr, zunmr2, zunmr3, zunmrq, zunmrz,
|
||||
|
@ -452,6 +450,140 @@
|
|||
zunbdb5, zunbdb6, zuncsd, zuncsd2by1,
|
||||
zgeqrt, zgeqrt2, zgeqrt3, zgemqrt,
|
||||
ztpqrt, ztpqrt2, ztpmqrt, ztprfb,
|
||||
# functions added for lapack-3.6.0
|
||||
|
||||
cgejsv,
|
||||
cgesvdx,
|
||||
cgesvj,
|
||||
cgetrf2,
|
||||
cgges3,
|
||||
cggev3,
|
||||
cgghd3,
|
||||
cggsvd3,
|
||||
cggsvp3,
|
||||
cgsvj0,
|
||||
cgsvj1,
|
||||
clagge,
|
||||
claghe,
|
||||
clagsy,
|
||||
clahilb,
|
||||
clakf2,
|
||||
clarge,
|
||||
clarnd,
|
||||
claror,
|
||||
clarot,
|
||||
clatm1,
|
||||
clatm2,
|
||||
clatm3,
|
||||
clatm5,
|
||||
clatm6,
|
||||
clatme,
|
||||
clatmr,
|
||||
clatms,
|
||||
clatmt,
|
||||
cpotrf2,
|
||||
csbmv,
|
||||
cspr2,
|
||||
csyr2,
|
||||
cunm22,
|
||||
dbdsvdx,
|
||||
dgesvdx,
|
||||
dgetrf2,
|
||||
dgges3,
|
||||
dggev3,
|
||||
dgghd3,
|
||||
dggsvd3,
|
||||
dggsvp3,
|
||||
dladiv2,
|
||||
dlagge,
|
||||
dlagsy,
|
||||
dlahilb,
|
||||
dlakf2,
|
||||
dlaran,
|
||||
dlarge,
|
||||
dlarnd,
|
||||
dlaror,
|
||||
dlarot,
|
||||
dlatm1,
|
||||
dlatm2,
|
||||
dlatm3,
|
||||
dlatm5,
|
||||
dlatm6,
|
||||
dlatm7,
|
||||
dlatme,
|
||||
dlatmr,
|
||||
dlatms,
|
||||
dlatmt,
|
||||
dorm22,
|
||||
dpotrf2,
|
||||
dsecnd,
|
||||
sbdsvdx,
|
||||
second,
|
||||
sgesvdx,
|
||||
sgetrf2,
|
||||
sgges3,
|
||||
sggev3,
|
||||
sgghd3,
|
||||
sggsvd3,
|
||||
sggsvp3,
|
||||
sladiv2,
|
||||
slagge,
|
||||
slagsy,
|
||||
slahilb,
|
||||
slakf2,
|
||||
slaran,
|
||||
slarge,
|
||||
slarnd,
|
||||
slaror,
|
||||
slarot,
|
||||
slatm1,
|
||||
slatm2,
|
||||
slatm3,
|
||||
slatm5,
|
||||
slatm6,
|
||||
slatm7,
|
||||
slatme,
|
||||
slatmr,
|
||||
slatms,
|
||||
slatmt,
|
||||
sorm22,
|
||||
spotrf2,
|
||||
xerbla,
|
||||
zgejsv,
|
||||
zgesvdx,
|
||||
zgesvj,
|
||||
zgetrf2,
|
||||
zgges3,
|
||||
zggev3,
|
||||
zgghd3,
|
||||
zggsvd3,
|
||||
zggsvp3,
|
||||
zgsvj0,
|
||||
zgsvj1,
|
||||
zlagge,
|
||||
zlaghe,
|
||||
zlagsy,
|
||||
zlahilb,
|
||||
zlakf2,
|
||||
zlarge,
|
||||
zlarnd,
|
||||
zlaror,
|
||||
zlarot,
|
||||
zlatm1,
|
||||
zlatm2,
|
||||
zlatm3,
|
||||
zlatm5,
|
||||
zlatm6,
|
||||
zlatme,
|
||||
zlatmr,
|
||||
zlatms,
|
||||
zlatmt,
|
||||
zpotrf2,
|
||||
zsbmv,
|
||||
zspr2,
|
||||
zsyr2,
|
||||
zunm22
|
||||
|
||||
);
|
||||
|
||||
@lapack_extendedprecision_objs = (
|
||||
|
@ -682,8 +814,6 @@
|
|||
LAPACKE_cgeqlf_work,
|
||||
LAPACKE_cgeqp3,
|
||||
LAPACKE_cgeqp3_work,
|
||||
LAPACKE_cgeqpf,
|
||||
LAPACKE_cgeqpf_work,
|
||||
LAPACKE_cgeqr2,
|
||||
LAPACKE_cgeqr2_work,
|
||||
LAPACKE_cgeqrf,
|
||||
|
@ -738,10 +868,6 @@
|
|||
LAPACKE_cggqrf_work,
|
||||
LAPACKE_cggrqf,
|
||||
LAPACKE_cggrqf_work,
|
||||
LAPACKE_cggsvd,
|
||||
LAPACKE_cggsvd_work,
|
||||
LAPACKE_cggsvp,
|
||||
LAPACKE_cggsvp_work,
|
||||
LAPACKE_cgtcon,
|
||||
LAPACKE_cgtcon_work,
|
||||
LAPACKE_cgtrfs,
|
||||
|
@ -1186,8 +1312,6 @@
|
|||
LAPACKE_dgeqlf_work,
|
||||
LAPACKE_dgeqp3,
|
||||
LAPACKE_dgeqp3_work,
|
||||
LAPACKE_dgeqpf,
|
||||
LAPACKE_dgeqpf_work,
|
||||
LAPACKE_dgeqr2,
|
||||
LAPACKE_dgeqr2_work,
|
||||
LAPACKE_dgeqrf,
|
||||
|
@ -1244,10 +1368,6 @@
|
|||
LAPACKE_dggqrf_work,
|
||||
LAPACKE_dggrqf,
|
||||
LAPACKE_dggrqf_work,
|
||||
LAPACKE_dggsvd,
|
||||
LAPACKE_dggsvd_work,
|
||||
LAPACKE_dggsvp,
|
||||
LAPACKE_dggsvp_work,
|
||||
LAPACKE_dgtcon,
|
||||
LAPACKE_dgtcon_work,
|
||||
LAPACKE_dgtrfs,
|
||||
|
@ -1676,8 +1796,6 @@
|
|||
LAPACKE_sgeqlf_work,
|
||||
LAPACKE_sgeqp3,
|
||||
LAPACKE_sgeqp3_work,
|
||||
LAPACKE_sgeqpf,
|
||||
LAPACKE_sgeqpf_work,
|
||||
LAPACKE_sgeqr2,
|
||||
LAPACKE_sgeqr2_work,
|
||||
LAPACKE_sgeqrf,
|
||||
|
@ -1734,10 +1852,6 @@
|
|||
LAPACKE_sggqrf_work,
|
||||
LAPACKE_sggrqf,
|
||||
LAPACKE_sggrqf_work,
|
||||
LAPACKE_sggsvd,
|
||||
LAPACKE_sggsvd_work,
|
||||
LAPACKE_sggsvp,
|
||||
LAPACKE_sggsvp_work,
|
||||
LAPACKE_sgtcon,
|
||||
LAPACKE_sgtcon_work,
|
||||
LAPACKE_sgtrfs,
|
||||
|
@ -2158,8 +2272,6 @@
|
|||
LAPACKE_zgeqlf_work,
|
||||
LAPACKE_zgeqp3,
|
||||
LAPACKE_zgeqp3_work,
|
||||
LAPACKE_zgeqpf,
|
||||
LAPACKE_zgeqpf_work,
|
||||
LAPACKE_zgeqr2,
|
||||
LAPACKE_zgeqr2_work,
|
||||
LAPACKE_zgeqrf,
|
||||
|
@ -2214,10 +2326,6 @@
|
|||
LAPACKE_zggqrf_work,
|
||||
LAPACKE_zggrqf,
|
||||
LAPACKE_zggrqf_work,
|
||||
LAPACKE_zggsvd,
|
||||
LAPACKE_zggsvd_work,
|
||||
LAPACKE_zggsvp,
|
||||
LAPACKE_zggsvp_work,
|
||||
LAPACKE_zgtcon,
|
||||
LAPACKE_zgtcon_work,
|
||||
LAPACKE_zgtrfs,
|
||||
|
@ -2707,6 +2815,134 @@
|
|||
LAPACKE_slagsy_work,
|
||||
LAPACKE_zlagsy,
|
||||
LAPACKE_zlagsy_work,
|
||||
## new function from lapack-3.6.0
|
||||
|
||||
LAPACKE_cgejsv,
|
||||
LAPACKE_cgejsv_work,
|
||||
LAPACKE_cgesvdx,
|
||||
LAPACKE_cgesvdx_work,
|
||||
LAPACKE_cgesvj,
|
||||
LAPACKE_cgesvj_work,
|
||||
LAPACKE_cgetrf2,
|
||||
LAPACKE_cgetrf2_work,
|
||||
LAPACKE_cgges3,
|
||||
LAPACKE_cgges3_work,
|
||||
LAPACKE_cggev3,
|
||||
LAPACKE_cggev3_work,
|
||||
LAPACKE_cgghd3,
|
||||
LAPACKE_cgghd3_work,
|
||||
LAPACKE_cggsvd3,
|
||||
LAPACKE_cggsvd3_work,
|
||||
LAPACKE_cggsvp3,
|
||||
LAPACKE_cggsvp3_work,
|
||||
LAPACKE_chetrf_rook,
|
||||
LAPACKE_chetrf_rook_work,
|
||||
LAPACKE_chetrs_rook,
|
||||
LAPACKE_chetrs_rook_work,
|
||||
LAPACKE_clapmt,
|
||||
LAPACKE_clapmt_work,
|
||||
LAPACKE_clascl,
|
||||
LAPACKE_clascl_work,
|
||||
LAPACKE_cpotrf2,
|
||||
LAPACKE_cpotrf2_work,
|
||||
LAPACKE_csytrf_rook,
|
||||
LAPACKE_csytrf_rook_work,
|
||||
LAPACKE_csytrs_rook,
|
||||
LAPACKE_csytrs_rook_work,
|
||||
LAPACKE_cuncsd2by1,
|
||||
LAPACKE_cuncsd2by1_work,
|
||||
LAPACKE_dbdsvdx,
|
||||
LAPACKE_dbdsvdx_work,
|
||||
LAPACKE_dgesvdx,
|
||||
LAPACKE_dgesvdx_work,
|
||||
LAPACKE_dgetrf2,
|
||||
LAPACKE_dgetrf2_work,
|
||||
LAPACKE_dgges3,
|
||||
LAPACKE_dgges3_work,
|
||||
LAPACKE_dggev3,
|
||||
LAPACKE_dggev3_work,
|
||||
LAPACKE_dgghd3,
|
||||
LAPACKE_dgghd3_work,
|
||||
LAPACKE_dggsvd3,
|
||||
LAPACKE_dggsvd3_work,
|
||||
LAPACKE_dggsvp3,
|
||||
LAPACKE_dggsvp3_work,
|
||||
LAPACKE_dlapmt,
|
||||
LAPACKE_dlapmt_work,
|
||||
LAPACKE_dlascl,
|
||||
LAPACKE_dlascl_work,
|
||||
LAPACKE_dorcsd2by1,
|
||||
LAPACKE_dorcsd2by1_work,
|
||||
LAPACKE_dpotrf2,
|
||||
LAPACKE_dpotrf2_work,
|
||||
LAPACKE_dsytrf_rook,
|
||||
LAPACKE_dsytrf_rook_work,
|
||||
LAPACKE_dsytrs_rook,
|
||||
LAPACKE_dsytrs_rook_work,
|
||||
LAPACKE_sbdsvdx,
|
||||
LAPACKE_sbdsvdx_work,
|
||||
LAPACKE_sgesvdx,
|
||||
LAPACKE_sgesvdx_work,
|
||||
LAPACKE_sgetrf2,
|
||||
LAPACKE_sgetrf2_work,
|
||||
LAPACKE_sgges3,
|
||||
LAPACKE_sgges3_work,
|
||||
LAPACKE_sggev3,
|
||||
LAPACKE_sggev3_work,
|
||||
LAPACKE_sgghd3,
|
||||
LAPACKE_sgghd3_work,
|
||||
LAPACKE_sggsvd3,
|
||||
LAPACKE_sggsvd3_work,
|
||||
LAPACKE_sggsvp3,
|
||||
LAPACKE_sggsvp3_work,
|
||||
LAPACKE_slapmt,
|
||||
LAPACKE_slapmt_work,
|
||||
LAPACKE_slascl,
|
||||
LAPACKE_slascl_work,
|
||||
LAPACKE_sorcsd2by1,
|
||||
LAPACKE_sorcsd2by1_work,
|
||||
LAPACKE_spotrf2,
|
||||
LAPACKE_spotrf2_work,
|
||||
LAPACKE_ssytrf_rook,
|
||||
LAPACKE_ssytrf_rook_work,
|
||||
LAPACKE_ssytrs_rook,
|
||||
LAPACKE_ssytrs_rook_work,
|
||||
LAPACKE_stpqrt,
|
||||
LAPACKE_stpqrt_work,
|
||||
LAPACKE_zgejsv,
|
||||
LAPACKE_zgejsv_work,
|
||||
LAPACKE_zgesvdx,
|
||||
LAPACKE_zgesvdx_work,
|
||||
LAPACKE_zgesvj,
|
||||
LAPACKE_zgesvj_work,
|
||||
LAPACKE_zgetrf2,
|
||||
LAPACKE_zgetrf2_work,
|
||||
LAPACKE_zgges3,
|
||||
LAPACKE_zgges3_work,
|
||||
LAPACKE_zggev3,
|
||||
LAPACKE_zggev3_work,
|
||||
LAPACKE_zgghd3,
|
||||
LAPACKE_zgghd3_work,
|
||||
LAPACKE_zggsvd3,
|
||||
LAPACKE_zggsvd3_work,
|
||||
LAPACKE_zggsvp3,
|
||||
LAPACKE_zggsvp3_work,
|
||||
LAPACKE_zhetrf_rook,
|
||||
LAPACKE_zhetrf_rook_work,
|
||||
LAPACKE_zhetrs_rook,
|
||||
LAPACKE_zhetrs_rook_work,
|
||||
LAPACKE_zlapmt,
|
||||
LAPACKE_zlapmt_work,
|
||||
LAPACKE_zlascl,
|
||||
LAPACKE_zlascl_work,
|
||||
LAPACKE_zpotrf2,
|
||||
LAPACKE_zpotrf2_work,
|
||||
LAPACKE_zsytrf_rook,
|
||||
LAPACKE_zsytrf_rook_work,
|
||||
LAPACKE_zsytrs_rook,
|
||||
LAPACKE_zsytrs_rook_work,
|
||||
LAPACKE_zuncsd2by1,
|
||||
LAPACKE_zuncsd2by1_work
|
||||
);
|
||||
|
||||
#These function may need 2 underscores.
|
||||
|
|
|
@ -5,8 +5,8 @@ DAMAXKERNEL = amax.S
|
|||
CAMAXKERNEL = zamax.S
|
||||
ZAMAXKERNEL = zamax.S
|
||||
|
||||
ISAMAXKERNEL = isamax.S
|
||||
IDAMAXKERNEL = idamax.S
|
||||
ISAMAXKERNEL = iamax.S
|
||||
IDAMAXKERNEL = iamax.S
|
||||
ICAMAXKERNEL = izamax.S
|
||||
IZAMAXKERNEL = izamax.S
|
||||
|
||||
|
@ -25,13 +25,13 @@ DCOPYKERNEL = copy.S
|
|||
CCOPYKERNEL = copy.S
|
||||
ZCOPYKERNEL = copy.S
|
||||
|
||||
DOTKERNEL = dot.S
|
||||
SDOTKERNEL = dot.S
|
||||
DDOTKERNEL = dot.S
|
||||
CDOTKERNEL = zdot.S
|
||||
ZDOTKERNEL = zdot.S
|
||||
|
||||
SNRM2KERNEL = snrm2.S
|
||||
DNRM2KERNEL = dnrm2.S
|
||||
SNRM2KERNEL = nrm2.S
|
||||
DNRM2KERNEL = nrm2.S
|
||||
CNRM2KERNEL = znrm2.S
|
||||
ZNRM2KERNEL = znrm2.S
|
||||
|
||||
|
@ -40,7 +40,7 @@ DROTKERNEL = rot.S
|
|||
CROTKERNEL = zrot.S
|
||||
ZROTKERNEL = zrot.S
|
||||
|
||||
SCALKERNEL = scal.S
|
||||
SSCALKERNEL = scal.S
|
||||
DSCALKERNEL = scal.S
|
||||
CSCALKERNEL = zscal.S
|
||||
ZSCALKERNEL = zscal.S
|
||||
|
|
|
@ -181,73 +181,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
fmul v16.4s, v0.4s, v8.4s[0]
|
||||
OP_ii v16.4s, v1.4s, v9.4s[0]
|
||||
fmul v17.4s, v0.4s, v9.4s[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v17.4s, v17.4s
|
||||
eor v17.16b, v17.16b, v17.16b
|
||||
fmls v17.4s, v0.4s, v9.4s[0]
|
||||
#else
|
||||
fmul v17.4s, v0.4s, v9.4s[0]
|
||||
#endif
|
||||
OP_ir v17.4s, v1.4s, v8.4s[0]
|
||||
|
||||
fmul v20.4s, v0.4s, v8.4s[1]
|
||||
OP_ii v20.4s, v1.4s, v9.4s[1]
|
||||
fmul v21.4s, v0.4s, v9.4s[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v21.4s, v21.4s
|
||||
eor v21.16b, v21.16b, v21.16b
|
||||
fmls v21.4s, v0.4s, v9.4s[1]
|
||||
#else
|
||||
fmul v21.4s, v0.4s, v9.4s[1]
|
||||
#endif
|
||||
OP_ir v21.4s, v1.4s, v8.4s[1]
|
||||
|
||||
fmul v24.4s, v0.4s, v8.4s[2]
|
||||
OP_ii v24.4s, v1.4s, v9.4s[2]
|
||||
fmul v25.4s, v0.4s, v9.4s[2]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v25.4s, v25.4s
|
||||
eor v25.16b, v25.16b, v25.16b
|
||||
fmls v25.4s, v0.4s, v9.4s[2]
|
||||
#else
|
||||
fmul v25.4s, v0.4s, v9.4s[2]
|
||||
#endif
|
||||
OP_ir v25.4s, v1.4s, v8.4s[2]
|
||||
|
||||
fmul v28.4s, v0.4s, v8.4s[3]
|
||||
OP_ii v28.4s, v1.4s, v9.4s[3]
|
||||
fmul v29.4s, v0.4s, v9.4s[3]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v29.4s, v29.4s
|
||||
eor v29.16b, v29.16b, v29.16b
|
||||
fmls v29.4s, v0.4s, v9.4s[3]
|
||||
#else
|
||||
fmul v29.4s, v0.4s, v9.4s[3]
|
||||
#endif
|
||||
OP_ir v29.4s, v1.4s, v8.4s[3]
|
||||
|
||||
fmul v18.4s, v2.4s, v8.4s[0]
|
||||
OP_ii v18.4s, v3.4s, v9.4s[0]
|
||||
fmul v19.4s, v2.4s, v9.4s[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v19.4s, v19.4s
|
||||
eor v19.16b, v19.16b, v19.16b
|
||||
fmls v19.4s, v2.4s, v9.4s[0]
|
||||
#else
|
||||
fmul v19.4s, v2.4s, v9.4s[0]
|
||||
#endif
|
||||
OP_ir v19.4s, v3.4s, v8.4s[0]
|
||||
|
||||
fmul v22.4s, v2.4s, v8.4s[1]
|
||||
OP_ii v22.4s, v3.4s, v9.4s[1]
|
||||
fmul v23.4s, v2.4s, v9.4s[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v23.4s, v23.4s
|
||||
eor v23.16b, v23.16b, v23.16b
|
||||
fmls v23.4s, v2.4s, v9.4s[1]
|
||||
#else
|
||||
fmul v23.4s, v2.4s, v9.4s[1]
|
||||
#endif
|
||||
OP_ir v23.4s, v3.4s, v8.4s[1]
|
||||
|
||||
fmul v26.4s, v2.4s, v8.4s[2]
|
||||
OP_ii v26.4s, v3.4s, v9.4s[2]
|
||||
fmul v27.4s, v2.4s, v9.4s[2]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v27.4s, v27.4s
|
||||
eor v27.16b, v27.16b, v27.16b
|
||||
fmls v27.4s, v2.4s, v9.4s[2]
|
||||
#else
|
||||
fmul v27.4s, v2.4s, v9.4s[2]
|
||||
#endif
|
||||
OP_ir v27.4s, v3.4s, v8.4s[2]
|
||||
|
||||
fmul v30.4s, v2.4s, v8.4s[3]
|
||||
OP_ii v30.4s, v3.4s, v9.4s[3]
|
||||
fmul v31.4s, v2.4s, v9.4s[3]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v31.4s, v31.4s
|
||||
eor v31.16b, v31.16b, v31.16b
|
||||
fmls v31.4s, v2.4s, v9.4s[3]
|
||||
#else
|
||||
fmul v31.4s, v2.4s, v9.4s[3]
|
||||
#endif
|
||||
OP_ir v31.4s, v3.4s, v8.4s[3]
|
||||
|
||||
|
|
|
@ -172,37 +172,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
fmul v16.4s, v0.4s, v8.4s[0]
|
||||
OP_ii v16.4s, v1.4s, v9.4s[0]
|
||||
fmul v17.4s, v0.4s, v9.4s[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v17.4s, v17.4s
|
||||
eor v17.16b, v17.16b, v17.16b
|
||||
fmls v17.4s, v0.4s, v9.4s[0]
|
||||
#else
|
||||
fmul v17.4s, v0.4s, v9.4s[0]
|
||||
#endif
|
||||
OP_ir v17.4s, v1.4s, v8.4s[0]
|
||||
|
||||
fmul v20.4s, v0.4s, v8.4s[1]
|
||||
OP_ii v20.4s, v1.4s, v9.4s[1]
|
||||
fmul v21.4s, v0.4s, v9.4s[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v21.4s, v21.4s
|
||||
eor v21.16b, v21.16b, v21.16b
|
||||
fmls v21.4s, v0.4s, v9.4s[1]
|
||||
#else
|
||||
fmul v21.4s, v0.4s, v9.4s[1]
|
||||
#endif
|
||||
OP_ir v21.4s, v1.4s, v8.4s[1]
|
||||
|
||||
fmul v24.4s, v0.4s, v8.4s[2]
|
||||
OP_ii v24.4s, v1.4s, v9.4s[2]
|
||||
fmul v25.4s, v0.4s, v9.4s[2]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v25.4s, v25.4s
|
||||
eor v25.16b, v25.16b, v25.16b
|
||||
fmls v25.4s, v0.4s, v9.4s[2]
|
||||
#else
|
||||
fmul v25.4s, v0.4s, v9.4s[2]
|
||||
#endif
|
||||
OP_ir v25.4s, v1.4s, v8.4s[2]
|
||||
|
||||
fmul v28.4s, v0.4s, v8.4s[3]
|
||||
OP_ii v28.4s, v1.4s, v9.4s[3]
|
||||
fmul v29.4s, v0.4s, v9.4s[3]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v29.4s, v29.4s
|
||||
eor v29.16b, v29.16b, v29.16b
|
||||
fmls v29.4s, v0.4s, v9.4s[3]
|
||||
#else
|
||||
fmul v29.4s, v0.4s, v9.4s[3]
|
||||
#endif
|
||||
OP_ir v29.4s, v1.4s, v8.4s[3]
|
||||
|
||||
|
|
|
@ -45,16 +45,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define COND ge
|
||||
#endif
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define MAXF s0
|
||||
#define TMPF s1
|
||||
#define TMPVF {v1.s}[0]
|
||||
#define SZ 4
|
||||
#else
|
||||
#define MAXF d0
|
||||
#define TMPF d1
|
||||
#define TMPVF {v1.d}[0]
|
||||
#define SZ 8
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
.macro INIT_S
|
||||
#if !defined(DOUBLE)
|
||||
lsl INC_X, INC_X, #2
|
||||
ld1 {v0.s}[0], [X], INC_X
|
||||
#else
|
||||
lsl INC_X, INC_X, #3
|
||||
ld1 {v0.d}[0], [X], INC_X
|
||||
#endif
|
||||
mov Z, #1
|
||||
mov INDEX, Z
|
||||
fabs MAXF, MAXF
|
||||
|
@ -107,9 +119,8 @@ iamax_kernel_S1:
|
|||
iamax_kernel_S10:
|
||||
|
||||
KERNEL_S1
|
||||
|
||||
subs I, I, #1
|
||||
bne iamax_kernel_S10
|
||||
subs I, I, #1
|
||||
bne iamax_kernel_S10
|
||||
|
||||
iamax_kernel_L999:
|
||||
|
|
@ -1,213 +0,0 @@
|
|||
/*******************************************************************************
|
||||
Copyright (c) 2015, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*******************************************************************************/
|
||||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
#define N x0 /* vector length */
|
||||
#define X x1 /* X vector address */
|
||||
#define INC_X x2 /* X stride */
|
||||
#define INDEX x3 /* index of max/min value */
|
||||
#define Z x4 /* vector index */
|
||||
#define I x5 /* loop variable */
|
||||
#define X_COPY x6 /* copy of X address */
|
||||
#define MAXF_Z x7
|
||||
|
||||
/*******************************************************************************
|
||||
* Macro definitions
|
||||
*******************************************************************************/
|
||||
|
||||
#define MAXF s5
|
||||
#define TMPF s6
|
||||
#define TMPVF {v6.s}[0]
|
||||
#define SZ 4
|
||||
|
||||
/******************************************************************************/
|
||||
|
||||
.macro INIT_F1
|
||||
ldr MAXF, [X], #SZ
|
||||
mov Z, #1
|
||||
mov INDEX, Z
|
||||
fabs MAXF, MAXF
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F1
|
||||
ldr TMPF, [X], #SZ
|
||||
add Z, Z, #1
|
||||
fabs TMPF, TMPF
|
||||
fcmp TMPF, MAXF
|
||||
fcsel MAXF, MAXF, TMPF, le
|
||||
csel INDEX, INDEX, Z, le
|
||||
.endm
|
||||
|
||||
.macro INIT_F4
|
||||
ld1 {v0.4s}, [X], #16
|
||||
fabs v0.4s, v0.4s
|
||||
fmaxv MAXF, v0.4s
|
||||
mov Z, #5
|
||||
mov MAXF_Z, #1
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F4
|
||||
ld1 {v0.4s}, [X], #16
|
||||
fabs v0.4s, v0.4s
|
||||
fmaxv TMPF, v0.4s
|
||||
PRFM PLDL1KEEP, [X, #512]
|
||||
fcmp TMPF, MAXF
|
||||
fcsel MAXF, MAXF, TMPF, le
|
||||
csel MAXF_Z, MAXF_Z, Z, le
|
||||
add Z, Z, #4
|
||||
.endm
|
||||
|
||||
|
||||
.macro KERNEL_F4_FINALIZE
|
||||
mov INDEX, MAXF_Z
|
||||
sub MAXF_Z, MAXF_Z, #1
|
||||
lsl MAXF_Z, MAXF_Z, #2
|
||||
add X_COPY, X_COPY, MAXF_Z
|
||||
ldr TMPF, [X_COPY], #SZ
|
||||
fabs TMPF, TMPF
|
||||
fcmp TMPF, MAXF
|
||||
beq KERNEL_F4_FINALIZE_DONE
|
||||
add INDEX, INDEX, #1
|
||||
ldr TMPF, [X_COPY], #SZ
|
||||
fabs TMPF, TMPF
|
||||
fcmp TMPF, MAXF
|
||||
beq KERNEL_F4_FINALIZE_DONE
|
||||
add INDEX, INDEX, #1
|
||||
ldr TMPF, [X_COPY], #SZ
|
||||
fabs TMPF, TMPF
|
||||
fcmp TMPF, MAXF
|
||||
beq KERNEL_F4_FINALIZE_DONE
|
||||
add INDEX, INDEX, #1
|
||||
KERNEL_F4_FINALIZE_DONE:
|
||||
.endm
|
||||
|
||||
|
||||
.macro INIT_S
|
||||
lsl INC_X, INC_X, #2
|
||||
ld1 TMPVF, [X], INC_X
|
||||
mov Z, #1
|
||||
mov INDEX, Z
|
||||
fabs MAXF, TMPF
|
||||
.endm
|
||||
|
||||
.macro KERNEL_S1
|
||||
ld1 TMPVF, [X], INC_X
|
||||
add Z, Z, #1
|
||||
fabs TMPF, TMPF
|
||||
fcmp TMPF, MAXF
|
||||
fcsel MAXF, MAXF, TMPF, le
|
||||
csel INDEX, INDEX, Z, le
|
||||
.endm
|
||||
|
||||
/*******************************************************************************
|
||||
* End of macro definitions
|
||||
*******************************************************************************/
|
||||
|
||||
PROLOGUE
|
||||
|
||||
cmp N, xzr
|
||||
ble iamax_kernel_zero
|
||||
cmp INC_X, xzr
|
||||
ble iamax_kernel_zero
|
||||
|
||||
PRFM PLDL1KEEP, [X]
|
||||
mov X_COPY, X
|
||||
|
||||
cmp INC_X, #1
|
||||
bne iamax_kernel_S_BEGIN
|
||||
|
||||
iamax_kernel_F_BEGIN:
|
||||
asr I, N, #2
|
||||
cmp I, xzr
|
||||
beq iamax_kernel_F1_INIT
|
||||
|
||||
INIT_F4
|
||||
subs I, I, #1
|
||||
beq iamax_kernel_F4_FINALIZE
|
||||
|
||||
iamax_kernel_F4:
|
||||
KERNEL_F4
|
||||
subs I, I, #1
|
||||
bne iamax_kernel_F4
|
||||
|
||||
iamax_kernel_F4_FINALIZE:
|
||||
KERNEL_F4_FINALIZE
|
||||
|
||||
iamax_kernel_F1:
|
||||
ands I, N, #3
|
||||
ble iamax_kernel_L999
|
||||
|
||||
iamax_kernel_F10:
|
||||
KERNEL_F1
|
||||
subs I, I, #1
|
||||
bne iamax_kernel_F10
|
||||
b iamax_kernel_L999
|
||||
|
||||
iamax_kernel_F1_INIT:
|
||||
INIT_F1
|
||||
subs N, N, #1
|
||||
b iamax_kernel_F1
|
||||
|
||||
iamax_kernel_S_BEGIN:
|
||||
INIT_S
|
||||
|
||||
subs N, N, #1
|
||||
ble iamax_kernel_L999
|
||||
|
||||
asr I, N, #2
|
||||
cmp I, xzr
|
||||
ble iamax_kernel_S1
|
||||
|
||||
iamax_kernel_S4:
|
||||
KERNEL_S1
|
||||
KERNEL_S1
|
||||
KERNEL_S1
|
||||
KERNEL_S1
|
||||
|
||||
subs I, I, #1
|
||||
bne iamax_kernel_S4
|
||||
|
||||
iamax_kernel_S1:
|
||||
ands I, N, #3
|
||||
ble iamax_kernel_L999
|
||||
|
||||
iamax_kernel_S10:
|
||||
KERNEL_S1
|
||||
subs I, I, #1
|
||||
bne iamax_kernel_S10
|
||||
|
||||
iamax_kernel_L999:
|
||||
mov x0, INDEX
|
||||
ret
|
||||
|
||||
iamax_kernel_zero:
|
||||
mov x0, xzr
|
||||
ret
|
||||
|
||||
EPILOGUE
|
|
@ -0,0 +1,225 @@
|
|||
/*******************************************************************************
|
||||
Copyright (c) 2015, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*******************************************************************************/
|
||||
|
||||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
#define N x0
|
||||
#define X x1
|
||||
#define INC_X x2
|
||||
|
||||
#define I x3
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define SSQ s0
|
||||
#define SCALE s1
|
||||
#define REGZERO s5
|
||||
#define REGONE s6
|
||||
#else
|
||||
#define SSQ d0
|
||||
#define SCALE d1
|
||||
#define REGZERO d5
|
||||
#define REGONE d6
|
||||
#endif
|
||||
|
||||
/*******************************************************************************
|
||||
* Macro definitions
|
||||
*******************************************************************************/
|
||||
|
||||
.macro KERNEL_F1
|
||||
#if !defined(DOUBLE)
|
||||
ldr s4, [X], #4
|
||||
fcmp s4, REGZERO
|
||||
beq KERNEL_F1_NEXT_\@
|
||||
fabs s4, s4
|
||||
fcmp SCALE, s4
|
||||
bge KERNEL_F1_SCALE_GE_X_\@
|
||||
fdiv s2, SCALE, s4
|
||||
fmul s2, s2, s2
|
||||
fmul s3, SSQ, s2
|
||||
fadd SSQ, REGONE, s3
|
||||
fmov SCALE, s4
|
||||
b KERNEL_F1_NEXT_\@
|
||||
KERNEL_F1_SCALE_GE_X_\@:
|
||||
fdiv s2, s4, SCALE
|
||||
fmla SSQ, s2, v2.s[0]
|
||||
#else
|
||||
ldr d4, [X], #8
|
||||
fcmp d4, REGZERO
|
||||
beq KERNEL_F1_NEXT_\@
|
||||
fabs d4, d4
|
||||
fcmp SCALE, d4
|
||||
bge KERNEL_F1_SCALE_GE_X_\@
|
||||
fdiv d2, SCALE, d4
|
||||
fmul d2, d2, d2
|
||||
fmul d3, SSQ, d2
|
||||
fadd SSQ, REGONE, d3
|
||||
fmov SCALE, d4
|
||||
b KERNEL_F1_NEXT_\@
|
||||
KERNEL_F1_SCALE_GE_X_\@:
|
||||
fdiv d2, d4, SCALE
|
||||
fmla SSQ, d2, v2.d[0]
|
||||
#endif
|
||||
KERNEL_F1_NEXT_\@:
|
||||
.endm
|
||||
|
||||
.macro KERNEL_S1
|
||||
#if !defined(DOUBLE)
|
||||
ldr s4, [X]
|
||||
fcmp s4, REGZERO
|
||||
beq KERNEL_S1_NEXT
|
||||
fabs s4, s4
|
||||
fcmp SCALE, s4
|
||||
bge KERNEL_S1_SCALE_GE_X
|
||||
fdiv s2, SCALE, s4
|
||||
fmul s2, s2, s2
|
||||
fmul s3, SSQ, s2
|
||||
fadd SSQ, REGONE, s3
|
||||
fmov SCALE, s4
|
||||
b KERNEL_S1_NEXT
|
||||
KERNEL_S1_SCALE_GE_X:
|
||||
fdiv s2, s4, SCALE
|
||||
fmla SSQ, s2, v2.s[0]
|
||||
#else
|
||||
ldr d4, [X]
|
||||
fcmp d4, REGZERO
|
||||
beq KERNEL_S1_NEXT
|
||||
fabs d4, d4
|
||||
fcmp SCALE, d4
|
||||
bge KERNEL_S1_SCALE_GE_X
|
||||
fdiv d2, SCALE, d4
|
||||
fmul d2, d2, d2
|
||||
fmul d3, SSQ, d2
|
||||
fadd SSQ, REGONE, d3
|
||||
fmov SCALE, d4
|
||||
b KERNEL_S1_NEXT
|
||||
KERNEL_S1_SCALE_GE_X:
|
||||
fdiv d2, d4, SCALE
|
||||
fmla SSQ, d2, v2.d[0]
|
||||
#endif
|
||||
KERNEL_S1_NEXT:
|
||||
add X, X, INC_X
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F8
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
.endm
|
||||
|
||||
.macro INIT_S
|
||||
#if !defined(DOUBLE)
|
||||
lsl INC_X, INC_X, #2 // INC_X * SIZE
|
||||
#else
|
||||
lsl INC_X, INC_X, #3 // INC_X * SIZE
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro INIT
|
||||
eor v1.16b, v1.16b, v1.16b // scale=0.0
|
||||
fmov SSQ, #1.0
|
||||
fmov REGONE, SSQ
|
||||
fmov REGZERO, SCALE
|
||||
.endm
|
||||
|
||||
/*******************************************************************************
|
||||
* End of macro definitions
|
||||
*******************************************************************************/
|
||||
|
||||
PROLOGUE
|
||||
|
||||
.align 5
|
||||
|
||||
INIT
|
||||
|
||||
cmp N, #0
|
||||
ble nrm2_kernel_L999
|
||||
|
||||
cmp INC_X, #0
|
||||
beq nrm2_kernel_L999
|
||||
|
||||
|
||||
cmp INC_X, #1
|
||||
bne nrm2_kernel_S_BEGIN
|
||||
|
||||
nrm2_kernel_F_BEGIN:
|
||||
|
||||
asr I, N, #3 // I = N / 8
|
||||
cmp I, xzr
|
||||
ble nrm2_kernel_F1
|
||||
|
||||
nrm2_kernel_F8:
|
||||
|
||||
KERNEL_F8
|
||||
|
||||
subs I, I, #1
|
||||
bne nrm2_kernel_F8
|
||||
|
||||
nrm2_kernel_F1:
|
||||
|
||||
ands I, N, #7
|
||||
ble nrm2_kernel_L999
|
||||
|
||||
|
||||
nrm2_kernel_F10:
|
||||
|
||||
KERNEL_F1
|
||||
|
||||
subs I, I, #1
|
||||
bne nrm2_kernel_F10
|
||||
|
||||
b nrm2_kernel_L999
|
||||
|
||||
nrm2_kernel_S_BEGIN:
|
||||
|
||||
INIT_S
|
||||
|
||||
mov I, N
|
||||
|
||||
.align 5
|
||||
|
||||
nrm2_kernel_S10:
|
||||
|
||||
KERNEL_S1
|
||||
|
||||
subs I, I, #1
|
||||
bne nrm2_kernel_S10
|
||||
|
||||
|
||||
nrm2_kernel_L999:
|
||||
fsqrt SSQ, SSQ
|
||||
fmul SSQ, SCALE, SSQ
|
||||
|
||||
ret
|
||||
|
||||
EPILOGUE
|
||||
|
|
@ -59,10 +59,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
.macro INIT_F1
|
||||
#if !defined(DOUBLE)
|
||||
fneg s2, S
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub s2, s2, S
|
||||
ins v1.s[1], v2.s[0] // [-S, S]
|
||||
#else
|
||||
fneg d2, S
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub d2, d2, S
|
||||
ins v1.d[1], v2.d[0] // [-S, S]
|
||||
#endif
|
||||
.endm
|
||||
|
|
|
@ -43,14 +43,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if !defined(DOUBLE)
|
||||
#define DA_R s0 /* scale input value */
|
||||
#define DA_I s1 /* scale input value */
|
||||
#define TMPX v2.2s
|
||||
#define TMPY v3.2s
|
||||
#define SZ 4
|
||||
#else
|
||||
#define DA_R d0 /* scale input value */
|
||||
#define DA_I d1 /* scale input value */
|
||||
#define TMPX v2.2d
|
||||
#define TMPY v3.2d
|
||||
#define SZ 8
|
||||
#endif
|
||||
|
||||
|
@ -61,22 +57,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if !defined(CONJ)
|
||||
#if !defined(DOUBLE)
|
||||
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
|
||||
fneg s2, DA_I
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub s2, s2, DA_I
|
||||
ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I
|
||||
ext v1.8b, v1.8b, v1.8b, #4 // v1 = DA_I, -DA_I
|
||||
#else
|
||||
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
|
||||
fneg d2, DA_I
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub d2, d2, DA_I
|
||||
ins v1.d[1], v2.d[0] // v1 = -DA_I, DA_I
|
||||
ext v1.16b, v1.16b, v1.16b, #8 // v1 = DA_I, -DA_I
|
||||
#endif
|
||||
#else
|
||||
#if !defined(DOUBLE)
|
||||
fneg s2, DA_R
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub s2, s2, DA_R
|
||||
ins v0.s[1], v2.s[0] // v0 = -DA_R, DA_R
|
||||
ins v1.s[1], v1.s[0] // v1 = DA_I, DA_I
|
||||
#else
|
||||
fneg d2, DA_R
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub d2, d2, DA_R
|
||||
ins v0.d[1], v2.d[0] // v0 = -DA_R, DA_R
|
||||
ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I
|
||||
#endif
|
||||
|
@ -111,9 +111,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL_INIT_F4
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
// Replicate the lower 2 floats into the upper 2 slots
|
||||
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R, DA_R, DA_R
|
||||
ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I, DA_I, DA_I
|
||||
ins v16.s[0], v0.s[0]
|
||||
ins v16.s[1], v16.s[0]
|
||||
ins v16.d[1], v16.d[0]
|
||||
#if !defined(CONJ)
|
||||
ins v17.s[0], v1.s[1]
|
||||
#else
|
||||
ins v17.s[0], v1.s[0]
|
||||
#endif
|
||||
ins v17.s[1], v17.s[0]
|
||||
ins v17.d[1], v17.d[0]
|
||||
#else //DOUBLE
|
||||
ins v16.d[0], v0.d[0]
|
||||
ins v16.d[1], v16.d[0]
|
||||
#if !defined(CONJ)
|
||||
ins v17.d[0], v1.d[1]
|
||||
#else
|
||||
ins v17.d[0], v1.d[0]
|
||||
#endif
|
||||
ins v17.d[1], v17.d[0]
|
||||
#endif
|
||||
|
||||
.endm
|
||||
|
@ -121,55 +137,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL_F4
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
ld1 {v2.4s,v3.4s}, [X], #32 // V2 = X[3], X[2], X[1], X[0]
|
||||
// V3 = X[7], X[6], X[5], X[4]
|
||||
ext v6.8b, v2.8b, v2.8b, #4 // V6 = - , - , X[0], X[1]
|
||||
ins v6.s[2], v2.s[3] // V6 = - , X[3], X[0], X[1]
|
||||
ins v6.s[3], v2.s[2] // V6 = X[2], X[3], X[0], X[1]
|
||||
ld2 {v2.4s, v3.4s}, [X], #32
|
||||
ld2 {v4.4s, v5.4s}, [Y_COPY], #32
|
||||
|
||||
ld1 {v4.4s,v5.4s}, [Y] // V4 = Y[3], Y[2], Y[1], Y[0]
|
||||
// V5 = Y[7], Y[6], Y[5], Y[4]
|
||||
fmla v4.4s, v2.4s, v16.4s
|
||||
#if !defined(CONJ)
|
||||
fmls v4.4s, v3.4s, v17.4s
|
||||
#else
|
||||
fmla v4.4s, v3.4s, v17.4s
|
||||
#endif
|
||||
|
||||
ext v7.8b, v3.8b, v3.8b, #4 // V7 = - , - , X[4], X[5]
|
||||
ins v7.s[2], v3.s[3] // V7 = - , X[7], X[4], X[5]
|
||||
ins v7.s[3], v3.s[2] // V7 = X[6], X[7], X[4], X[5]
|
||||
#if !defined(CONJ)
|
||||
fmla v5.4s, v2.4s, v17.4s
|
||||
#else
|
||||
fmls v5.4s, v2.4s, v17.4s
|
||||
#endif
|
||||
fmla v5.4s, v3.4s, v16.4s
|
||||
|
||||
fmla v4.4s, v0.4s, v2.4s // Y[iy] += DA_R * X[ix]
|
||||
// Y[iy+1] += +-DA_R * X[ix+1]
|
||||
fmla v4.4s, v1.4s, v6.4s // Y[iy] += +-DA_I * X[ix+1]
|
||||
// Y[iy+1] += DA_I * X[ix]
|
||||
st1 {v4.4s}, [Y], #16
|
||||
|
||||
fmla v5.4s, v0.4s, v3.4s // Y[iy] += DA_R * X[ix]
|
||||
fmla v5.4s, v1.4s, v7.4s // Y[iy] += +-DA_I * X[ix+1]
|
||||
// Y[iy+1] += +-DA_R * X[ix+1]
|
||||
// Y[iy+1] += DA_I * X[ix]
|
||||
st1 {v5.4s}, [Y], #16
|
||||
st2 {v4.4s, v5.4s}, [Y], #32
|
||||
#else // DOUBLE
|
||||
ld1 {v2.2d,v3.2d}, [X], #32 // CX0, CX1, CX2, CX3
|
||||
ext v20.16b, v2.16b, v2.16b, #8 // X[ix], X[ix+1]
|
||||
ext v21.16b, v3.16b, v3.16b, #8 // X[ix], X[ix+1]
|
||||
ld2 {v2.2d, v3.2d}, [X], #32
|
||||
ld2 {v4.2d, v5.2d}, [Y_COPY], #32
|
||||
|
||||
ld1 {v4.2d,v5.2d}, [X], #32 // CX0, CX1, CX2, CX3
|
||||
ext v22.16b, v4.16b, v4.16b, #8 // X[ix], X[ix+1]
|
||||
ext v23.16b, v5.16b, v5.16b, #8 // X[ix], X[ix+1]
|
||||
fmla v4.2d, v2.2d, v16.2d
|
||||
#if !defined(CONJ)
|
||||
fmls v4.2d, v3.2d, v17.2d
|
||||
#else
|
||||
fmla v4.2d, v3.2d, v17.2d
|
||||
#endif
|
||||
#if !defined(CONJ)
|
||||
fmla v5.2d, v2.2d, v17.2d
|
||||
#else
|
||||
fmls v5.2d, v2.2d, v17.2d
|
||||
#endif
|
||||
fmla v5.2d, v3.2d, v16.2d
|
||||
|
||||
ld1 {v16.2d,v17.2d}, [Y_COPY], #32 // CY0, CY1, CY2, CY3
|
||||
st2 {v4.2d, v5.2d}, [Y], #32
|
||||
|
||||
fmla v16.2d, v0.2d, v2.2d
|
||||
fmla v17.2d, v0.2d, v3.2d
|
||||
ld2 {v18.2d, v19.2d}, [X], #32
|
||||
ld2 {v20.2d, v21.2d}, [Y_COPY], #32
|
||||
|
||||
ld1 {v18.2d,v19.2d}, [Y_COPY], #32 // CY0, CY1, CY2, CY3
|
||||
fmla v20.2d, v18.2d, v16.2d
|
||||
#if !defined(CONJ)
|
||||
fmls v20.2d, v19.2d, v17.2d
|
||||
#else
|
||||
fmla v20.2d, v19.2d, v17.2d
|
||||
#endif
|
||||
#if !defined(CONJ)
|
||||
fmla v21.2d, v18.2d, v17.2d
|
||||
#else
|
||||
fmls v21.2d, v18.2d, v17.2d
|
||||
#endif
|
||||
fmla v21.2d, v19.2d, v16.2d
|
||||
|
||||
fmla v16.2d, v1.2d, v20.2d
|
||||
fmla v17.2d, v1.2d, v21.2d
|
||||
st1 {v16.2d,v17.2d}, [Y], #32
|
||||
|
||||
fmla v18.2d, v0.2d, v4.2d
|
||||
fmla v19.2d, v0.2d, v5.2d
|
||||
fmla v18.2d, v1.2d, v22.2d
|
||||
fmla v19.2d, v1.2d, v23.2d
|
||||
st1 {v18.2d,v19.2d}, [Y], #32
|
||||
st2 {v20.2d, v21.2d}, [Y], #32
|
||||
#endif
|
||||
PRFM PLDL1KEEP, [X, #512]
|
||||
PRFM PLDL1KEEP, [Y, #512]
|
||||
|
|
|
@ -184,73 +184,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
fmul v16.2d, v0.2d, v8.2d[0]
|
||||
OP_ii v16.2d, v1.2d, v9.2d[0]
|
||||
fmul v17.2d, v0.2d, v9.2d[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v17.2d, v17.2d
|
||||
eor v17.16b, v17.16b, v17.16b
|
||||
fmls v17.2d, v0.2d, v9.2d[0]
|
||||
#else
|
||||
fmul v17.2d, v0.2d, v9.2d[0]
|
||||
#endif
|
||||
OP_ir v17.2d, v1.2d, v8.2d[0]
|
||||
|
||||
fmul v18.2d, v2.2d, v8.2d[0]
|
||||
OP_ii v18.2d, v3.2d, v9.2d[0]
|
||||
fmul v19.2d, v2.2d, v9.2d[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v19.2d, v19.2d
|
||||
eor v19.16b, v19.16b, v19.16b
|
||||
fmls v19.2d, v2.2d, v9.2d[0]
|
||||
#else
|
||||
fmul v19.2d, v2.2d, v9.2d[0]
|
||||
#endif
|
||||
OP_ir v19.2d, v3.2d, v8.2d[0]
|
||||
|
||||
fmul v20.2d, v0.2d, v8.2d[1]
|
||||
OP_ii v20.2d, v1.2d, v9.2d[1]
|
||||
fmul v21.2d, v0.2d, v9.2d[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v21.2d, v21.2d
|
||||
eor v21.16b, v21.16b, v21.16b
|
||||
fmls v21.2d, v0.2d, v9.2d[1]
|
||||
#else
|
||||
fmul v21.2d, v0.2d, v9.2d[1]
|
||||
#endif
|
||||
OP_ir v21.2d, v1.2d, v8.2d[1]
|
||||
|
||||
fmul v22.2d, v2.2d, v8.2d[1]
|
||||
OP_ii v22.2d, v3.2d, v9.2d[1]
|
||||
fmul v23.2d, v2.2d, v9.2d[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v23.2d, v23.2d
|
||||
eor v23.16b, v23.16b, v23.16b
|
||||
fmls v23.2d, v2.2d, v9.2d[1]
|
||||
#else
|
||||
fmul v23.2d, v2.2d, v9.2d[1]
|
||||
#endif
|
||||
OP_ir v23.2d, v3.2d, v8.2d[1]
|
||||
|
||||
fmul v24.2d, v0.2d, v10.2d[0]
|
||||
OP_ii v24.2d, v1.2d, v11.2d[0]
|
||||
fmul v25.2d, v0.2d, v11.2d[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v25.2d, v25.2d
|
||||
eor v25.16b, v25.16b, v25.16b
|
||||
fmls v25.2d, v0.2d, v11.2d[0]
|
||||
#else
|
||||
fmul v25.2d, v0.2d, v11.2d[0]
|
||||
#endif
|
||||
OP_ir v25.2d, v1.2d, v10.2d[0]
|
||||
|
||||
fmul v26.2d, v2.2d, v10.2d[0]
|
||||
OP_ii v26.2d, v3.2d, v11.2d[0]
|
||||
fmul v27.2d, v2.2d, v11.2d[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v27.2d, v27.2d
|
||||
eor v27.16b, v27.16b, v27.16b
|
||||
fmls v27.2d, v2.2d, v11.2d[0]
|
||||
#else
|
||||
fmul v27.2d, v2.2d, v11.2d[0]
|
||||
#endif
|
||||
OP_ir v27.2d, v3.2d, v10.2d[0]
|
||||
|
||||
fmul v28.2d, v0.2d, v10.2d[1]
|
||||
OP_ii v28.2d, v1.2d, v11.2d[1]
|
||||
fmul v29.2d, v0.2d, v11.2d[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v29.2d, v29.2d
|
||||
eor v29.16b, v29.16b, v29.16b
|
||||
fmls v29.2d, v0.2d, v11.2d[1]
|
||||
#else
|
||||
fmul v29.2d, v0.2d, v11.2d[1]
|
||||
#endif
|
||||
OP_ir v29.2d, v1.2d, v10.2d[1]
|
||||
|
||||
fmul v30.2d, v2.2d, v10.2d[1]
|
||||
OP_ii v30.2d, v3.2d, v11.2d[1]
|
||||
fmul v31.2d, v2.2d, v11.2d[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v31.2d, v31.2d
|
||||
eor v31.16b, v31.16b, v31.16b
|
||||
fmls v31.2d, v2.2d, v11.2d[1]
|
||||
#else
|
||||
fmul v31.2d, v2.2d, v11.2d[1]
|
||||
#endif
|
||||
OP_ir v31.2d, v3.2d, v10.2d[1]
|
||||
|
||||
|
|
|
@ -110,15 +110,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
/******* INIT FOR F1 AND S1 LOOP ******/
|
||||
#if !defined(DOUBLE)
|
||||
ins v0.s[1], v0.s[0] // R(ALPHA), R(ALPHA)
|
||||
fneg s2, ALPHA_I
|
||||
ins v0.s[1], v0.s[0] // R(ALPHA), R(ALPHA)
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub s2, s2, ALPHA_I
|
||||
ins v1.s[1], v2.s[0] // -I(ALPHA), I(ALPHA)
|
||||
#if !defined(XCONJ)
|
||||
ext v1.8b, v1.8b, v1.8b, #4 // I(ALPHA), -I(ALPHA)
|
||||
#endif
|
||||
#else
|
||||
ins v0.d[1], v0.d[0] // R(ALPHA), R(ALPHA)
|
||||
fneg d2, ALPHA_I
|
||||
ins v0.d[1], v0.d[0] // R(ALPHA), R(ALPHA)
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub d2, d2, ALPHA_I
|
||||
ins v1.d[1], v2.d[0] // -I(ALPHA), I(ALPHA)
|
||||
#if !defined(XCONJ)
|
||||
ext v1.16b, v1.16b, v1.16b, #8 // I(ALPHA), -I(ALPHA)
|
||||
|
@ -156,8 +158,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#else
|
||||
fmul v11.4s, v9.4s, v7.4s // [+ R(X) * R(ALPHA)]
|
||||
fmls v11.4s, v10.4s, v8.4s // [- I(X) * I(ALPHA)]
|
||||
fmul v12.4s, v9.4s, v8.4s // [R(X) * I(ALPHA)]
|
||||
fneg v12.4s, v12.4s // [- R(X) * I(ALPHA)]
|
||||
eor v12.16b, v12.16b, v12.16b
|
||||
fmls v12.4s, v9.4s, v8.4s // [- R(X) * I(ALPHA)]
|
||||
fmla v12.4s, v10.4s, v7.4s // [- I(X) * R(ALPHA)]
|
||||
#endif
|
||||
#endif // CONJ
|
||||
|
@ -170,24 +172,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ins v3.s[0], v2.s[1]
|
||||
#if !defined(CONJ)
|
||||
#if !defined(XCONJ)
|
||||
fneg s4, s3
|
||||
eor v4.16b, v4.16b, v4.16b
|
||||
fsub s4, s4, s3
|
||||
ins v3.s[1], v4.s[0]
|
||||
ext v3.8b, v3.8b, v3.8b, #4 // [I(TEMP), -I(TEMP)]
|
||||
ins v2.s[1], v2.s[0] // [R(TEMP), R(TEMP)]
|
||||
#else
|
||||
fneg s4, s3
|
||||
eor v4.16b, v4.16b, v4.16b
|
||||
fsub s4, s4, s3
|
||||
ins v3.s[1], v4.s[0] // [-I(TEMP), I(TEMP)]
|
||||
ins v2.s[1], v2.s[0] // [R(TEMP), R(TEMP)]
|
||||
#endif
|
||||
#else // CONJ
|
||||
#if !defined(XCONJ)
|
||||
ins v3.s[1], v3.s[0] // [I(TEMP), I(TEMP)]
|
||||
fneg s4, s2
|
||||
eor v4.16b, v4.16b, v4.16b
|
||||
fsub s4, s4, s2
|
||||
ins v2.s[1], v4.s[0] // [-R(TEMP), R(TEMP)]
|
||||
#else
|
||||
fneg s3, s3
|
||||
eor v4.16b, v4.16b, v4.16b
|
||||
fsub s3, s4, s3
|
||||
ins v3.s[1], v3.s[0] // [-I(TEMP), -I(TEMP)]
|
||||
fneg s4, s2
|
||||
eor v4.16b, v4.16b, v4.16b
|
||||
fsub s4, s4, s2
|
||||
ins v2.s[1], v4.s[0] // [-R(TEMP), R(TEMP)]
|
||||
#endif
|
||||
#endif // CONJ
|
||||
|
@ -220,8 +227,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#else
|
||||
fmul v11.2d, v9.2d, v7.2d // [+ R(X) * R(ALPHA)]
|
||||
fmls v11.2d, v10.2d, v8.2d // [- I(X) * I(ALPHA)]
|
||||
fmul v12.2d, v9.2d, v8.2d // [R(X) * I(ALPHA)]
|
||||
fneg v12.2d, v12.2d // [- R(X) * I(ALPHA)]
|
||||
eor v12.16b, v12.16b, v12.16b
|
||||
fmls v12.2d, v9.2d, v8.2d // [- R(X) * I(ALPHA)]
|
||||
fmla v12.2d, v10.2d, v7.2d // [- I(X) * R(ALPHA)]
|
||||
#endif
|
||||
#endif // CONJ
|
||||
|
@ -234,24 +241,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ins v3.d[0], v2.d[1] // I(TEMP)
|
||||
#if !defined(CONJ)
|
||||
#if !defined(XCONJ)
|
||||
fneg d4, d3 // -I(TEMP)
|
||||
eor v4.16b, v4.16b, v4.16b
|
||||
fsub d4, d4, d3
|
||||
ins v3.d[1], v4.d[0]
|
||||
ext v3.16b, v3.16b, v3.16b, #8 // [I(TEMP), -I(TEMP)]
|
||||
ins v2.d[1], v2.d[0] // [R(TEMP), R(TEMP)]
|
||||
#else
|
||||
fneg d4, d3 // -I(TEMP)
|
||||
eor v4.16b, v4.16b, v4.16b
|
||||
fsub d4, d4, d3
|
||||
ins v3.d[1], v4.d[0] // [-I(TEMP), I(TEMP)]
|
||||
ins v2.d[1], v2.d[0] // [R(TEMP), R(TEMP)]
|
||||
#endif
|
||||
#else // CONJ
|
||||
#if !defined(XCONJ)
|
||||
ins v3.d[1], v3.d[0] // [I(TEMP), I(TEMP)]
|
||||
fneg d4, d2 // -R(TEMP)
|
||||
eor v4.16b, v4.16b, v4.16b
|
||||
fsub d4, d4, d2
|
||||
ins v2.d[1], v4.d[0] // [-R(TEMP), R(TEMP)]
|
||||
#else
|
||||
fneg d3, d3 // -I(TEMP)
|
||||
eor v4.16b, v4.16b, v4.16b
|
||||
fsub d3, d4, d3
|
||||
ins v3.d[1], v3.d[0] // [-I(TEMP), -I(TEMP)]
|
||||
fneg d4, d2 // -R(TEMP)
|
||||
eor v4.16b, v4.16b, v4.16b
|
||||
fsub d4, d4, d2
|
||||
ins v2.d[1], v4.d[0] // [-R(TEMP), R(TEMP)]
|
||||
#endif
|
||||
#endif // CONJ
|
||||
|
|
|
@ -96,22 +96,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if !defined(XCONJ)
|
||||
#if !defined(DOUBLE)
|
||||
ins v0.s[1], v0.s[0] // v0 = ALPHA_R, ALPHA_R
|
||||
fneg s2, ALPHA_I
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub s2, s2, ALPHA_I
|
||||
ins v1.s[1], v2.s[0]
|
||||
ext v1.8b, v1.8b, v1.8b, #4 // v1 = ALPHA_I, -ALPHA_I
|
||||
#else
|
||||
ins v0.d[1], v0.d[0] // v0 = ALPHA_R, ALPHA_R
|
||||
fneg d2, ALPHA_I
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub d2, d2, ALPHA_I
|
||||
ins v1.d[1], v2.d[0]
|
||||
ext v1.16b, v1.16b, v1.16b, #8 // v1 = ALPHA_I, -ALPHA_I
|
||||
#endif
|
||||
#else // XCONJ
|
||||
#if !defined(DOUBLE)
|
||||
fneg s2, ALPHA_R
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub s2, s2, ALPHA_R
|
||||
ins v0.s[1], v2.s[0] // v0 = -ALPHA_R, ALPHA_R
|
||||
ins v1.s[1], v1.s[0] // v1 = ALPHA_I, ALPHA_I
|
||||
#else
|
||||
fneg d2, ALPHA_R
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub d2, d2, ALPHA_R
|
||||
ins v0.d[1], v2.d[0] // v0 = -ALPHA_R, ALPHA_R
|
||||
ins v1.d[1], v1.d[0] // v1 = ALPHA_I, ALPHA_I
|
||||
#endif
|
||||
|
@ -136,89 +140,51 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ld2 {v11.4s, v12.4s}, [X_PTR], #32
|
||||
ld2 {v13.4s, v14.4s}, [A_PTR], #32
|
||||
|
||||
#if !defined(CONJ)
|
||||
#if !defined(XCONJ)
|
||||
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
|
||||
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
|
||||
fmls v9.4s, v12.4s, v14.4s // [- I(X) * A_I]
|
||||
fmla v10.4s, v11.4s, v14.4s // [+ R(X) * A_I]
|
||||
fmla v10.4s, v12.4s, v13.4s // [+ I(X) * A_R]
|
||||
#else
|
||||
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
|
||||
fmla v9.4s, v12.4s, v14.4s // [+ I(X) * A_I]
|
||||
fmla v10.4s, v11.4s, v14.4s // [+ R(X) * A_I]
|
||||
fmls v10.4s, v12.4s, v13.4s // [- I(X) * A_R]
|
||||
#endif
|
||||
#else // CONJ
|
||||
#if !defined(XCONJ)
|
||||
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
|
||||
fmla v9.4s, v12.4s, v14.4s // [+ I(X) * A_I]
|
||||
fmls v10.4s, v11.4s, v14.4s // [- R(X) * A_I]
|
||||
fmla v10.4s, v12.4s, v13.4s // [+ I(X) * A_R]
|
||||
#else
|
||||
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
|
||||
fmls v9.4s, v12.4s, v14.4s // [- I(X) * A_I]
|
||||
fmls v10.4s, v11.4s, v14.4s // [- R(X) * A_I]
|
||||
fmls v10.4s, v12.4s, v13.4s // [- I(X) * A_R]
|
||||
#endif
|
||||
#endif // CONJ
|
||||
|
||||
#else // DOUBLE
|
||||
ld2 {v11.2d, v12.2d}, [X_PTR], #32
|
||||
ld2 {v13.2d, v14.2d}, [A_PTR], #32
|
||||
prfm PLDL1STRM, [X_PTR, #512]
|
||||
#if !defined(CONJ)
|
||||
#if !defined(XCONJ)
|
||||
|
||||
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
|
||||
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
|
||||
fmls v9.2d, v12.2d, v14.2d // [- I(X) * A_I]
|
||||
fmla v10.2d, v11.2d, v14.2d // [+ R(X) * A_I]
|
||||
fmla v10.2d, v12.2d, v13.2d // [+ I(X) * A_R]
|
||||
#else
|
||||
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
|
||||
fmla v9.2d, v12.2d, v14.2d // [+ I(X) * A_I]
|
||||
fmla v10.2d, v11.2d, v14.2d // [+ R(X) * A_I]
|
||||
fmls v10.2d, v12.2d, v13.2d // [- I(X) * A_R]
|
||||
#endif
|
||||
#else // CONJ
|
||||
#if !defined(XCONJ)
|
||||
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
|
||||
fmla v9.2d, v12.2d, v14.2d // [+ I(X) * A_I]
|
||||
fmls v10.2d, v11.2d, v14.2d // [- R(X) * A_I]
|
||||
fmla v10.2d, v12.2d, v13.2d // [+ I(X) * A_R]
|
||||
#else
|
||||
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
|
||||
fmls v9.2d, v12.2d, v14.2d // [- I(X) * A_I]
|
||||
fmls v10.2d, v11.2d, v14.2d // [- R(X) * A_I]
|
||||
fmls v10.2d, v12.2d, v13.2d // [- I(X) * A_R]
|
||||
#endif
|
||||
#endif // CONJ
|
||||
|
||||
ld2 {v17.2d, v18.2d}, [X_PTR], #32
|
||||
ld2 {v19.2d, v20.2d}, [A_PTR], #32
|
||||
prfm PLDL1STRM, [A_PTR, #512]
|
||||
#if !defined(CONJ)
|
||||
#if !defined(XCONJ)
|
||||
|
||||
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
|
||||
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
|
||||
fmls v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
|
||||
fmla v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
|
||||
fmla v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
|
||||
#else
|
||||
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
|
||||
fmla v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
|
||||
fmla v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
|
||||
fmls v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
|
||||
#endif
|
||||
#else // CONJ
|
||||
#if !defined(XCONJ)
|
||||
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
|
||||
fmla v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
|
||||
fmls v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
|
||||
fmla v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
|
||||
#else
|
||||
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
|
||||
fmls v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
|
||||
fmls v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
|
||||
fmls v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
|
||||
#endif
|
||||
#endif // CONJ
|
||||
|
||||
#endif //DOUBLE
|
||||
.endm
|
||||
|
||||
|
@ -252,7 +218,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ld1r {v4.2s}, [A_PTR], #4 // [A0, A0]
|
||||
ld1 {v5.s}[0], [A_PTR], #4 // A1
|
||||
ld1 {v6.2s}, [X_PTR], #8 // [X1, X0]
|
||||
fneg s16, s5
|
||||
eor v16.16b, v16.16b, v16.16b
|
||||
fsub s16, s16, s5
|
||||
ins v5.s[1], v16.s[0] // [-A1, A1]
|
||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
ext v5.8b, v5.8b, v5.8b, #4 // [A1, -A1]
|
||||
|
@ -264,7 +231,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ld1r {v4.2d}, [A_PTR], #8 // [A0, A0]
|
||||
ld1 {v5.d}[0], [A_PTR], #8 // A1
|
||||
ld1 {v6.2d}, [X_PTR], #16 // [X1, X0]
|
||||
fneg d16, d5
|
||||
eor v16.16b, v16.16b, v16.16b
|
||||
fsub d16, d16, d5
|
||||
ins v5.d[1], v16.d[0] // [-A1, A1]
|
||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
ext v5.16b, v5.16b, v5.16b, #8 // [A1, -A1]
|
||||
|
@ -284,7 +252,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ld1r {v4.2s}, [A_PTR], #4 // [A0, A0]
|
||||
ld1 {v5.s}[0], [A_PTR], #4 // A1
|
||||
ld1 {v6.2s}, [X_PTR], INC_X // [X1, X0]
|
||||
fneg s16, s5
|
||||
eor v16.16b, v16.16b, v16.16b
|
||||
fsub s16, s16, s5
|
||||
ins v5.s[1], v16.s[0] // [-A1, A1]
|
||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
ext v5.8b, v5.8b, v5.8b, #4 // [A1, -A1]
|
||||
|
@ -296,7 +265,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
ld1r {v4.2d}, [A_PTR], #8 // [A0, A0]
|
||||
ld1 {v5.d}[0], [A_PTR], #8 // A1
|
||||
ld1 {v6.2d}, [X_PTR], INC_X // [X1, X0]
|
||||
fneg d16, d5
|
||||
eor v16.16b, v16.16b, v16.16b
|
||||
fsub d16, d16, d5
|
||||
ins v5.d[1], v16.d[0] // [-A1, A1]
|
||||
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
ext v5.16b, v5.16b, v5.16b, #8 // [A1, -A1]
|
||||
|
|
|
@ -28,201 +28,261 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ASSEMBLER
|
||||
#include "common.h"
|
||||
|
||||
#define N x0 /* vector length */
|
||||
#define X x1 /* X vector address */
|
||||
#define INC_X x2 /* X stride */
|
||||
#define I x5 /* loop variable */
|
||||
#define N x0
|
||||
#define X x1
|
||||
#define INC_X x2
|
||||
|
||||
/*******************************************************************************
|
||||
* Macro definitions
|
||||
*******************************************************************************/
|
||||
#define I x3
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
#define TMPF s6
|
||||
#define SSQ s0
|
||||
#define TMPVF {v6.s}[0]
|
||||
#define SZ 4
|
||||
#define SSQ s0
|
||||
#define SCALE s1
|
||||
#define REGZERO s6
|
||||
#define REGONE s7
|
||||
#else
|
||||
#define TMPF d6
|
||||
#define SSQ d0
|
||||
#define TMPVF {v6.d}[0]
|
||||
#define SZ 8
|
||||
#define SSQ d0
|
||||
#define SCALE d1
|
||||
#define REGZERO d6
|
||||
#define REGONE d7
|
||||
#endif
|
||||
|
||||
/******************************************************************************/
|
||||
/**************************************************************************************
|
||||
* Macro definitions
|
||||
**************************************************************************************/
|
||||
|
||||
.macro KERNEL_F1
|
||||
#if !defined(DOUBLE)
|
||||
ld1 {v1.2s}, [X], #8
|
||||
fmul v1.2s, v1.2s, v1.2s
|
||||
faddp TMPF, v1.2s
|
||||
fadd SSQ, SSQ, TMPF
|
||||
ldr s4, [X], #4
|
||||
fcmp s4, REGZERO
|
||||
beq KERNEL_F1_NEXT_\@
|
||||
fabs s4, s4
|
||||
fcmp SCALE, s4
|
||||
bge KERNEL_F1_SCALE_GE_XR_\@
|
||||
fdiv s2, SCALE, s4
|
||||
fmul s2, s2, s2
|
||||
fmul s3, SSQ, s2
|
||||
fadd SSQ, REGONE, s3
|
||||
fmov SCALE, s4
|
||||
b KERNEL_F1_NEXT_\@
|
||||
KERNEL_F1_SCALE_GE_XR_\@:
|
||||
fdiv s2, s4, SCALE
|
||||
fmla SSQ, s2, v2.s[0]
|
||||
KERNEL_F1_NEXT_\@:
|
||||
ldr s5, [X], #4
|
||||
fcmp s5, REGZERO
|
||||
beq KERNEL_F1_END_\@
|
||||
fabs s5, s5
|
||||
fcmp SCALE, s5
|
||||
bge KERNEL_F1_SCALE_GE_XI_\@
|
||||
fdiv s2, SCALE, s5
|
||||
fmul s2, s2, s2
|
||||
fmul s3, SSQ, s2
|
||||
fadd SSQ, REGONE, s3
|
||||
fmov SCALE, s5
|
||||
b KERNEL_F1_END_\@
|
||||
KERNEL_F1_SCALE_GE_XI_\@:
|
||||
fdiv s2, s5, SCALE
|
||||
fmla SSQ, s2, v2.s[0]
|
||||
#else
|
||||
ld1 {v1.2d}, [X], #16
|
||||
fmul v1.2d, v1.2d, v1.2d
|
||||
faddp TMPF, v1.2d
|
||||
fadd SSQ, SSQ, TMPF
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F8
|
||||
#if !defined(DOUBLE)
|
||||
ld1 {v1.4s, v2.4s}, [X], #32
|
||||
fmla v0.4s, v1.4s, v1.4s
|
||||
fmla v5.4s, v2.4s, v2.4s
|
||||
ld1 {v3.4s,v4.4s}, [X], #32
|
||||
fmla v0.4s, v3.4s, v3.4s
|
||||
fmla v5.4s, v4.4s, v4.4s
|
||||
PRFM PLDL1KEEP, [X, #1024]
|
||||
#else // DOUBLE
|
||||
ld1 {v1.2d, v2.2d}, [X], #32
|
||||
fmla v0.2d, v1.2d, v1.2d
|
||||
fmla v5.2d, v2.2d, v2.2d
|
||||
ld1 {v3.2d, v4.2d}, [X], #32
|
||||
fmla v0.2d, v3.2d, v3.2d
|
||||
fmla v5.2d, v4.2d, v4.2d
|
||||
|
||||
ld1 {v16.2d, v17.2d}, [X], #32
|
||||
fmla v0.2d, v16.2d, v16.2d
|
||||
fmla v5.2d, v17.2d, v17.2d
|
||||
ld1 {v18.2d, v19.2d}, [X], #32
|
||||
fmla v0.2d, v18.2d, v18.2d
|
||||
fmla v5.2d, v19.2d, v19.2d
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro nrm2_kernel_F8_FINALIZE
|
||||
#if !defined(DOUBLE)
|
||||
fadd v0.4s, v0.4s, v5.4s
|
||||
ext v1.16b, v0.16b, v0.16b, #8
|
||||
fadd v0.2s, v0.2s, v1.2s
|
||||
faddp SSQ, v0.2s
|
||||
#else
|
||||
fadd v0.2d, v0.2d, v5.2d
|
||||
faddp SSQ, v0.2d
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro INIT_S
|
||||
#if !defined(DOUBLE)
|
||||
lsl INC_X, INC_X, #3
|
||||
ld1 {v1.2s}, [X], INC_X
|
||||
fmul v1.2s, v1.2s, v1.2s
|
||||
faddp SSQ, v1.2s
|
||||
#else
|
||||
lsl INC_X, INC_X, #4
|
||||
ld1 {v1.2d}, [X], INC_X
|
||||
fmul v1.2d, v1.2d, v1.2d
|
||||
faddp SSQ, v1.2d
|
||||
ldr d4, [X], #8
|
||||
fcmp d4, REGZERO
|
||||
beq KERNEL_F1_NEXT_\@
|
||||
fabs d4, d4
|
||||
fcmp SCALE, d4
|
||||
bge KERNEL_F1_SCALE_GE_XR_\@
|
||||
fdiv d2, SCALE, d4
|
||||
fmul d2, d2, d2
|
||||
fmul d3, SSQ, d2
|
||||
fadd SSQ, REGONE, d3
|
||||
fmov SCALE, d4
|
||||
b KERNEL_F1_NEXT_\@
|
||||
KERNEL_F1_SCALE_GE_XR_\@:
|
||||
fdiv d2, d4, SCALE
|
||||
fmla SSQ, d2, v2.d[0]
|
||||
KERNEL_F1_NEXT_\@:
|
||||
ldr d5, [X], #8
|
||||
fcmp d5, REGZERO
|
||||
beq KERNEL_F1_END_\@
|
||||
fabs d5, d5
|
||||
fcmp SCALE, d5
|
||||
bge KERNEL_F1_SCALE_GE_XI_\@
|
||||
fdiv d2, SCALE, d5
|
||||
fmul d2, d2, d2
|
||||
fmul d3, SSQ, d2
|
||||
fadd SSQ, REGONE, d3
|
||||
fmov SCALE, d5
|
||||
b KERNEL_F1_END_\@
|
||||
KERNEL_F1_SCALE_GE_XI_\@:
|
||||
fdiv d2, d5, SCALE
|
||||
fmla SSQ, d2, v2.d[0]
|
||||
#endif
|
||||
KERNEL_F1_END_\@:
|
||||
.endm
|
||||
|
||||
.macro KERNEL_S1
|
||||
#if !defined(DOUBLE)
|
||||
ld1 {v1.2s}, [X], INC_X
|
||||
fmul v1.2s, v1.2s, v1.2s
|
||||
faddp TMPF, v1.2s
|
||||
fadd SSQ, SSQ, TMPF
|
||||
ldr s4, [X]
|
||||
fcmp s4, REGZERO
|
||||
beq KERNEL_S1_NEXT_\@
|
||||
fabs s4, s4
|
||||
fcmp SCALE, s4
|
||||
bge KERNEL_S1_SCALE_GE_XR_\@
|
||||
fdiv s2, SCALE, s4
|
||||
fmul s2, s2, s2
|
||||
fmul s3, SSQ, s2
|
||||
fadd SSQ, REGONE, s3
|
||||
fmov SCALE, s4
|
||||
b KERNEL_S1_NEXT_\@
|
||||
KERNEL_S1_SCALE_GE_XR_\@:
|
||||
fdiv s2, s4, SCALE
|
||||
fmla SSQ, s2, v2.s[0]
|
||||
KERNEL_S1_NEXT_\@:
|
||||
ldr s5, [X, #4]
|
||||
fcmp s5, REGZERO
|
||||
beq KERNEL_S1_END_\@
|
||||
fabs s5, s5
|
||||
fcmp SCALE, s5
|
||||
bge KERNEL_S1_SCALE_GE_XI_\@
|
||||
fdiv s2, SCALE, s5
|
||||
fmul s2, s2, s2
|
||||
fmul s3, SSQ, s2
|
||||
fadd SSQ, REGONE, s3
|
||||
fmov SCALE, s5
|
||||
b KERNEL_S1_END_\@
|
||||
KERNEL_S1_SCALE_GE_XI_\@:
|
||||
fdiv s2, s5, SCALE
|
||||
fmla SSQ, s2, v2.s[0]
|
||||
#else
|
||||
ld1 {v1.2d}, [X], INC_X
|
||||
fmul v1.2d, v1.2d, v1.2d
|
||||
faddp TMPF, v1.2d
|
||||
fadd SSQ, SSQ, TMPF
|
||||
ldr d4, [X]
|
||||
fcmp d4, REGZERO
|
||||
beq KERNEL_S1_NEXT_\@
|
||||
fabs d4, d4
|
||||
fcmp SCALE, d4
|
||||
bge KERNEL_S1_SCALE_GE_XR_\@
|
||||
fdiv d2, SCALE, d4
|
||||
fmul d2, d2, d2
|
||||
fmul d3, SSQ, d2
|
||||
fadd SSQ, REGONE, d3
|
||||
fmov SCALE, d4
|
||||
b KERNEL_S1_NEXT_\@
|
||||
KERNEL_S1_SCALE_GE_XR_\@:
|
||||
fdiv d2, d4, SCALE
|
||||
fmla SSQ, d2, v2.d[0]
|
||||
KERNEL_S1_NEXT_\@:
|
||||
ldr d5, [X, #8]
|
||||
fcmp d5, REGZERO
|
||||
beq KERNEL_S1_END_\@
|
||||
fabs d5, d5
|
||||
fcmp SCALE, d5
|
||||
bge KERNEL_S1_SCALE_GE_XI_\@
|
||||
fdiv d2, SCALE, d5
|
||||
fmul d2, d2, d2
|
||||
fmul d3, SSQ, d2
|
||||
fadd SSQ, REGONE, d3
|
||||
fmov SCALE, d5
|
||||
b KERNEL_S1_END_\@
|
||||
KERNEL_S1_SCALE_GE_XI_\@:
|
||||
fdiv d2, d5, SCALE
|
||||
fmla SSQ, d2, v2.d[0]
|
||||
#endif
|
||||
KERNEL_S1_END_\@:
|
||||
add X, X, INC_X
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F8
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
KERNEL_F1
|
||||
.endm
|
||||
|
||||
.macro INIT_S
|
||||
#if !defined(DOUBLE)
|
||||
lsl INC_X, INC_X, #3 // INC_X * SIZE
|
||||
#else
|
||||
lsl INC_X, INC_X, #4 // INC_X * SIZE
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/*******************************************************************************
|
||||
.macro INIT
|
||||
eor v1.16b, v1.16b, v1.16b // scale=0.0
|
||||
fmov SSQ, #1.0
|
||||
fmov REGONE, SSQ
|
||||
fmov REGZERO, SCALE
|
||||
.endm
|
||||
|
||||
/**************************************************************************************
|
||||
* End of macro definitions
|
||||
*******************************************************************************/
|
||||
**************************************************************************************/
|
||||
|
||||
PROLOGUE
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
fmov SSQ, wzr
|
||||
fmov s5, SSQ
|
||||
#else
|
||||
fmov SSQ, xzr
|
||||
fmov d5, SSQ
|
||||
#endif
|
||||
.align 5
|
||||
|
||||
INIT
|
||||
|
||||
cmp N, #0
|
||||
ble nrm2_kernel_L999
|
||||
|
||||
cmp INC_X, #0
|
||||
beq nrm2_kernel_L999
|
||||
|
||||
cmp N, xzr
|
||||
ble nrm2_kernel_zero
|
||||
cmp INC_X, xzr
|
||||
ble nrm2_kernel_zero
|
||||
cmp INC_X, #1
|
||||
bne nrm2_kernel_S_BEGIN
|
||||
|
||||
nrm2_kernel_F_BEGIN:
|
||||
|
||||
asr I, N, #3
|
||||
asr I, N, #3 // I = N / 8
|
||||
cmp I, xzr
|
||||
beq nrm2_kernel_F1_INIT
|
||||
ble nrm2_kernel_F1
|
||||
|
||||
nrm2_kernel_F8:
|
||||
|
||||
KERNEL_F8
|
||||
|
||||
subs I, I, #1
|
||||
bne nrm2_kernel_F8
|
||||
|
||||
nrm2_kernel_F8_FINALIZE
|
||||
subs I, I, #1
|
||||
bne nrm2_kernel_F8
|
||||
|
||||
nrm2_kernel_F1:
|
||||
|
||||
ands I, N, #7
|
||||
ble nrm2_kernel_L999
|
||||
|
||||
|
||||
nrm2_kernel_F10:
|
||||
|
||||
KERNEL_F1
|
||||
|
||||
subs I, I, #1
|
||||
bne nrm2_kernel_F10
|
||||
subs I, I, #1
|
||||
bne nrm2_kernel_F10
|
||||
|
||||
b nrm2_kernel_L999
|
||||
|
||||
nrm2_kernel_F1_INIT:
|
||||
|
||||
b nrm2_kernel_F1
|
||||
|
||||
nrm2_kernel_S_BEGIN:
|
||||
|
||||
INIT_S
|
||||
|
||||
subs N, N, #1
|
||||
ble nrm2_kernel_L999
|
||||
mov I, N
|
||||
|
||||
asr I, N, #2
|
||||
cmp I, xzr
|
||||
ble nrm2_kernel_S1
|
||||
|
||||
nrm2_kernel_S4:
|
||||
|
||||
KERNEL_S1
|
||||
KERNEL_S1
|
||||
KERNEL_S1
|
||||
KERNEL_S1
|
||||
|
||||
subs I, I, #1
|
||||
bne nrm2_kernel_S4
|
||||
|
||||
nrm2_kernel_S1:
|
||||
|
||||
ands I, N, #3
|
||||
ble nrm2_kernel_L999
|
||||
.align 5
|
||||
|
||||
nrm2_kernel_S10:
|
||||
|
||||
KERNEL_S1
|
||||
|
||||
subs I, I, #1
|
||||
bne nrm2_kernel_S10
|
||||
subs I, I, #1
|
||||
bne nrm2_kernel_S10
|
||||
|
||||
|
||||
nrm2_kernel_L999:
|
||||
fsqrt SSQ, SSQ
|
||||
ret
|
||||
fmul SSQ, SCALE, SSQ
|
||||
|
||||
nrm2_kernel_zero:
|
||||
ret
|
||||
|
||||
EPILOGUE
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define X x3 /* X vector address */
|
||||
#define INC_X x4 /* X stride */
|
||||
#define I x5 /* loop variable */
|
||||
#define X_COPY x6 /* Copy of X */
|
||||
|
||||
/*******************************************************************************
|
||||
* Macro definitions
|
||||
|
@ -50,43 +51,55 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro INIT
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
|
||||
fneg s2, DA_I
|
||||
ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I
|
||||
ext v1.8b, v1.8b, v1.8b, #4 // v1 = DA_I, -DA_I
|
||||
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
|
||||
#else
|
||||
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
|
||||
fneg d2, DA_I
|
||||
ins v1.d[1], v2.d[0] // v1 = DA_I, DA_I
|
||||
ext v1.16b, v1.16b, v1.16b, #8 // v1 = DA_I, -DA_I
|
||||
#endif
|
||||
|
||||
.endm
|
||||
|
||||
.macro KERNEL_F1
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
ld1 {v2.2s}, [X] // X1, X0
|
||||
ext v3.8b, v2.8b, v2.8b, #4 // X0, X1
|
||||
fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0
|
||||
fmla v2.2s, v3.2s, v1.2s // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||
st1 {v2.2s}, [X], #8
|
||||
fmul s3, DA_R, v2.s[0] // DA_R*X0
|
||||
fmul s5, DA_I, v2.s[1] // DA_I*X1
|
||||
fsub s3, s3, s5 // DA_R*X0-DA_I*X1
|
||||
|
||||
fmul s4, DA_I, v2.s[0] // DA_I*X0
|
||||
fmul s5, DA_R, v2.s[1] // DA_R*X1
|
||||
fadd s4, s4, s5 // DA_I*X0+DA_R*X1
|
||||
|
||||
ins v3.s[1], v4.s[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||
st1 {v3.2s}, [X], #8
|
||||
#else
|
||||
ld1 {v2.2d}, [X] // X1, X0
|
||||
ext v3.16b, v2.16b, v2.16b, #8 // X0, X1
|
||||
fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0
|
||||
fmla v2.2d, v3.2d, v1.2d // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||
st1 {v2.2d}, [X], #16
|
||||
#endif
|
||||
fmul d3, DA_R, v2.d[0] // DA_R*X0
|
||||
fmul d5, DA_I, v2.d[1] // DA_I*X1
|
||||
fsub d3, d3, d5 // DA_R*X0-DA_I*X1
|
||||
|
||||
fmul d4, DA_I, v2.d[0] // DA_I*X0
|
||||
fmul d5, DA_R, v2.d[1] // DA_R*X1
|
||||
fadd d4, d4, d5 // DA_I*X0+DA_R*X1
|
||||
|
||||
ins v3.d[1], v4.d[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||
st1 {v3.2d}, [X], #16
|
||||
#endif
|
||||
.endm
|
||||
|
||||
.macro KERNEL_INIT_F4
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
// Replicate the lower 2 floats into the upper 2 slots
|
||||
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R, DA_R, DA_R
|
||||
ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I, DA_I, DA_I
|
||||
ins v16.s[0], v0.s[0]
|
||||
ins v16.s[1], v16.s[0]
|
||||
ins v16.d[1], v16.d[0]
|
||||
ins v17.s[0], v1.s[0]
|
||||
ins v17.s[1], v17.s[0]
|
||||
ins v17.d[1], v17.d[0]
|
||||
#else //DOUBLE
|
||||
ins v16.d[0], v0.d[0]
|
||||
ins v16.d[1], v16.d[0]
|
||||
ins v17.d[0], v1.d[0]
|
||||
ins v17.d[1], v17.d[0]
|
||||
#endif
|
||||
|
||||
.endm
|
||||
|
@ -94,46 +107,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.macro KERNEL_F4
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
ld1 {v2.4s,v3.4s}, [X] // V2 = X[3], X[2], X[1], X[0]
|
||||
// V3 = X[7], X[6], X[5], X[4]
|
||||
ld2 {v2.4s, v3.4s}, [X], #32
|
||||
|
||||
ext v6.8b, v2.8b, v2.8b, #4 // V6 = - , - , X[0], X[1]
|
||||
ins v6.s[2], v2.s[3] // V6 = - , X[3], X[0], X[1]
|
||||
ins v6.s[3], v2.s[2] // V6 = X[2], X[3], X[0], X[1]
|
||||
fmul v2.4s, v0.4s, v2.4s // X'[ix] += DA_R * X[ix]
|
||||
// X'[ix+1] += DA_R * X[ix+1]
|
||||
fmla v2.4s, v1.4s, v6.4s // X'[ix] += -DA_I * X[ix+1]
|
||||
// X'[ix+1] += DA_I * X[ix]
|
||||
fmul v4.4s, v2.4s, v16.4s
|
||||
fmul v6.4s, v3.4s, v17.4s
|
||||
fsub v4.4s, v4.4s, v6.4s
|
||||
|
||||
ext v7.8b, v3.8b, v3.8b, #4 // V7 = - , - , X[4], X[5]
|
||||
ins v7.s[2], v3.s[3] // V7 = - , X[7], X[4], X[5]
|
||||
ins v7.s[3], v3.s[2] // V7 = X[6], X[7], X[4], X[5]
|
||||
fmul v3.4s, v0.4s, v3.4s // X'[ix] += DA_R * X[ix]
|
||||
// X'[ix+1] += DA_R * X[ix+1]
|
||||
fmla v3.4s, v1.4s, v7.4s // X'[ix] += -DA_I * X[ix+1]
|
||||
// X'[ix+1] += DA_I * X[ix]
|
||||
fmul v5.4s, v2.4s, v17.4s
|
||||
fmul v6.4s, v3.4s, v16.4s
|
||||
fadd v5.4s, v5.4s, v6.4s
|
||||
|
||||
st1 {v2.4s,v3.4s}, [X], #32
|
||||
st2 {v4.4s, v5.4s}, [X_COPY], #32
|
||||
#else // DOUBLE
|
||||
ld1 {v2.2d,v3.2d,v4.2d,v5.2d}, [X] // CX0, CX1, CX2, CX3
|
||||
ext v20.16b, v2.16b, v2.16b, #8 // X[ix], X[ix+1]
|
||||
ext v21.16b, v3.16b, v3.16b, #8 // X[ix], X[ix+1]
|
||||
ext v22.16b, v4.16b, v4.16b, #8 // X[ix], X[ix+1]
|
||||
ext v23.16b, v5.16b, v5.16b, #8 // X[ix], X[ix+1]
|
||||
ld2 {v2.2d, v3.2d}, [X], #32
|
||||
|
||||
fmul v2.2d, v0.2d, v2.2d
|
||||
fmla v2.2d, v1.2d, v20.2d
|
||||
fmul v4.2d, v2.2d, v16.2d
|
||||
fmul v6.2d, v3.2d, v17.2d
|
||||
fsub v4.2d, v4.2d, v6.2d
|
||||
fmul v5.2d, v2.2d, v17.2d
|
||||
fmul v6.2d, v3.2d, v16.2d
|
||||
fadd v5.2d, v5.2d, v6.2d
|
||||
|
||||
fmul v3.2d, v0.2d, v3.2d
|
||||
fmla v3.2d, v1.2d, v21.2d
|
||||
st1 {v2.2d,v3.2d}, [X], #32
|
||||
st2 {v4.2d, v5.2d}, [X_COPY], #32
|
||||
|
||||
fmul v4.2d, v0.2d, v4.2d
|
||||
fmla v4.2d, v1.2d, v22.2d
|
||||
ld2 {v18.2d, v19.2d}, [X], #32
|
||||
|
||||
fmul v5.2d, v0.2d, v5.2d
|
||||
fmla v5.2d, v1.2d, v23.2d
|
||||
st1 {v4.2d,v5.2d}, [X], #32
|
||||
fmul v20.2d, v18.2d, v16.2d
|
||||
fmul v6.2d, v19.2d, v17.2d
|
||||
fsub v20.2d, v20.2d, v6.2d
|
||||
fmul v21.2d, v18.2d, v17.2d
|
||||
fmul v6.2d, v19.2d, v16.2d
|
||||
fadd v21.2d, v21.2d, v6.2d
|
||||
|
||||
st2 {v20.2d, v21.2d}, [X_COPY], #32
|
||||
#endif
|
||||
PRFM PLDL1KEEP, [X, #1024]
|
||||
.endm
|
||||
|
@ -149,21 +155,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
.endm
|
||||
|
||||
.macro KERNEL_S1
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
ld1 {v2.2s}, [X] // X1, X0
|
||||
ext v3.8b, v2.8b, v2.8b, #4 // X0, X1
|
||||
fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0
|
||||
fmla v2.2s, v3.2s, v1.2s // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||
st1 {v2.2s}, [X], INC_X
|
||||
fmul s3, DA_R, v2.s[0] // DA_R*X0
|
||||
fmul s5, DA_I, v2.s[1] // DA_I*X1
|
||||
fsub s3, s3, s5 // DA_R*X0-DA_I*X1
|
||||
|
||||
fmul s4, DA_I, v2.s[0] // DA_I*X0
|
||||
fmul s5, DA_R, v2.s[1] // DA_R*X1
|
||||
fadd s4, s4, s5 // DA_I*X0+DA_R*X1
|
||||
|
||||
ins v3.s[1], v4.s[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||
st1 {v3.2s}, [X], INC_X
|
||||
#else
|
||||
ld1 {v2.2d}, [X] // X1, X0
|
||||
ext v3.16b, v2.16b, v2.16b, #8 // X0, X1
|
||||
fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0
|
||||
fmla v2.2d, v3.2d, v1.2d // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||
st1 {v2.2d}, [X], INC_X
|
||||
#endif
|
||||
fmul d3, DA_R, v2.d[0] // DA_R*X0
|
||||
fmul d5, DA_I, v2.d[1] // DA_I*X1
|
||||
fsub d3, d3, d5 // DA_R*X0-DA_I*X1
|
||||
|
||||
fmul d4, DA_I, v2.d[0] // DA_I*X0
|
||||
fmul d5, DA_R, v2.d[1] // DA_R*X1
|
||||
fadd d4, d4, d5 // DA_I*X0+DA_R*X1
|
||||
|
||||
ins v3.d[1], v4.d[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
|
||||
st1 {v3.2d}, [X], INC_X
|
||||
#endif
|
||||
.endm
|
||||
|
||||
/*******************************************************************************
|
||||
|
@ -171,21 +187,54 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
*******************************************************************************/
|
||||
|
||||
PROLOGUE
|
||||
|
||||
b zscal_begin
|
||||
data_ar:
|
||||
.word 0x3e44fae6
|
||||
data_ai:
|
||||
.word 0x3d320fa2
|
||||
data_xr:
|
||||
.word 0x3f4baff1
|
||||
data_xi:
|
||||
.word 0xbe8ef0bd
|
||||
|
||||
zscal_begin:
|
||||
|
||||
ldr s20, data_ar
|
||||
ldr s21, data_ai
|
||||
ldr s22, data_xr
|
||||
ldr s23, data_xi
|
||||
|
||||
fmul s24, s22, s21
|
||||
fmla s24, s23, v20.s[0]
|
||||
|
||||
fmul s25, s22, s21
|
||||
fmul s26, s23, s20
|
||||
fadd s25, s25, s26
|
||||
|
||||
mov X_COPY, X
|
||||
|
||||
cmp N, xzr
|
||||
ble zscal_kernel_L999
|
||||
|
||||
fcmp DA_R, #0.0
|
||||
bne zscal_kernel_1
|
||||
bne zscal_kernel_R_non_zero
|
||||
|
||||
fcmp DA_I, #0.0
|
||||
beq zscal_kernel_zero
|
||||
beq zscal_kernel_RI_zero
|
||||
|
||||
// TODO: special case DA_R == 0 && DA_I != 0
|
||||
b zscal_kernel_R_zero
|
||||
|
||||
zscal_kernel_1:
|
||||
zscal_kernel_R_non_zero:
|
||||
|
||||
// TODO: special case DA_R != 0 && DA_I == 0
|
||||
fcmp DA_I, #0.0
|
||||
beq zscal_kernel_I_zero
|
||||
|
||||
/*******************************************************************************
|
||||
* A_R != 0 && A_I != 0
|
||||
*******************************************************************************/
|
||||
|
||||
zscal_kernel_RI_non_zero:
|
||||
|
||||
INIT
|
||||
|
||||
|
@ -257,16 +306,85 @@ zscal_kernel_L999:
|
|||
mov w0, wzr
|
||||
ret
|
||||
|
||||
zscal_kernel_zero:
|
||||
/*******************************************************************************
|
||||
* A_R == 0 && A_I != 0
|
||||
*******************************************************************************/
|
||||
|
||||
zscal_kernel_R_zero:
|
||||
INIT_S
|
||||
|
||||
#if !defined(DOUBLE)
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub s2, s2, DA_I
|
||||
ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I
|
||||
#else
|
||||
eor v2.16b, v2.16b, v2.16b
|
||||
fsub d2, d2, DA_I
|
||||
ins v1.d[1], v2.d[0] // v1 = -DA_I, DA_I
|
||||
#endif
|
||||
|
||||
zscal_kernel_R_zero_1:
|
||||
#if !defined(DOUBLE)
|
||||
ld1 {v2.2s}, [X] // X1, X0
|
||||
fmul v2.2s, v2.2s, v1.2s // -DA_I*X1, DA_I*X0
|
||||
ext v2.8b, v2.8b, v2.8b, #4 // DA_I*X0, -DA_I*X1
|
||||
st1 {v2.2s}, [X]
|
||||
#else
|
||||
ld1 {v2.2d}, [X] // X1, X0
|
||||
fmul v2.2d, v2.2d, v1.2d // -DA_I*X1, DA_I*X0
|
||||
ext v2.16b, v2.16b, v2.16b, #8 // DA_I*X0, -DA_I*X1
|
||||
st1 {v2.2d}, [X]
|
||||
#endif
|
||||
add X, X, INC_X
|
||||
subs N, N, #1
|
||||
bne zscal_kernel_R_zero_1
|
||||
|
||||
mov w0, wzr
|
||||
ret
|
||||
|
||||
/*******************************************************************************
|
||||
* A_R != 0 && A_I == 0
|
||||
*******************************************************************************/
|
||||
|
||||
zscal_kernel_I_zero:
|
||||
INIT_S
|
||||
#if !defined(DOUBLE)
|
||||
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
|
||||
#else
|
||||
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
|
||||
#endif
|
||||
|
||||
zscal_kernel_I_zero_1:
|
||||
#if !defined(DOUBLE)
|
||||
ld1 {v2.2s}, [X] // X1, X0
|
||||
fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0
|
||||
st1 {v2.2s}, [X]
|
||||
#else
|
||||
ld1 {v2.2d}, [X] // X1, X0
|
||||
fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0
|
||||
st1 {v2.2d}, [X]
|
||||
#endif
|
||||
add X, X, INC_X
|
||||
subs N, N, #1
|
||||
bne zscal_kernel_I_zero_1
|
||||
|
||||
mov w0, wzr
|
||||
ret
|
||||
|
||||
/*******************************************************************************
|
||||
* A_R == 0 && A_I == 0
|
||||
*******************************************************************************/
|
||||
|
||||
zscal_kernel_RI_zero:
|
||||
|
||||
INIT_S
|
||||
|
||||
zscal_kernel_Z1:
|
||||
zscal_kernel_RI_zero_1:
|
||||
|
||||
stp DA_R, DA_I, [X]
|
||||
add X, X, INC_X
|
||||
subs N, N, #1
|
||||
bne zscal_kernel_Z1
|
||||
subs N, N, #1
|
||||
bne zscal_kernel_RI_zero_1
|
||||
|
||||
mov w0, wzr
|
||||
ret
|
||||
|
|
|
@ -187,73 +187,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
fmul v16.2d, v0.2d, v8.2d[0]
|
||||
OP_ii v16.2d, v1.2d, v9.2d[0]
|
||||
fmul v17.2d, v0.2d, v9.2d[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v17.2d, v17.2d
|
||||
eor v17.16b, v17.16b, v17.16b
|
||||
fmls v17.2d, v0.2d, v9.2d[0]
|
||||
#else
|
||||
fmul v17.2d, v0.2d, v9.2d[0]
|
||||
#endif
|
||||
OP_ir v17.2d, v1.2d, v8.2d[0]
|
||||
|
||||
fmul v18.2d, v2.2d, v8.2d[0]
|
||||
OP_ii v18.2d, v3.2d, v9.2d[0]
|
||||
fmul v19.2d, v2.2d, v9.2d[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v19.2d, v19.2d
|
||||
eor v19.16b, v19.16b, v19.16b
|
||||
fmls v19.2d, v2.2d, v9.2d[0]
|
||||
#else
|
||||
fmul v19.2d, v2.2d, v9.2d[0]
|
||||
#endif
|
||||
OP_ir v19.2d, v3.2d, v8.2d[0]
|
||||
|
||||
fmul v20.2d, v0.2d, v8.2d[1]
|
||||
OP_ii v20.2d, v1.2d, v9.2d[1]
|
||||
fmul v21.2d, v0.2d, v9.2d[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v21.2d, v21.2d
|
||||
eor v21.16b, v21.16b, v21.16b
|
||||
fmls v21.2d, v0.2d, v9.2d[1]
|
||||
#else
|
||||
fmul v21.2d, v0.2d, v9.2d[1]
|
||||
#endif
|
||||
OP_ir v21.2d, v1.2d, v8.2d[1]
|
||||
|
||||
fmul v22.2d, v2.2d, v8.2d[1]
|
||||
OP_ii v22.2d, v3.2d, v9.2d[1]
|
||||
fmul v23.2d, v2.2d, v9.2d[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v23.2d, v23.2d
|
||||
eor v23.16b, v23.16b, v23.16b
|
||||
fmls v23.2d, v2.2d, v9.2d[1]
|
||||
#else
|
||||
fmul v23.2d, v2.2d, v9.2d[1]
|
||||
#endif
|
||||
OP_ir v23.2d, v3.2d, v8.2d[1]
|
||||
|
||||
fmul v24.2d, v0.2d, v10.2d[0]
|
||||
OP_ii v24.2d, v1.2d, v11.2d[0]
|
||||
fmul v25.2d, v0.2d, v11.2d[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v25.2d, v25.2d
|
||||
eor v25.16b, v25.16b, v25.16b
|
||||
fmls v25.2d, v0.2d, v11.2d[0]
|
||||
#else
|
||||
fmul v25.2d, v0.2d, v11.2d[0]
|
||||
#endif
|
||||
OP_ir v25.2d, v1.2d, v10.2d[0]
|
||||
|
||||
fmul v26.2d, v2.2d, v10.2d[0]
|
||||
OP_ii v26.2d, v3.2d, v11.2d[0]
|
||||
fmul v27.2d, v2.2d, v11.2d[0]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v27.2d, v27.2d
|
||||
eor v27.16b, v27.16b, v27.16b
|
||||
fmls v27.2d, v2.2d, v11.2d[0]
|
||||
#else
|
||||
fmul v27.2d, v2.2d, v11.2d[0]
|
||||
#endif
|
||||
OP_ir v27.2d, v3.2d, v10.2d[0]
|
||||
|
||||
fmul v28.2d, v0.2d, v10.2d[1]
|
||||
OP_ii v28.2d, v1.2d, v11.2d[1]
|
||||
fmul v29.2d, v0.2d, v11.2d[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v29.2d, v29.2d
|
||||
eor v29.16b, v29.16b, v29.16b
|
||||
fmls v29.2d, v0.2d, v11.2d[1]
|
||||
#else
|
||||
fmul v29.2d, v0.2d, v11.2d[1]
|
||||
#endif
|
||||
OP_ir v29.2d, v1.2d, v10.2d[1]
|
||||
|
||||
fmul v30.2d, v2.2d, v10.2d[1]
|
||||
OP_ii v30.2d, v3.2d, v11.2d[1]
|
||||
fmul v31.2d, v2.2d, v11.2d[1]
|
||||
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
|
||||
defined(RR) || defined(RC) || defined(CR) || defined(CC)
|
||||
fneg v31.2d, v31.2d
|
||||
eor v31.16b, v31.16b, v31.16b
|
||||
fmls v31.2d, v2.2d, v11.2d[1]
|
||||
#else
|
||||
fmul v31.2d, v2.2d, v11.2d[1]
|
||||
#endif
|
||||
OP_ir v31.2d, v3.2d, v10.2d[1]
|
||||
|
||||
|
|
|
@ -137,8 +137,13 @@ endif()
|
|||
|
||||
|
||||
add_library(blas ${ALLOBJ})
|
||||
if(UNIX)
|
||||
target_link_libraries(blas m)
|
||||
endif()
|
||||
#if(UNIX)
|
||||
# target_link_libraries(blas m)
|
||||
#endif()
|
||||
set_target_properties(
|
||||
blas PROPERTIES
|
||||
VERSION ${LAPACK_VERSION}
|
||||
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||
)
|
||||
target_link_libraries(blas)
|
||||
lapack_install_library(blas)
|
||||
|
|
|
@ -23,8 +23,9 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> forms the dot product of two vectors, conjugating the first
|
||||
*> vector.
|
||||
*> CDOTC forms the dot product of two complex vectors
|
||||
*> CDOTC = X^H * Y
|
||||
*>
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -35,7 +36,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup complex_blas_level1
|
||||
*
|
||||
|
@ -51,10 +52,10 @@
|
|||
* =====================================================================
|
||||
COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INCX,INCY,N
|
||||
|
|
|
@ -23,7 +23,9 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> CDOTU forms the dot product of two vectors.
|
||||
*> CDOTU forms the dot product of two complex vectors
|
||||
*> CDOTU = X^T * Y
|
||||
*>
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -34,7 +36,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup complex_blas_level1
|
||||
*
|
||||
|
@ -50,10 +52,10 @@
|
|||
* =====================================================================
|
||||
COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INCX,INCY,N
|
||||
|
|
|
@ -165,7 +165,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup complex_blas_level2
|
||||
*
|
||||
|
@ -187,10 +187,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE CGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
*
|
||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
COMPLEX ALPHA,BETA
|
||||
|
@ -319,26 +319,22 @@
|
|||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
K = KUP1 - J
|
||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
K = KUP1 - J
|
||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||
50 CONTINUE
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
K = KUP1 - J
|
||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
K = KUP1 - J
|
||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
JX = JX + INCX
|
||||
IF (J.GT.KU) KY = KY + INCY
|
||||
80 CONTINUE
|
||||
|
|
|
@ -166,7 +166,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup complex_blas_level3
|
||||
*
|
||||
|
@ -187,10 +187,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
||||
*
|
||||
* -- Reference BLAS level3 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level3 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
COMPLEX ALPHA,BETA
|
||||
|
@ -317,12 +317,10 @@
|
|||
60 CONTINUE
|
||||
END IF
|
||||
DO 80 L = 1,K
|
||||
IF (B(L,J).NE.ZERO) THEN
|
||||
TEMP = ALPHA*B(L,J)
|
||||
DO 70 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*B(L,J)
|
||||
DO 70 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
70 CONTINUE
|
||||
80 CONTINUE
|
||||
90 CONTINUE
|
||||
ELSE IF (CONJA) THEN
|
||||
|
@ -376,17 +374,15 @@
|
|||
170 CONTINUE
|
||||
END IF
|
||||
DO 190 L = 1,K
|
||||
IF (B(J,L).NE.ZERO) THEN
|
||||
TEMP = ALPHA*CONJG(B(J,L))
|
||||
DO 180 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
180 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*CONJG(B(J,L))
|
||||
DO 180 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
180 CONTINUE
|
||||
190 CONTINUE
|
||||
200 CONTINUE
|
||||
ELSE
|
||||
*
|
||||
* Form C := alpha*A*B**T + beta*C
|
||||
* Form C := alpha*A*B**T + beta*C
|
||||
*
|
||||
DO 250 J = 1,N
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
|
@ -399,12 +395,10 @@
|
|||
220 CONTINUE
|
||||
END IF
|
||||
DO 240 L = 1,K
|
||||
IF (B(J,L).NE.ZERO) THEN
|
||||
TEMP = ALPHA*B(J,L)
|
||||
DO 230 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
230 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*B(J,L)
|
||||
DO 230 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
230 CONTINUE
|
||||
240 CONTINUE
|
||||
250 CONTINUE
|
||||
END IF
|
||||
|
|
|
@ -136,7 +136,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup complex_blas_level2
|
||||
*
|
||||
|
@ -158,10 +158,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE CGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
*
|
||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
COMPLEX ALPHA,BETA
|
||||
|
@ -285,24 +285,20 @@
|
|||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> DCABS1 computes absolute value of a double complex number
|
||||
*> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -32,17 +32,17 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup double_blas_level1
|
||||
*
|
||||
* =====================================================================
|
||||
DOUBLE PRECISION FUNCTION DCABS1(Z)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
COMPLEX*16 Z
|
||||
|
|
|
@ -163,7 +163,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup double_blas_level2
|
||||
*
|
||||
|
@ -185,10 +185,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE DGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
*
|
||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE PRECISION ALPHA,BETA
|
||||
|
@ -312,26 +312,22 @@
|
|||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
K = KUP1 - J
|
||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
K = KUP1 - J
|
||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||
50 CONTINUE
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
K = KUP1 - J
|
||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
K = KUP1 - J
|
||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
JX = JX + INCX
|
||||
IF (J.GT.KU) KY = KY + INCY
|
||||
80 CONTINUE
|
||||
|
|
|
@ -166,7 +166,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup double_blas_level3
|
||||
*
|
||||
|
@ -187,10 +187,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
||||
*
|
||||
* -- Reference BLAS level3 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level3 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE PRECISION ALPHA,BETA
|
||||
|
@ -311,12 +311,10 @@
|
|||
60 CONTINUE
|
||||
END IF
|
||||
DO 80 L = 1,K
|
||||
IF (B(L,J).NE.ZERO) THEN
|
||||
TEMP = ALPHA*B(L,J)
|
||||
DO 70 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*B(L,J)
|
||||
DO 70 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
70 CONTINUE
|
||||
80 CONTINUE
|
||||
90 CONTINUE
|
||||
ELSE
|
||||
|
@ -353,12 +351,10 @@
|
|||
140 CONTINUE
|
||||
END IF
|
||||
DO 160 L = 1,K
|
||||
IF (B(J,L).NE.ZERO) THEN
|
||||
TEMP = ALPHA*B(J,L)
|
||||
DO 150 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
150 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*B(J,L)
|
||||
DO 150 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
150 CONTINUE
|
||||
160 CONTINUE
|
||||
170 CONTINUE
|
||||
ELSE
|
||||
|
|
|
@ -134,7 +134,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup double_blas_level2
|
||||
*
|
||||
|
@ -156,10 +156,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
*
|
||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
DOUBLE PRECISION ALPHA,BETA
|
||||
|
@ -278,24 +278,20 @@
|
|||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
|
|
|
@ -23,7 +23,8 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> DZASUM takes the sum of the absolute values.
|
||||
*> DZASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
|
||||
*> returns a single precision result.
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -34,7 +35,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup double_blas_level1
|
||||
*
|
||||
|
@ -51,10 +52,10 @@
|
|||
* =====================================================================
|
||||
DOUBLE PRECISION FUNCTION DZASUM(N,ZX,INCX)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INCX,N
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> ICAMAX finds the index of element having max. absolute value.
|
||||
*> ICAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)|
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -34,7 +34,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup aux_blas
|
||||
*
|
||||
|
@ -51,10 +51,10 @@
|
|||
* =====================================================================
|
||||
INTEGER FUNCTION ICAMAX(N,CX,INCX)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INCX,N
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> IDAMAX finds the index of element having max. absolute value.
|
||||
*> IDAMAX finds the index of the first element having maximum absolute value.
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -34,7 +34,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup aux_blas
|
||||
*
|
||||
|
@ -51,10 +51,10 @@
|
|||
* =====================================================================
|
||||
INTEGER FUNCTION IDAMAX(N,DX,INCX)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INCX,N
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> ISAMAX finds the index of element having max. absolute value.
|
||||
*> ISAMAX finds the index of the first element having maximum absolute value.
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -34,7 +34,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup aux_blas
|
||||
*
|
||||
|
@ -51,10 +51,10 @@
|
|||
* =====================================================================
|
||||
INTEGER FUNCTION ISAMAX(N,SX,INCX)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INCX,N
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> IZAMAX finds the index of element having max. absolute value.
|
||||
*> IZAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)|
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -34,7 +34,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup aux_blas
|
||||
*
|
||||
|
@ -51,10 +51,10 @@
|
|||
* =====================================================================
|
||||
INTEGER FUNCTION IZAMAX(N,ZX,INCX)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INCX,N
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> SCABS1 computes absolute value of a complex number
|
||||
*> SCABS1 computes |Re(.)| + |Im(.)| of a complex number
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -31,17 +31,17 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup single_blas_level1
|
||||
*
|
||||
* =====================================================================
|
||||
REAL FUNCTION SCABS1(Z)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
COMPLEX Z
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> SCASUM takes the sum of the absolute values of a complex vector and
|
||||
*> SCASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
|
||||
*> returns a single precision result.
|
||||
*> \endverbatim
|
||||
*
|
||||
|
@ -35,7 +35,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup single_blas_level1
|
||||
*
|
||||
|
@ -52,10 +52,10 @@
|
|||
* =====================================================================
|
||||
REAL FUNCTION SCASUM(N,CX,INCX)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INCX,N
|
||||
|
|
|
@ -163,7 +163,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup single_blas_level2
|
||||
*
|
||||
|
@ -185,10 +185,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE SGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
*
|
||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
REAL ALPHA,BETA
|
||||
|
@ -312,26 +312,22 @@
|
|||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
K = KUP1 - J
|
||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
K = KUP1 - J
|
||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||
50 CONTINUE
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
K = KUP1 - J
|
||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
K = KUP1 - J
|
||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
JX = JX + INCX
|
||||
IF (J.GT.KU) KY = KY + INCY
|
||||
80 CONTINUE
|
||||
|
|
|
@ -166,7 +166,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup single_blas_level3
|
||||
*
|
||||
|
@ -187,10 +187,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
||||
*
|
||||
* -- Reference BLAS level3 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level3 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
REAL ALPHA,BETA
|
||||
|
@ -311,12 +311,10 @@
|
|||
60 CONTINUE
|
||||
END IF
|
||||
DO 80 L = 1,K
|
||||
IF (B(L,J).NE.ZERO) THEN
|
||||
TEMP = ALPHA*B(L,J)
|
||||
DO 70 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*B(L,J)
|
||||
DO 70 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
70 CONTINUE
|
||||
80 CONTINUE
|
||||
90 CONTINUE
|
||||
ELSE
|
||||
|
@ -353,12 +351,10 @@
|
|||
140 CONTINUE
|
||||
END IF
|
||||
DO 160 L = 1,K
|
||||
IF (B(J,L).NE.ZERO) THEN
|
||||
TEMP = ALPHA*B(J,L)
|
||||
DO 150 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
150 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*B(J,L)
|
||||
DO 150 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
150 CONTINUE
|
||||
160 CONTINUE
|
||||
170 CONTINUE
|
||||
ELSE
|
||||
|
|
|
@ -134,7 +134,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup single_blas_level2
|
||||
*
|
||||
|
@ -156,10 +156,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
*
|
||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
REAL ALPHA,BETA
|
||||
|
@ -278,24 +278,20 @@
|
|||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
|
|
|
@ -23,7 +23,9 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> ZDOTC forms the dot product of a vector.
|
||||
*> ZDOTC forms the dot product of two complex vectors
|
||||
*> ZDOTC = X^H * Y
|
||||
*>
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -34,7 +36,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup complex16_blas_level1
|
||||
*
|
||||
|
@ -50,10 +52,10 @@
|
|||
* =====================================================================
|
||||
COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INCX,INCY,N
|
||||
|
|
|
@ -23,7 +23,9 @@
|
|||
*>
|
||||
*> \verbatim
|
||||
*>
|
||||
*> ZDOTU forms the dot product of two vectors.
|
||||
*> ZDOTU forms the dot product of two complex vectors
|
||||
*> ZDOTU = X^T * Y
|
||||
*>
|
||||
*> \endverbatim
|
||||
*
|
||||
* Authors:
|
||||
|
@ -34,7 +36,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup complex16_blas_level1
|
||||
*
|
||||
|
@ -50,10 +52,10 @@
|
|||
* =====================================================================
|
||||
COMPLEX*16 FUNCTION ZDOTU(N,ZX,INCX,ZY,INCY)
|
||||
*
|
||||
* -- Reference BLAS level1 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level1 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
INTEGER INCX,INCY,N
|
||||
|
|
|
@ -165,7 +165,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup complex16_blas_level2
|
||||
*
|
||||
|
@ -187,10 +187,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE ZGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
*
|
||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
COMPLEX*16 ALPHA,BETA
|
||||
|
@ -319,26 +319,22 @@
|
|||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
K = KUP1 - J
|
||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
K = KUP1 - J
|
||||
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(I) = Y(I) + TEMP*A(K+I,J)
|
||||
50 CONTINUE
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
K = KUP1 - J
|
||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
K = KUP1 - J
|
||||
DO 70 I = MAX(1,J-KU),MIN(M,J+KL)
|
||||
Y(IY) = Y(IY) + TEMP*A(K+I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
JX = JX + INCX
|
||||
IF (J.GT.KU) KY = KY + INCY
|
||||
80 CONTINUE
|
||||
|
|
|
@ -166,7 +166,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup complex16_blas_level3
|
||||
*
|
||||
|
@ -187,10 +187,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
|
||||
*
|
||||
* -- Reference BLAS level3 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level3 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
COMPLEX*16 ALPHA,BETA
|
||||
|
@ -317,12 +317,10 @@
|
|||
60 CONTINUE
|
||||
END IF
|
||||
DO 80 L = 1,K
|
||||
IF (B(L,J).NE.ZERO) THEN
|
||||
TEMP = ALPHA*B(L,J)
|
||||
DO 70 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*B(L,J)
|
||||
DO 70 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
70 CONTINUE
|
||||
80 CONTINUE
|
||||
90 CONTINUE
|
||||
ELSE IF (CONJA) THEN
|
||||
|
@ -376,17 +374,15 @@
|
|||
170 CONTINUE
|
||||
END IF
|
||||
DO 190 L = 1,K
|
||||
IF (B(J,L).NE.ZERO) THEN
|
||||
TEMP = ALPHA*DCONJG(B(J,L))
|
||||
DO 180 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
180 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*DCONJG(B(J,L))
|
||||
DO 180 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
180 CONTINUE
|
||||
190 CONTINUE
|
||||
200 CONTINUE
|
||||
ELSE
|
||||
*
|
||||
* Form C := alpha*A*B**T + beta*C
|
||||
* Form C := alpha*A*B**T + beta*C
|
||||
*
|
||||
DO 250 J = 1,N
|
||||
IF (BETA.EQ.ZERO) THEN
|
||||
|
@ -399,12 +395,10 @@
|
|||
220 CONTINUE
|
||||
END IF
|
||||
DO 240 L = 1,K
|
||||
IF (B(J,L).NE.ZERO) THEN
|
||||
TEMP = ALPHA*B(J,L)
|
||||
DO 230 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
230 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*B(J,L)
|
||||
DO 230 I = 1,M
|
||||
C(I,J) = C(I,J) + TEMP*A(I,L)
|
||||
230 CONTINUE
|
||||
240 CONTINUE
|
||||
250 CONTINUE
|
||||
END IF
|
||||
|
|
|
@ -136,7 +136,7 @@
|
|||
*> \author Univ. of Colorado Denver
|
||||
*> \author NAG Ltd.
|
||||
*
|
||||
*> \date November 2011
|
||||
*> \date November 2015
|
||||
*
|
||||
*> \ingroup complex16_blas_level2
|
||||
*
|
||||
|
@ -158,10 +158,10 @@
|
|||
* =====================================================================
|
||||
SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
|
||||
*
|
||||
* -- Reference BLAS level2 routine (version 3.4.0) --
|
||||
* -- Reference BLAS level2 routine (version 3.6.0) --
|
||||
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
|
||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||
* November 2011
|
||||
* November 2015
|
||||
*
|
||||
* .. Scalar Arguments ..
|
||||
COMPLEX*16 ALPHA,BETA
|
||||
|
@ -285,24 +285,20 @@
|
|||
JX = KX
|
||||
IF (INCY.EQ.1) THEN
|
||||
DO 60 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
DO 50 I = 1,M
|
||||
Y(I) = Y(I) + TEMP*A(I,J)
|
||||
50 CONTINUE
|
||||
JX = JX + INCX
|
||||
60 CONTINUE
|
||||
ELSE
|
||||
DO 80 J = 1,N
|
||||
IF (X(JX).NE.ZERO) THEN
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
END IF
|
||||
TEMP = ALPHA*X(JX)
|
||||
IY = KY
|
||||
DO 70 I = 1,M
|
||||
Y(IY) = Y(IY) + TEMP*A(I,J)
|
||||
IY = IY + INCY
|
||||
70 CONTINUE
|
||||
JX = JX + INCX
|
||||
80 CONTINUE
|
||||
END IF
|
||||
|
|
|
@ -30,17 +30,16 @@ macro(add_blas_test name src)
|
|||
get_filename_component(baseNAME ${src} NAME_WE)
|
||||
set(TEST_INPUT "${LAPACK_SOURCE_DIR}/BLAS/${baseNAME}.in")
|
||||
add_executable(${name} ${src})
|
||||
get_target_property(TEST_LOC ${name} LOCATION)
|
||||
target_link_libraries(${name} blas)
|
||||
if(EXISTS "${TEST_INPUT}")
|
||||
add_test(BLAS-${name} "${CMAKE_COMMAND}"
|
||||
-DTEST=${TEST_LOC}
|
||||
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}"
|
||||
-DTEST=$<TARGET_FILE:${name}>
|
||||
-DINPUT=${TEST_INPUT}
|
||||
-DINTDIR=${CMAKE_CFG_INTDIR}
|
||||
-P "${LAPACK_SOURCE_DIR}/TESTING/runtest.cmake")
|
||||
else()
|
||||
add_test(BLAS-${name} "${CMAKE_COMMAND}"
|
||||
-DTEST=${TEST_LOC}
|
||||
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}"
|
||||
-DTEST=$<TARGET_FILE:${name}>
|
||||
-DINTDIR=${CMAKE_CFG_INTDIR}
|
||||
-P "${LAPACK_SOURCE_DIR}/TESTING/runtest.cmake")
|
||||
endif()
|
||||
|
|
|
@ -120,7 +120,7 @@
|
|||
REAL RZERO
|
||||
PARAMETER ( RZERO = 0.0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||
$ NALMAX = 7, NBEMAX = 7 )
|
||||
|
|
|
@ -102,7 +102,7 @@
|
|||
REAL RZERO
|
||||
PARAMETER ( RZERO = 0.0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 128 )
|
||||
PARAMETER ( NMAX = 65 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||
* .. Local Scalars ..
|
||||
|
|
|
@ -117,7 +117,7 @@
|
|||
DOUBLE PRECISION ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||
$ NALMAX = 7, NBEMAX = 7 )
|
||||
|
|
|
@ -97,7 +97,7 @@
|
|||
DOUBLE PRECISION ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 128 )
|
||||
PARAMETER ( NMAX = 65 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||
* .. Local Scalars ..
|
||||
|
|
|
@ -117,7 +117,7 @@
|
|||
REAL ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||
$ NALMAX = 7, NBEMAX = 7 )
|
||||
|
|
|
@ -97,7 +97,7 @@
|
|||
REAL ZERO, ONE
|
||||
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 128 )
|
||||
PARAMETER ( NMAX = 65 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||
* .. Local Scalars ..
|
||||
|
|
|
@ -121,7 +121,7 @@
|
|||
DOUBLE PRECISION RZERO
|
||||
PARAMETER ( RZERO = 0.0D0 )
|
||||
INTEGER NMAX, INCMAX
|
||||
PARAMETER ( NMAX = 128, INCMAX = 2 )
|
||||
PARAMETER ( NMAX = 65, INCMAX = 2 )
|
||||
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
|
||||
$ NALMAX = 7, NBEMAX = 7 )
|
||||
|
|
|
@ -104,7 +104,7 @@
|
|||
DOUBLE PRECISION RZERO
|
||||
PARAMETER ( RZERO = 0.0D0 )
|
||||
INTEGER NMAX
|
||||
PARAMETER ( NMAX = 128 )
|
||||
PARAMETER ( NMAX = 65 )
|
||||
INTEGER NIDMAX, NALMAX, NBEMAX
|
||||
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
|
||||
* .. Local Scalars ..
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
message(STATUS "CBLAS enable")
|
||||
enable_language(C)
|
||||
|
||||
set(LAPACK_INSTALL_EXPORT_NAME cblas-targets)
|
||||
|
||||
# Create a header file cblas.h for the routines called in my C programs
|
||||
include(FortranCInterface)
|
||||
FortranCInterface_HEADER( ${CMAKE_CURRENT_SOURCE_DIR}/include/cblas_mangling.h
|
||||
MACRO_NAMESPACE "F77_"
|
||||
SYMBOL_NAMESPACE "F77_" )
|
||||
|
||||
# Old way to detect mangling
|
||||
#include(FortranMangling)
|
||||
#FORTRAN_MANGLING(CDEFS)
|
||||
#set(CDEFS ${CDEFS} CACHE STRING "Fortran Mangling" FORCE)
|
||||
#MESSAGE(STATUS "=========")
|
||||
|
||||
# --------------------------------------------------
|
||||
# Compiler Flags
|
||||
#ADD_DEFINITIONS( "-D${CDEFS}")
|
||||
|
||||
|
||||
include_directories( include )
|
||||
add_subdirectory(include)
|
||||
add_subdirectory(src)
|
||||
|
||||
macro(append_subdir_files variable dirname)
|
||||
get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable})
|
||||
foreach(depfile ${holder})
|
||||
list(APPEND ${variable} "${dirname}/${depfile}")
|
||||
endforeach()
|
||||
endmacro()
|
||||
|
||||
append_subdir_files(CBLAS_INCLUDE "include")
|
||||
INSTALL( FILES ${CBLAS_INCLUDE} DESTINATION include )
|
||||
|
||||
# --------------------------------------------------
|
||||
if(BUILD_TESTING)
|
||||
add_subdirectory(testing)
|
||||
add_subdirectory(examples)
|
||||
endif(BUILD_TESTING)
|
||||
|
||||
if(NOT BLAS_FOUND)
|
||||
set(ALL_TARGETS ${ALL_TARGETS} blas)
|
||||
endif(NOT BLAS_FOUND)
|
||||
|
||||
# Export cblas targets from the
|
||||
# install tree, if any.
|
||||
set(_cblas_config_install_guard_target "")
|
||||
if(ALL_TARGETS)
|
||||
install(EXPORT cblas-targets
|
||||
DESTINATION lib/cmake/cblas-${LAPACK_VERSION})
|
||||
# Choose one of the cblas targets to use as a guard for
|
||||
# cblas-config.cmake to load targets from the install tree.
|
||||
list(GET ALL_TARGETS 0 _cblas_config_install_guard_target)
|
||||
endif()
|
||||
|
||||
# Export cblas targets from the build tree, if any.
|
||||
set(_cblas_config_build_guard_target "")
|
||||
if(ALL_TARGETS)
|
||||
export(TARGETS ${ALL_TARGETS} FILE cblas-targets.cmake)
|
||||
|
||||
# Choose one of the cblas targets to use as a guard
|
||||
# for cblas-config.cmake to load targets from the build tree.
|
||||
list(GET ALL_TARGETS 0 _cblas_config_build_guard_target)
|
||||
endif()
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CMAKE/cblas-config-version.cmake.in
|
||||
${LAPACK_BINARY_DIR}/cblas-config-version.cmake @ONLY)
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CMAKE/cblas-config-build.cmake.in
|
||||
${LAPACK_BINARY_DIR}/cblas-config.cmake @ONLY)
|
||||
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc)
|
||||
install(FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/cblas.pc
|
||||
DESTINATION ${PKG_CONFIG_DIR}
|
||||
)
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake @ONLY)
|
||||
install(FILES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake
|
||||
${LAPACK_BINARY_DIR}/cblas-config-version.cmake
|
||||
DESTINATION lib/cmake/cblas-${LAPACK_VERSION}
|
||||
)
|
||||
|
||||
#install(EXPORT cblas-targets
|
||||
# DESTINATION lib/cmake/cblas-${LAPACK_VERSION})
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
include ../make.inc
|
||||
|
||||
all:
|
||||
cd include && cp cblas_mangling_with_flags.h cblas_mangling.h
|
||||
cd src && $(MAKE) all
|
||||
|
||||
|
||||
clean: cleanlib
|
||||
|
||||
cleanlib:
|
||||
cd src && $(MAKE) clean
|
||||
|
||||
cleanexe:
|
||||
cd testing && $(MAKE) cleanexe
|
||||
|
||||
cleanall: clean cleanexe
|
||||
rm -f $(CBLASLIB)
|
||||
cd examples && rm -f *.o cblas_ex1 cblas_ex2
|
||||
|
||||
cblas_testing:
|
||||
cd testing && $(MAKE) all
|
||||
|
||||
runtst:
|
||||
cd testing && $(MAKE) run
|
||||
|
||||
example: all
|
||||
cd examples && make all
|
|
@ -0,0 +1,49 @@
|
|||
#
|
||||
# Makefile.LINUX
|
||||
#
|
||||
#
|
||||
# If you compile, change the name to Makefile.in.
|
||||
#
|
||||
#
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Shell
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Platform
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
PLAT = LINUX
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Libraries and includes
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
BLLIB = $(home)/lib/librefblas.a
|
||||
CBLIB = ../lib/libcblas.a
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Compilers
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CC = gcc
|
||||
FC = gfortran
|
||||
LOADER = $(FC)
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Flags for Compilers
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CFLAGS = -O3 -DADD_
|
||||
FFLAGS = -O3
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Archive programs and flags
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
ARCH = ar
|
||||
ARCHFLAGS = cr
|
||||
RANLIB = ranlib
|
|
@ -0,0 +1,59 @@
|
|||
INSTALLATION
|
||||
|
||||
Make sure to set these variables appropriately in your Make.inc in the LAPACK folder:
|
||||
|
||||
CBLASLIB is your CBLAS library
|
||||
BLASLIB is your Legacy BLAS library (by default the Reference BLAS shipped within LAPACK)
|
||||
|
||||
Then type:
|
||||
|
||||
prompt> make
|
||||
|
||||
which will create the CBLAS library.
|
||||
|
||||
CREATING THE TESTERS
|
||||
|
||||
type:
|
||||
|
||||
prompt> make cblas_testing
|
||||
|
||||
This will create the BLAS library if necessary, then compile the CBLAS testings.
|
||||
|
||||
EXECUTING THE TESTERS
|
||||
|
||||
type:
|
||||
|
||||
prompt> make runtst
|
||||
|
||||
_______________________________________________________________________________
|
||||
|
||||
This package contains C interface to Legacy BLAS.
|
||||
|
||||
Written by Keita Teranishi (5/20/98)
|
||||
_______________________________________________________________________________
|
||||
|
||||
This release updates an inconsistency between the BLAST document and
|
||||
the interface. According to the document, the enumerated types for
|
||||
the C interface to the BLAS are not typedef'ed.
|
||||
|
||||
It also updates the Level 2 and 3 testers which check for correct
|
||||
exiting of routines when called with bad arguments. This is done by
|
||||
overriding the Legacy BLAS library's implementation of xerbla(). If
|
||||
this cannot be done ( for instance one cannot override some calls
|
||||
to xerbla() in Sun's Performance library), then correct error
|
||||
exiting cannot be checked.
|
||||
|
||||
Updated by Jeff Horner (3/15/99)
|
||||
_______________________________________________________________________________
|
||||
|
||||
Updated by R. Clint Whaley (2/23/03):
|
||||
|
||||
Fixed the i?amax error that I reported three years ago: standard dictates
|
||||
IAMAX return vals in range 0 <= iamax < N, but reference was mistakenly
|
||||
returning like F77: 0 < iamax <= N.
|
||||
_______________________________________________________________________________
|
||||
|
||||
Updated by Julie Langou (08/22/2014):
|
||||
|
||||
Integrate CBLAS package into LAPACK
|
||||
Improve headers for mangling
|
|
@ -0,0 +1,9 @@
|
|||
prefix=@prefix@
|
||||
libdir=@libdir@
|
||||
|
||||
Name: lapacke
|
||||
Description: C Standard Interface to BLAS Linear Algebra PACKage
|
||||
Version: @LAPACK_VERSION@
|
||||
URL: http://www.netlib.org/lapack/
|
||||
Libs: -L${libdir} -lcblas
|
||||
Requires: blas
|
|
@ -0,0 +1,14 @@
|
|||
# Load the LAPACK package with which we were built.
|
||||
set(LAPACK_DIR "@LAPACK_BINARY_DIR@")
|
||||
find_package(LAPACK NO_MODULE)
|
||||
|
||||
# Load lapack targets from the build tree, including lapacke targets.
|
||||
if(NOT TARGET lapacke)
|
||||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
|
||||
endif()
|
||||
|
||||
# Report lapacke header search locations.
|
||||
set(CBLAS_INCLUDE_DIRS "@LAPACK_SOURCE_DIR@/cblas/include")
|
||||
|
||||
# Report lapacke libraries.
|
||||
set(CBLAS_LIBRARIES cblas)
|
|
@ -0,0 +1,23 @@
|
|||
# Compute locations from <prefix>/lib/cmake/lapacke-<v>/<self>.cmake
|
||||
get_filename_component(_CBLAS_SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
|
||||
get_filename_component(_CBLAS_PREFIX "${_CBLAS_SELF_DIR}" PATH)
|
||||
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
|
||||
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
|
||||
|
||||
# Load the LAPACK package with which we were built.
|
||||
set(LAPACK_DIR "${_CBLAS_PREFIX}/lib/cmake/lapack-@LAPACK_VERSION@")
|
||||
find_package(LAPACK NO_MODULE)
|
||||
|
||||
# Load lapacke targets from the install tree.
|
||||
if(NOT TARGET cblas)
|
||||
include(${_CBLAS_SELF_DIR}/cblas-targets.cmake)
|
||||
endif()
|
||||
|
||||
# Report lapacke header search locations.
|
||||
set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include)
|
||||
|
||||
# Report lapacke libraries.
|
||||
set(CBLAS_LIBRARIES cblas)
|
||||
|
||||
unset(_CBLAS_PREFIX)
|
||||
unset(_CBLAS_SELF_DIR)
|
|
@ -0,0 +1,8 @@
|
|||
add_executable(xexample1_CBLAS cblas_example1.c )
|
||||
add_executable(xexample2_CBLAS cblas_example2.c )
|
||||
|
||||
target_link_libraries(xexample1_CBLAS cblas ${BLAS_LIBRARIES})
|
||||
target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES})
|
||||
|
||||
add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS)
|
||||
add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS)
|
|
@ -0,0 +1,14 @@
|
|||
include ../../make.inc
|
||||
|
||||
all: example1 example2
|
||||
|
||||
example1:
|
||||
$(CC) -c $(CFLAGS) -I../include cblas_example1.c
|
||||
$(LOADER) -o cblas_ex1 cblas_example1.o $(CBLASLIB) $(BLASLIB)
|
||||
|
||||
example2:
|
||||
$(CC) -c $(CFLAGS) -I../include cblas_example2.c
|
||||
$(LOADER) -o cblas_ex2 cblas_example2.o $(CBLASLIB) $(BLASLIB)
|
||||
|
||||
cleanall:
|
||||
rm -f *.o cblas_ex1 cblas_ex2
|
|
@ -0,0 +1,69 @@
|
|||
/* cblas_example.c */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
|
||||
int main ( )
|
||||
{
|
||||
CBLAS_LAYOUT Layout;
|
||||
CBLAS_TRANSPOSE transa;
|
||||
|
||||
double *a, *x, *y;
|
||||
double alpha, beta;
|
||||
int m, n, lda, incx, incy, i;
|
||||
|
||||
Layout = CblasColMajor;
|
||||
transa = CblasNoTrans;
|
||||
|
||||
m = 4; /* Size of Column ( the number of rows ) */
|
||||
n = 4; /* Size of Row ( the number of columns ) */
|
||||
lda = 4; /* Leading dimension of 5 * 4 matrix is 5 */
|
||||
incx = 1;
|
||||
incy = 1;
|
||||
alpha = 1;
|
||||
beta = 0;
|
||||
|
||||
a = (double *)malloc(sizeof(double)*m*n);
|
||||
x = (double *)malloc(sizeof(double)*n);
|
||||
y = (double *)malloc(sizeof(double)*n);
|
||||
/* The elements of the first column */
|
||||
a[0] = 1;
|
||||
a[1] = 2;
|
||||
a[2] = 3;
|
||||
a[3] = 4;
|
||||
/* The elements of the second column */
|
||||
a[m] = 1;
|
||||
a[m+1] = 1;
|
||||
a[m+2] = 1;
|
||||
a[m+3] = 1;
|
||||
/* The elements of the third column */
|
||||
a[m*2] = 3;
|
||||
a[m*2+1] = 4;
|
||||
a[m*2+2] = 5;
|
||||
a[m*2+3] = 6;
|
||||
/* The elements of the fourth column */
|
||||
a[m*3] = 5;
|
||||
a[m*3+1] = 6;
|
||||
a[m*3+2] = 7;
|
||||
a[m*3+3] = 8;
|
||||
/* The elemetns of x and y */
|
||||
x[0] = 1;
|
||||
x[1] = 2;
|
||||
x[2] = 1;
|
||||
x[3] = 1;
|
||||
y[0] = 0;
|
||||
y[1] = 0;
|
||||
y[2] = 0;
|
||||
y[3] = 0;
|
||||
|
||||
cblas_dgemv( Layout, transa, m, n, alpha, a, lda, x, incx, beta,
|
||||
y, incy );
|
||||
/* Print y */
|
||||
for( i = 0; i < n; i++ )
|
||||
printf(" y%d = %f\n", i, y[i]);
|
||||
free(a);
|
||||
free(x);
|
||||
free(y);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,72 @@
|
|||
/* cblas_example2.c */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
|
||||
#define INVALID -1
|
||||
|
||||
int main (int argc, char **argv )
|
||||
{
|
||||
int rout=-1,info=0,m,n,k,lda,ldb,ldc;
|
||||
double A[2] = {0.0,0.0},
|
||||
B[2] = {0.0,0.0},
|
||||
C[2] = {0.0,0.0},
|
||||
ALPHA=0.0, BETA=0.0;
|
||||
|
||||
if (argc > 2){
|
||||
rout = atoi(argv[1]);
|
||||
info = atoi(argv[2]);
|
||||
}
|
||||
|
||||
if (rout == 1) {
|
||||
if (info==0) {
|
||||
printf("Checking if cblas_dgemm fails on parameter 4\n");
|
||||
cblas_dgemm( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
}
|
||||
if (info==1) {
|
||||
printf("Checking if cblas_dgemm fails on parameter 5\n");
|
||||
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
}
|
||||
if (info==2) {
|
||||
printf("Checking if cblas_dgemm fails on parameter 9\n");
|
||||
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 2 );
|
||||
}
|
||||
if (info==3) {
|
||||
printf("Checking if cblas_dgemm fails on parameter 11\n");
|
||||
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 2,
|
||||
ALPHA, A, 1, B, 1, BETA, C, 1 );
|
||||
}
|
||||
} else {
|
||||
if (info==0) {
|
||||
printf("Checking if F77_dgemm fails on parameter 3\n");
|
||||
m=INVALID; n=0; k=0; lda=1; ldb=1; ldc=1;
|
||||
F77_dgemm( "T", "N", &m, &n, &k,
|
||||
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
|
||||
}
|
||||
if (info==1) {
|
||||
m=0; n=INVALID; k=0; lda=1; ldb=1; ldc=1;
|
||||
printf("Checking if F77_dgemm fails on parameter 4\n");
|
||||
F77_dgemm( "N", "T", &m, &n, &k,
|
||||
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
|
||||
}
|
||||
if (info==2) {
|
||||
printf("Checking if F77_dgemm fails on parameter 8\n");
|
||||
m=2; n=0; k=0; lda=1; ldb=1; ldc=2;
|
||||
F77_dgemm( "N", "N" , &m, &n, &k,
|
||||
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
|
||||
}
|
||||
if (info==3) {
|
||||
printf("Checking if F77_dgemm fails on parameter 10\n");
|
||||
m=0; n=0; k=2; lda=1; ldb=1; ldc=1;
|
||||
F77_dgemm( "N", "N" , &m, &n, &k,
|
||||
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
SET (CBLAS_INCLUDE cblas.h cblas_f77.h cblas_test.h cblas_mangling.h)
|
||||
|
||||
file(COPY ${CBLAS_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include)
|
|
@ -0,0 +1,588 @@
|
|||
#ifndef CBLAS_H
|
||||
#define CBLAS_H
|
||||
#include <stddef.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" { /* Assume C declarations for C++ */
|
||||
#endif /* __cplusplus */
|
||||
|
||||
/*
|
||||
* Enumerated and derived types
|
||||
*/
|
||||
#ifdef WeirdNEC
|
||||
#define CBLAS_INDEX long
|
||||
#else
|
||||
#define CBLAS_INDEX int
|
||||
#endif
|
||||
|
||||
typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT;
|
||||
typedef enum {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113} CBLAS_TRANSPOSE;
|
||||
typedef enum {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
|
||||
typedef enum {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
|
||||
typedef enum {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
|
||||
|
||||
typedef CBLAS_LAYOUT CBLAS_ORDER; /* this for backward compatibility with CBLAS_ORDER */
|
||||
|
||||
#include "cblas_mangling.h"
|
||||
|
||||
/*
|
||||
* ===========================================================================
|
||||
* Prototypes for level 1 BLAS functions (complex are recast as routines)
|
||||
* ===========================================================================
|
||||
*/
|
||||
|
||||
double cblas_dcabs1(const void *z);
|
||||
float cblas_scabs1(const void *c);
|
||||
|
||||
float cblas_sdsdot(const int N, const float alpha, const float *X,
|
||||
const int incX, const float *Y, const int incY);
|
||||
double cblas_dsdot(const int N, const float *X, const int incX, const float *Y,
|
||||
const int incY);
|
||||
float cblas_sdot(const int N, const float *X, const int incX,
|
||||
const float *Y, const int incY);
|
||||
double cblas_ddot(const int N, const double *X, const int incX,
|
||||
const double *Y, const int incY);
|
||||
|
||||
/*
|
||||
* Functions having prefixes Z and C only
|
||||
*/
|
||||
void cblas_cdotu_sub(const int N, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *dotu);
|
||||
void cblas_cdotc_sub(const int N, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *dotc);
|
||||
|
||||
void cblas_zdotu_sub(const int N, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *dotu);
|
||||
void cblas_zdotc_sub(const int N, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *dotc);
|
||||
|
||||
|
||||
/*
|
||||
* Functions having prefixes S D SC DZ
|
||||
*/
|
||||
float cblas_snrm2(const int N, const float *X, const int incX);
|
||||
float cblas_sasum(const int N, const float *X, const int incX);
|
||||
|
||||
double cblas_dnrm2(const int N, const double *X, const int incX);
|
||||
double cblas_dasum(const int N, const double *X, const int incX);
|
||||
|
||||
float cblas_scnrm2(const int N, const void *X, const int incX);
|
||||
float cblas_scasum(const int N, const void *X, const int incX);
|
||||
|
||||
double cblas_dznrm2(const int N, const void *X, const int incX);
|
||||
double cblas_dzasum(const int N, const void *X, const int incX);
|
||||
|
||||
|
||||
/*
|
||||
* Functions having standard 4 prefixes (S D C Z)
|
||||
*/
|
||||
CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX);
|
||||
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX);
|
||||
CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX);
|
||||
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX);
|
||||
|
||||
/*
|
||||
* ===========================================================================
|
||||
* Prototypes for level 1 BLAS routines
|
||||
* ===========================================================================
|
||||
*/
|
||||
|
||||
/*
|
||||
* Routines with standard 4 prefixes (s, d, c, z)
|
||||
*/
|
||||
void cblas_sswap(const int N, float *X, const int incX,
|
||||
float *Y, const int incY);
|
||||
void cblas_scopy(const int N, const float *X, const int incX,
|
||||
float *Y, const int incY);
|
||||
void cblas_saxpy(const int N, const float alpha, const float *X,
|
||||
const int incX, float *Y, const int incY);
|
||||
|
||||
void cblas_dswap(const int N, double *X, const int incX,
|
||||
double *Y, const int incY);
|
||||
void cblas_dcopy(const int N, const double *X, const int incX,
|
||||
double *Y, const int incY);
|
||||
void cblas_daxpy(const int N, const double alpha, const double *X,
|
||||
const int incX, double *Y, const int incY);
|
||||
|
||||
void cblas_cswap(const int N, void *X, const int incX,
|
||||
void *Y, const int incY);
|
||||
void cblas_ccopy(const int N, const void *X, const int incX,
|
||||
void *Y, const int incY);
|
||||
void cblas_caxpy(const int N, const void *alpha, const void *X,
|
||||
const int incX, void *Y, const int incY);
|
||||
|
||||
void cblas_zswap(const int N, void *X, const int incX,
|
||||
void *Y, const int incY);
|
||||
void cblas_zcopy(const int N, const void *X, const int incX,
|
||||
void *Y, const int incY);
|
||||
void cblas_zaxpy(const int N, const void *alpha, const void *X,
|
||||
const int incX, void *Y, const int incY);
|
||||
|
||||
|
||||
/*
|
||||
* Routines with S and D prefix only
|
||||
*/
|
||||
void cblas_srotg(float *a, float *b, float *c, float *s);
|
||||
void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P);
|
||||
void cblas_srot(const int N, float *X, const int incX,
|
||||
float *Y, const int incY, const float c, const float s);
|
||||
void cblas_srotm(const int N, float *X, const int incX,
|
||||
float *Y, const int incY, const float *P);
|
||||
|
||||
void cblas_drotg(double *a, double *b, double *c, double *s);
|
||||
void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P);
|
||||
void cblas_drot(const int N, double *X, const int incX,
|
||||
double *Y, const int incY, const double c, const double s);
|
||||
void cblas_drotm(const int N, double *X, const int incX,
|
||||
double *Y, const int incY, const double *P);
|
||||
|
||||
|
||||
/*
|
||||
* Routines with S D C Z CS and ZD prefixes
|
||||
*/
|
||||
void cblas_sscal(const int N, const float alpha, float *X, const int incX);
|
||||
void cblas_dscal(const int N, const double alpha, double *X, const int incX);
|
||||
void cblas_cscal(const int N, const void *alpha, void *X, const int incX);
|
||||
void cblas_zscal(const int N, const void *alpha, void *X, const int incX);
|
||||
void cblas_csscal(const int N, const float alpha, void *X, const int incX);
|
||||
void cblas_zdscal(const int N, const double alpha, void *X, const int incX);
|
||||
|
||||
/*
|
||||
* ===========================================================================
|
||||
* Prototypes for level 2 BLAS
|
||||
* ===========================================================================
|
||||
*/
|
||||
|
||||
/*
|
||||
* Routines with standard 4 prefixes (S, D, C, Z)
|
||||
*/
|
||||
void cblas_sgemv(const CBLAS_LAYOUT layout,
|
||||
const CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||
const float alpha, const float *A, const int lda,
|
||||
const float *X, const int incX, const float beta,
|
||||
float *Y, const int incY);
|
||||
void cblas_sgbmv(CBLAS_LAYOUT layout,
|
||||
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||
const int KL, const int KU, const float alpha,
|
||||
const float *A, const int lda, const float *X,
|
||||
const int incX, const float beta, float *Y, const int incY);
|
||||
void cblas_strmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const float *A, const int lda,
|
||||
float *X, const int incX);
|
||||
void cblas_stbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const int K, const float *A, const int lda,
|
||||
float *X, const int incX);
|
||||
void cblas_stpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const float *Ap, float *X, const int incX);
|
||||
void cblas_strsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const float *A, const int lda, float *X,
|
||||
const int incX);
|
||||
void cblas_stbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const int K, const float *A, const int lda,
|
||||
float *X, const int incX);
|
||||
void cblas_stpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const float *Ap, float *X, const int incX);
|
||||
|
||||
void cblas_dgemv(CBLAS_LAYOUT layout,
|
||||
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||
const double alpha, const double *A, const int lda,
|
||||
const double *X, const int incX, const double beta,
|
||||
double *Y, const int incY);
|
||||
void cblas_dgbmv(CBLAS_LAYOUT layout,
|
||||
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||
const int KL, const int KU, const double alpha,
|
||||
const double *A, const int lda, const double *X,
|
||||
const int incX, const double beta, double *Y, const int incY);
|
||||
void cblas_dtrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const double *A, const int lda,
|
||||
double *X, const int incX);
|
||||
void cblas_dtbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const int K, const double *A, const int lda,
|
||||
double *X, const int incX);
|
||||
void cblas_dtpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const double *Ap, double *X, const int incX);
|
||||
void cblas_dtrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const double *A, const int lda, double *X,
|
||||
const int incX);
|
||||
void cblas_dtbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const int K, const double *A, const int lda,
|
||||
double *X, const int incX);
|
||||
void cblas_dtpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const double *Ap, double *X, const int incX);
|
||||
|
||||
void cblas_cgemv(CBLAS_LAYOUT layout,
|
||||
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *X, const int incX, const void *beta,
|
||||
void *Y, const int incY);
|
||||
void cblas_cgbmv(CBLAS_LAYOUT layout,
|
||||
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||
const int KL, const int KU, const void *alpha,
|
||||
const void *A, const int lda, const void *X,
|
||||
const int incX, const void *beta, void *Y, const int incY);
|
||||
void cblas_ctrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const void *A, const int lda,
|
||||
void *X, const int incX);
|
||||
void cblas_ctbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const int K, const void *A, const int lda,
|
||||
void *X, const int incX);
|
||||
void cblas_ctpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const void *Ap, void *X, const int incX);
|
||||
void cblas_ctrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const void *A, const int lda, void *X,
|
||||
const int incX);
|
||||
void cblas_ctbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const int K, const void *A, const int lda,
|
||||
void *X, const int incX);
|
||||
void cblas_ctpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const void *Ap, void *X, const int incX);
|
||||
|
||||
void cblas_zgemv(CBLAS_LAYOUT layout,
|
||||
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *X, const int incX, const void *beta,
|
||||
void *Y, const int incY);
|
||||
void cblas_zgbmv(CBLAS_LAYOUT layout,
|
||||
CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||
const int KL, const int KU, const void *alpha,
|
||||
const void *A, const int lda, const void *X,
|
||||
const int incX, const void *beta, void *Y, const int incY);
|
||||
void cblas_ztrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const void *A, const int lda,
|
||||
void *X, const int incX);
|
||||
void cblas_ztbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const int K, const void *A, const int lda,
|
||||
void *X, const int incX);
|
||||
void cblas_ztpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const void *Ap, void *X, const int incX);
|
||||
void cblas_ztrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const void *A, const int lda, void *X,
|
||||
const int incX);
|
||||
void cblas_ztbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const int K, const void *A, const int lda,
|
||||
void *X, const int incX);
|
||||
void cblas_ztpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
|
||||
const int N, const void *Ap, void *X, const int incX);
|
||||
|
||||
|
||||
/*
|
||||
* Routines with S and D prefixes only
|
||||
*/
|
||||
void cblas_ssymv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const float alpha, const float *A,
|
||||
const int lda, const float *X, const int incX,
|
||||
const float beta, float *Y, const int incY);
|
||||
void cblas_ssbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const int K, const float alpha, const float *A,
|
||||
const int lda, const float *X, const int incX,
|
||||
const float beta, float *Y, const int incY);
|
||||
void cblas_sspmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const float alpha, const float *Ap,
|
||||
const float *X, const int incX,
|
||||
const float beta, float *Y, const int incY);
|
||||
void cblas_sger(CBLAS_LAYOUT layout, const int M, const int N,
|
||||
const float alpha, const float *X, const int incX,
|
||||
const float *Y, const int incY, float *A, const int lda);
|
||||
void cblas_ssyr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const float alpha, const float *X,
|
||||
const int incX, float *A, const int lda);
|
||||
void cblas_sspr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const float alpha, const float *X,
|
||||
const int incX, float *Ap);
|
||||
void cblas_ssyr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const float alpha, const float *X,
|
||||
const int incX, const float *Y, const int incY, float *A,
|
||||
const int lda);
|
||||
void cblas_sspr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const float alpha, const float *X,
|
||||
const int incX, const float *Y, const int incY, float *A);
|
||||
|
||||
void cblas_dsymv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const double alpha, const double *A,
|
||||
const int lda, const double *X, const int incX,
|
||||
const double beta, double *Y, const int incY);
|
||||
void cblas_dsbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const int K, const double alpha, const double *A,
|
||||
const int lda, const double *X, const int incX,
|
||||
const double beta, double *Y, const int incY);
|
||||
void cblas_dspmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const double alpha, const double *Ap,
|
||||
const double *X, const int incX,
|
||||
const double beta, double *Y, const int incY);
|
||||
void cblas_dger(CBLAS_LAYOUT layout, const int M, const int N,
|
||||
const double alpha, const double *X, const int incX,
|
||||
const double *Y, const int incY, double *A, const int lda);
|
||||
void cblas_dsyr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const double alpha, const double *X,
|
||||
const int incX, double *A, const int lda);
|
||||
void cblas_dspr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const double alpha, const double *X,
|
||||
const int incX, double *Ap);
|
||||
void cblas_dsyr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const double alpha, const double *X,
|
||||
const int incX, const double *Y, const int incY, double *A,
|
||||
const int lda);
|
||||
void cblas_dspr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const double alpha, const double *X,
|
||||
const int incX, const double *Y, const int incY, double *A);
|
||||
|
||||
|
||||
/*
|
||||
* Routines with C and Z prefixes only
|
||||
*/
|
||||
void cblas_chemv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const void *alpha, const void *A,
|
||||
const int lda, const void *X, const int incX,
|
||||
const void *beta, void *Y, const int incY);
|
||||
void cblas_chbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const int K, const void *alpha, const void *A,
|
||||
const int lda, const void *X, const int incX,
|
||||
const void *beta, void *Y, const int incY);
|
||||
void cblas_chpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const void *alpha, const void *Ap,
|
||||
const void *X, const int incX,
|
||||
const void *beta, void *Y, const int incY);
|
||||
void cblas_cgeru(CBLAS_LAYOUT layout, const int M, const int N,
|
||||
const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *A, const int lda);
|
||||
void cblas_cgerc(CBLAS_LAYOUT layout, const int M, const int N,
|
||||
const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *A, const int lda);
|
||||
void cblas_cher(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const float alpha, const void *X, const int incX,
|
||||
void *A, const int lda);
|
||||
void cblas_chpr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const float alpha, const void *X,
|
||||
const int incX, void *A);
|
||||
void cblas_cher2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
|
||||
const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *A, const int lda);
|
||||
void cblas_chpr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
|
||||
const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *Ap);
|
||||
|
||||
void cblas_zhemv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const void *alpha, const void *A,
|
||||
const int lda, const void *X, const int incX,
|
||||
const void *beta, void *Y, const int incY);
|
||||
void cblas_zhbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const int K, const void *alpha, const void *A,
|
||||
const int lda, const void *X, const int incX,
|
||||
const void *beta, void *Y, const int incY);
|
||||
void cblas_zhpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const void *alpha, const void *Ap,
|
||||
const void *X, const int incX,
|
||||
const void *beta, void *Y, const int incY);
|
||||
void cblas_zgeru(CBLAS_LAYOUT layout, const int M, const int N,
|
||||
const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *A, const int lda);
|
||||
void cblas_zgerc(CBLAS_LAYOUT layout, const int M, const int N,
|
||||
const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *A, const int lda);
|
||||
void cblas_zher(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const double alpha, const void *X, const int incX,
|
||||
void *A, const int lda);
|
||||
void cblas_zhpr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
const int N, const double alpha, const void *X,
|
||||
const int incX, void *A);
|
||||
void cblas_zher2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
|
||||
const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *A, const int lda);
|
||||
void cblas_zhpr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
|
||||
const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *Ap);
|
||||
|
||||
/*
|
||||
* ===========================================================================
|
||||
* Prototypes for level 3 BLAS
|
||||
* ===========================================================================
|
||||
*/
|
||||
|
||||
/*
|
||||
* Routines with standard 4 prefixes (S, D, C, Z)
|
||||
*/
|
||||
void cblas_sgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||
const int K, const float alpha, const float *A,
|
||||
const int lda, const float *B, const int ldb,
|
||||
const float beta, float *C, const int ldc);
|
||||
void cblas_ssymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, const int M, const int N,
|
||||
const float alpha, const float *A, const int lda,
|
||||
const float *B, const int ldb, const float beta,
|
||||
float *C, const int ldc);
|
||||
void cblas_ssyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const float alpha, const float *A, const int lda,
|
||||
const float beta, float *C, const int ldc);
|
||||
void cblas_ssyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const float alpha, const float *A, const int lda,
|
||||
const float *B, const int ldb, const float beta,
|
||||
float *C, const int ldc);
|
||||
void cblas_strmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_DIAG Diag, const int M, const int N,
|
||||
const float alpha, const float *A, const int lda,
|
||||
float *B, const int ldb);
|
||||
void cblas_strsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_DIAG Diag, const int M, const int N,
|
||||
const float alpha, const float *A, const int lda,
|
||||
float *B, const int ldb);
|
||||
|
||||
void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||
const int K, const double alpha, const double *A,
|
||||
const int lda, const double *B, const int ldb,
|
||||
const double beta, double *C, const int ldc);
|
||||
void cblas_dsymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, const int M, const int N,
|
||||
const double alpha, const double *A, const int lda,
|
||||
const double *B, const int ldb, const double beta,
|
||||
double *C, const int ldc);
|
||||
void cblas_dsyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const double alpha, const double *A, const int lda,
|
||||
const double beta, double *C, const int ldc);
|
||||
void cblas_dsyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const double alpha, const double *A, const int lda,
|
||||
const double *B, const int ldb, const double beta,
|
||||
double *C, const int ldc);
|
||||
void cblas_dtrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_DIAG Diag, const int M, const int N,
|
||||
const double alpha, const double *A, const int lda,
|
||||
double *B, const int ldb);
|
||||
void cblas_dtrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_DIAG Diag, const int M, const int N,
|
||||
const double alpha, const double *A, const int lda,
|
||||
double *B, const int ldb);
|
||||
|
||||
void cblas_cgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||
const int K, const void *alpha, const void *A,
|
||||
const int lda, const void *B, const int ldb,
|
||||
const void *beta, void *C, const int ldc);
|
||||
void cblas_csymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const void *beta,
|
||||
void *C, const int ldc);
|
||||
void cblas_csyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *beta, void *C, const int ldc);
|
||||
void cblas_csyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const void *beta,
|
||||
void *C, const int ldc);
|
||||
void cblas_ctrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_DIAG Diag, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
void *B, const int ldb);
|
||||
void cblas_ctrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_DIAG Diag, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
void *B, const int ldb);
|
||||
|
||||
void cblas_zgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||
const int K, const void *alpha, const void *A,
|
||||
const int lda, const void *B, const int ldb,
|
||||
const void *beta, void *C, const int ldc);
|
||||
void cblas_zsymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const void *beta,
|
||||
void *C, const int ldc);
|
||||
void cblas_zsyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *beta, void *C, const int ldc);
|
||||
void cblas_zsyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const void *beta,
|
||||
void *C, const int ldc);
|
||||
void cblas_ztrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_DIAG Diag, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
void *B, const int ldb);
|
||||
void cblas_ztrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
|
||||
CBLAS_DIAG Diag, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
void *B, const int ldb);
|
||||
|
||||
|
||||
/*
|
||||
* Routines with prefixes C and Z only
|
||||
*/
|
||||
void cblas_chemm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const void *beta,
|
||||
void *C, const int ldc);
|
||||
void cblas_cherk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const float alpha, const void *A, const int lda,
|
||||
const float beta, void *C, const int ldc);
|
||||
void cblas_cher2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const float beta,
|
||||
void *C, const int ldc);
|
||||
|
||||
void cblas_zhemm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
|
||||
CBLAS_UPLO Uplo, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const void *beta,
|
||||
void *C, const int ldc);
|
||||
void cblas_zherk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const double alpha, const void *A, const int lda,
|
||||
const double beta, void *C, const int ldc);
|
||||
void cblas_zher2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
|
||||
CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const double beta,
|
||||
void *C, const int ldc);
|
||||
|
||||
void cblas_xerbla(int p, const char *rout, const char *form, ...);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
|
@ -0,0 +1,394 @@
|
|||
/*
|
||||
* cblas_f77.h
|
||||
* Written by Keita Teranishi
|
||||
*
|
||||
* Updated by Jeff Horner
|
||||
* Merged cblas_f77.h and cblas_fortran_header.h
|
||||
*/
|
||||
|
||||
#ifndef CBLAS_F77_H
|
||||
#define CBLAS_F77_H
|
||||
|
||||
#ifdef CRAY
|
||||
#include <fortran.h>
|
||||
#define F77_CHAR _fcd
|
||||
#define C2F_CHAR(a) ( _cptofcd( (a), 1 ) )
|
||||
#define C2F_STR(a, i) ( _cptofcd( (a), (i) ) )
|
||||
#define F77_STRLEN(a) (_fcdlen)
|
||||
#endif
|
||||
|
||||
#ifdef WeirdNEC
|
||||
#define F77_INT long
|
||||
#endif
|
||||
|
||||
#ifdef F77_CHAR
|
||||
#define FCHAR F77_CHAR
|
||||
#else
|
||||
#define FCHAR char *
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
#define FINT const F77_INT *
|
||||
#define FINT2 F77_INT *
|
||||
#else
|
||||
#define FINT const int *
|
||||
#define FINT2 int *
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Level 1 BLAS
|
||||
*/
|
||||
|
||||
#define F77_xerbla F77_GLOBAL(xerbla,XERBLA)
|
||||
#define F77_srotg F77_GLOBAL(srotg,SROTG)
|
||||
#define F77_srotmg F77_GLOBAL(srotmg,SROTMG)
|
||||
#define F77_srot F77_GLOBAL(srot,SROT)
|
||||
#define F77_srotm F77_GLOBAL(srotm,SROTM)
|
||||
#define F77_drotg F77_GLOBAL(drotg,DROTG)
|
||||
#define F77_drotmg F77_GLOBAL(drotmg,DROTMG)
|
||||
#define F77_drot F77_GLOBAL(drot,DROT)
|
||||
#define F77_drotm F77_GLOBAL(drotm,DROTM)
|
||||
#define F77_sswap F77_GLOBAL(sswap,SSWAP)
|
||||
#define F77_scopy F77_GLOBAL(scopy,SCOPY)
|
||||
#define F77_saxpy F77_GLOBAL(saxpy,SAXPY)
|
||||
#define F77_isamax_sub F77_GLOBAL(isamaxsub,ISAMAXSUB)
|
||||
#define F77_dswap F77_GLOBAL(dswap,DSWAP)
|
||||
#define F77_dcopy F77_GLOBAL(dcopy,DCOPY)
|
||||
#define F77_daxpy F77_GLOBAL(daxpy,DAXPY)
|
||||
#define F77_idamax_sub F77_GLOBAL(idamaxsub,IDAMAXSUB)
|
||||
#define F77_cswap F77_GLOBAL(cswap,CSWAP)
|
||||
#define F77_ccopy F77_GLOBAL(ccopy,CCOPY)
|
||||
#define F77_caxpy F77_GLOBAL(caxpy,CAXPY)
|
||||
#define F77_icamax_sub F77_GLOBAL(icamaxsub,ICAMAXSUB)
|
||||
#define F77_zswap F77_GLOBAL(zswap,ZSWAP)
|
||||
#define F77_zcopy F77_GLOBAL(zcopy,ZCOPY)
|
||||
#define F77_zaxpy F77_GLOBAL(zaxpy,ZAXPY)
|
||||
#define F77_izamax_sub F77_GLOBAL(izamaxsub,IZAMAXSUB)
|
||||
#define F77_sdot_sub F77_GLOBAL(sdotsub,SDOTSUB)
|
||||
#define F77_ddot_sub F77_GLOBAL(ddotsub,DDOTSUB)
|
||||
#define F77_dsdot_sub F77_GLOBAL(dsdotsub,DSDOTSUB)
|
||||
#define F77_sscal F77_GLOBAL(sscal,SSCAL)
|
||||
#define F77_dscal F77_GLOBAL(dscal,DSCAL)
|
||||
#define F77_cscal F77_GLOBAL(cscal,CSCAL)
|
||||
#define F77_zscal F77_GLOBAL(zscal,ZSCAL)
|
||||
#define F77_csscal F77_GLOBAL(csscal,CSSCAL)
|
||||
#define F77_zdscal F77_GLOBAL(zdscal,ZDSCAL)
|
||||
#define F77_cdotu_sub F77_GLOBAL(cdotusub,CDOTUSUB)
|
||||
#define F77_cdotc_sub F77_GLOBAL(cdotcsub,CDOTCSUB)
|
||||
#define F77_zdotu_sub F77_GLOBAL(zdotusub,ZDOTUSUB)
|
||||
#define F77_zdotc_sub F77_GLOBAL(zdotcsub,ZDOTCSUB)
|
||||
#define F77_snrm2_sub F77_GLOBAL(snrm2sub,SNRM2SUB)
|
||||
#define F77_sasum_sub F77_GLOBAL(sasumsub,SASUMSUB)
|
||||
#define F77_dnrm2_sub F77_GLOBAL(dnrm2sub,DNRM2SUB)
|
||||
#define F77_dasum_sub F77_GLOBAL(dasumsub,DASUMSUB)
|
||||
#define F77_scnrm2_sub F77_GLOBAL(scnrm2sub,SCNRM2SUB)
|
||||
#define F77_scasum_sub F77_GLOBAL(scasumsub,SCASUMSUB)
|
||||
#define F77_dznrm2_sub F77_GLOBAL(dznrm2sub,DZNRM2SUB)
|
||||
#define F77_dzasum_sub F77_GLOBAL(dzasumsub,DZASUMSUB)
|
||||
#define F77_sdsdot_sub F77_GLOBAL(sdsdotsub,SDSDOTSUB)
|
||||
/*
|
||||
* Level 2 BLAS
|
||||
*/
|
||||
#define F77_ssymv F77_GLOBAL(ssymv,SSYMY)
|
||||
#define F77_ssbmv F77_GLOBAL(ssbmv,SSMBV)
|
||||
#define F77_sspmv F77_GLOBAL(sspmv,SSPMV)
|
||||
#define F77_sger F77_GLOBAL(sger,SGER)
|
||||
#define F77_ssyr F77_GLOBAL(ssyr,SSYR)
|
||||
#define F77_sspr F77_GLOBAL(sspr,SSPR)
|
||||
#define F77_ssyr2 F77_GLOBAL(ssyr2,SSYR2)
|
||||
#define F77_sspr2 F77_GLOBAL(sspr2,SSPR2)
|
||||
#define F77_dsymv F77_GLOBAL(dsymv,DSYMV)
|
||||
#define F77_dsbmv F77_GLOBAL(dsbmv,DSBMV)
|
||||
#define F77_dspmv F77_GLOBAL(dspmv,DSPMV)
|
||||
#define F77_dger F77_GLOBAL(dger,DGER)
|
||||
#define F77_dsyr F77_GLOBAL(dsyr,DSYR)
|
||||
#define F77_dspr F77_GLOBAL(dspr,DSPR)
|
||||
#define F77_dsyr2 F77_GLOBAL(dsyr2,DSYR2)
|
||||
#define F77_dspr2 F77_GLOBAL(dspr2,DSPR2)
|
||||
#define F77_chemv F77_GLOBAL(chemv,CHEMV)
|
||||
#define F77_chbmv F77_GLOBAL(chbmv,CHBMV)
|
||||
#define F77_chpmv F77_GLOBAL(chpmv,CHPMV)
|
||||
#define F77_cgeru F77_GLOBAL(cgeru,CGERU)
|
||||
#define F77_cgerc F77_GLOBAL(cgerc,CGERC)
|
||||
#define F77_cher F77_GLOBAL(cher,CHER)
|
||||
#define F77_chpr F77_GLOBAL(chpr,CHPR)
|
||||
#define F77_cher2 F77_GLOBAL(cher2,CHER2)
|
||||
#define F77_chpr2 F77_GLOBAL(chpr2,CHPR2)
|
||||
#define F77_zhemv F77_GLOBAL(zhemv,ZHEMV)
|
||||
#define F77_zhbmv F77_GLOBAL(zhbmv,ZHBMV)
|
||||
#define F77_zhpmv F77_GLOBAL(zhpmv,ZHPMV)
|
||||
#define F77_zgeru F77_GLOBAL(zgeru,ZGERU)
|
||||
#define F77_zgerc F77_GLOBAL(zgerc,ZGERC)
|
||||
#define F77_zher F77_GLOBAL(zher,ZHER)
|
||||
#define F77_zhpr F77_GLOBAL(zhpr,ZHPR)
|
||||
#define F77_zher2 F77_GLOBAL(zher2,ZHER2)
|
||||
#define F77_zhpr2 F77_GLOBAL(zhpr2,ZHPR2)
|
||||
#define F77_sgemv F77_GLOBAL(sgemv,SGEMV)
|
||||
#define F77_sgbmv F77_GLOBAL(sgbmv,SGBMV)
|
||||
#define F77_strmv F77_GLOBAL(strmv,STRMV)
|
||||
#define F77_stbmv F77_GLOBAL(stbmv,STBMV)
|
||||
#define F77_stpmv F77_GLOBAL(stpmv,STPMV)
|
||||
#define F77_strsv F77_GLOBAL(strsv,STRSV)
|
||||
#define F77_stbsv F77_GLOBAL(stbsv,STBSV)
|
||||
#define F77_stpsv F77_GLOBAL(stpsv,STPSV)
|
||||
#define F77_dgemv F77_GLOBAL(dgemv,DGEMV)
|
||||
#define F77_dgbmv F77_GLOBAL(dgbmv,DGBMV)
|
||||
#define F77_dtrmv F77_GLOBAL(dtrmv,DTRMV)
|
||||
#define F77_dtbmv F77_GLOBAL(dtbmv,DTBMV)
|
||||
#define F77_dtpmv F77_GLOBAL(dtpmv,DTRMV)
|
||||
#define F77_dtrsv F77_GLOBAL(dtrsv,DTRSV)
|
||||
#define F77_dtbsv F77_GLOBAL(dtbsv,DTBSV)
|
||||
#define F77_dtpsv F77_GLOBAL(dtpsv,DTPSV)
|
||||
#define F77_cgemv F77_GLOBAL(cgemv,CGEMV)
|
||||
#define F77_cgbmv F77_GLOBAL(cgbmv,CGBMV)
|
||||
#define F77_ctrmv F77_GLOBAL(ctrmv,CTRMV)
|
||||
#define F77_ctbmv F77_GLOBAL(ctbmv,CTBMV)
|
||||
#define F77_ctpmv F77_GLOBAL(ctpmv,CTPMV)
|
||||
#define F77_ctrsv F77_GLOBAL(ctrsv,CTRSV)
|
||||
#define F77_ctbsv F77_GLOBAL(ctbsv,CTBSV)
|
||||
#define F77_ctpsv F77_GLOBAL(ctpsv,CTPSV)
|
||||
#define F77_zgemv F77_GLOBAL(zgemv,ZGEMV)
|
||||
#define F77_zgbmv F77_GLOBAL(zgbmv,ZGBMV)
|
||||
#define F77_ztrmv F77_GLOBAL(ztrmv,ZTRMV)
|
||||
#define F77_ztbmv F77_GLOBAL(ztbmv,ZTBMV)
|
||||
#define F77_ztpmv F77_GLOBAL(ztpmv,ZTPMV)
|
||||
#define F77_ztrsv F77_GLOBAL(ztrsv,ZTRSV)
|
||||
#define F77_ztbsv F77_GLOBAL(ztbsv,ZTBSV)
|
||||
#define F77_ztpsv F77_GLOBAL(ztpsv,ZTPSV)
|
||||
/*
|
||||
* Level 3 BLAS
|
||||
*/
|
||||
#define F77_chemm F77_GLOBAL(chemm,CHEMM)
|
||||
#define F77_cherk F77_GLOBAL(cherk,CHERK)
|
||||
#define F77_cher2k F77_GLOBAL(cher2k,CHER2K)
|
||||
#define F77_zhemm F77_GLOBAL(zhemm,ZHEMM)
|
||||
#define F77_zherk F77_GLOBAL(zherk,ZHERK)
|
||||
#define F77_zher2k F77_GLOBAL(zher2k,ZHER2K)
|
||||
#define F77_sgemm F77_GLOBAL(sgemm,SGEMM)
|
||||
#define F77_ssymm F77_GLOBAL(ssymm,SSYMM)
|
||||
#define F77_ssyrk F77_GLOBAL(ssyrk,SSYRK)
|
||||
#define F77_ssyr2k F77_GLOBAL(ssyr2k,SSYR2K)
|
||||
#define F77_strmm F77_GLOBAL(strmm,STRMM)
|
||||
#define F77_strsm F77_GLOBAL(strsm,STRSM)
|
||||
#define F77_dgemm F77_GLOBAL(dgemm,DGEMM)
|
||||
#define F77_dsymm F77_GLOBAL(dsymm,DSYMM)
|
||||
#define F77_dsyrk F77_GLOBAL(dsyrk,DSYRK)
|
||||
#define F77_dsyr2k F77_GLOBAL(dsyr2k,DSYR2K)
|
||||
#define F77_dtrmm F77_GLOBAL(dtrmm,DTRMM)
|
||||
#define F77_dtrsm F77_GLOBAL(dtrsm,DTRSM)
|
||||
#define F77_cgemm F77_GLOBAL(cgemm,CGEMM)
|
||||
#define F77_csymm F77_GLOBAL(csymm,CSYMM)
|
||||
#define F77_csyrk F77_GLOBAL(csyrk,CSYRK)
|
||||
#define F77_csyr2k F77_GLOBAL(csyr2k,CSYR2K)
|
||||
#define F77_ctrmm F77_GLOBAL(ctrmm,CTRMM)
|
||||
#define F77_ctrsm F77_GLOBAL(ctrsm,CTRSM)
|
||||
#define F77_zgemm F77_GLOBAL(zgemm,ZGEMM)
|
||||
#define F77_zsymm F77_GLOBAL(zsymm,ZSYMM)
|
||||
#define F77_zsyrk F77_GLOBAL(zsyrk,ZSYRK)
|
||||
#define F77_zsyr2k F77_GLOBAL(zsyr2k,ZSYR2K)
|
||||
#define F77_ztrmm F77_GLOBAL(ztrmm,ZTRMM)
|
||||
#define F77_ztrsm F77_GLOBAL(ztrsm,ZTRSM)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void F77_xerbla(FCHAR, void *);
|
||||
/*
|
||||
* Level 1 Fortran Prototypes
|
||||
*/
|
||||
|
||||
/* Single Precision */
|
||||
|
||||
void F77_srot(FINT, float *, FINT, float *, FINT, const float *, const float *);
|
||||
void F77_srotg(float *,float *,float *,float *);
|
||||
void F77_srotm( FINT, float *, FINT, float *, FINT, const float *);
|
||||
void F77_srotmg(float *,float *,float *,const float *, float *);
|
||||
void F77_sswap( FINT, float *, FINT, float *, FINT);
|
||||
void F77_scopy( FINT, const float *, FINT, float *, FINT);
|
||||
void F77_saxpy( FINT, const float *, const float *, FINT, float *, FINT);
|
||||
void F77_sdot_sub(FINT, const float *, FINT, const float *, FINT, float *);
|
||||
void F77_sdsdot_sub( FINT, const float *, const float *, FINT, const float *, FINT, float *);
|
||||
void F77_sscal( FINT, const float *, float *, FINT);
|
||||
void F77_snrm2_sub( FINT, const float *, FINT, float *);
|
||||
void F77_sasum_sub( FINT, const float *, FINT, float *);
|
||||
void F77_isamax_sub( FINT, const float * , FINT, FINT2);
|
||||
|
||||
/* Double Precision */
|
||||
|
||||
void F77_drot(FINT, double *, FINT, double *, FINT, const double *, const double *);
|
||||
void F77_drotg(double *,double *,double *,double *);
|
||||
void F77_drotm( FINT, double *, FINT, double *, FINT, const double *);
|
||||
void F77_drotmg(double *,double *,double *,const double *, double *);
|
||||
void F77_dswap( FINT, double *, FINT, double *, FINT);
|
||||
void F77_dcopy( FINT, const double *, FINT, double *, FINT);
|
||||
void F77_daxpy( FINT, const double *, const double *, FINT, double *, FINT);
|
||||
void F77_dswap( FINT, double *, FINT, double *, FINT);
|
||||
void F77_dsdot_sub(FINT, const float *, FINT, const float *, FINT, double *);
|
||||
void F77_ddot_sub( FINT, const double *, FINT, const double *, FINT, double *);
|
||||
void F77_dscal( FINT, const double *, double *, FINT);
|
||||
void F77_dnrm2_sub( FINT, const double *, FINT, double *);
|
||||
void F77_dasum_sub( FINT, const double *, FINT, double *);
|
||||
void F77_idamax_sub( FINT, const double * , FINT, FINT2);
|
||||
|
||||
/* Single Complex Precision */
|
||||
|
||||
void F77_cswap( FINT, void *, FINT, void *, FINT);
|
||||
void F77_ccopy( FINT, const void *, FINT, void *, FINT);
|
||||
void F77_caxpy( FINT, const void *, const void *, FINT, void *, FINT);
|
||||
void F77_cswap( FINT, void *, FINT, void *, FINT);
|
||||
void F77_cdotc_sub( FINT, const void *, FINT, const void *, FINT, void *);
|
||||
void F77_cdotu_sub( FINT, const void *, FINT, const void *, FINT, void *);
|
||||
void F77_cscal( FINT, const void *, void *, FINT);
|
||||
void F77_icamax_sub( FINT, const void *, FINT, FINT2);
|
||||
void F77_csscal( FINT, const float *, void *, FINT);
|
||||
void F77_scnrm2_sub( FINT, const void *, FINT, float *);
|
||||
void F77_scasum_sub( FINT, const void *, FINT, float *);
|
||||
|
||||
/* Double Complex Precision */
|
||||
|
||||
void F77_zswap( FINT, void *, FINT, void *, FINT);
|
||||
void F77_zcopy( FINT, const void *, FINT, void *, FINT);
|
||||
void F77_zaxpy( FINT, const void *, const void *, FINT, void *, FINT);
|
||||
void F77_zswap( FINT, void *, FINT, void *, FINT);
|
||||
void F77_zdotc_sub( FINT, const void *, FINT, const void *, FINT, void *);
|
||||
void F77_zdotu_sub( FINT, const void *, FINT, const void *, FINT, void *);
|
||||
void F77_zdscal( FINT, const double *, void *, FINT);
|
||||
void F77_zscal( FINT, const void *, void *, FINT);
|
||||
void F77_dznrm2_sub( FINT, const void *, FINT, double *);
|
||||
void F77_dzasum_sub( FINT, const void *, FINT, double *);
|
||||
void F77_izamax_sub( FINT, const void *, FINT, FINT2);
|
||||
|
||||
/*
|
||||
* Level 2 Fortran Prototypes
|
||||
*/
|
||||
|
||||
/* Single Precision */
|
||||
|
||||
void F77_sgemv(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_sgbmv(FCHAR, FINT, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_ssymv(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_ssbmv(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_sspmv(FCHAR, FINT, const float *, const float *, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_strmv( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT);
|
||||
void F77_stbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT);
|
||||
void F77_strsv( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT);
|
||||
void F77_stbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT);
|
||||
void F77_stpmv( FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT);
|
||||
void F77_stpsv( FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT);
|
||||
void F77_sger( FINT, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT);
|
||||
void F77_ssyr(FCHAR, FINT, const float *, const float *, FINT, float *, FINT);
|
||||
void F77_sspr(FCHAR, FINT, const float *, const float *, FINT, float *);
|
||||
void F77_sspr2(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *);
|
||||
void F77_ssyr2(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT);
|
||||
|
||||
/* Double Precision */
|
||||
|
||||
void F77_dgemv(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_dgbmv(FCHAR, FINT, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_dsymv(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_dsbmv(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_dspmv(FCHAR, FINT, const double *, const double *, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_dtrmv( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT);
|
||||
void F77_dtbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT);
|
||||
void F77_dtrsv( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT);
|
||||
void F77_dtbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT);
|
||||
void F77_dtpmv( FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT);
|
||||
void F77_dtpsv( FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT);
|
||||
void F77_dger( FINT, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT);
|
||||
void F77_dsyr(FCHAR, FINT, const double *, const double *, FINT, double *, FINT);
|
||||
void F77_dspr(FCHAR, FINT, const double *, const double *, FINT, double *);
|
||||
void F77_dspr2(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *);
|
||||
void F77_dsyr2(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT);
|
||||
|
||||
/* Single Complex Precision */
|
||||
|
||||
void F77_cgemv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||
void F77_cgbmv(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||
void F77_chemv(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||
void F77_chbmv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||
void F77_chpmv(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT);
|
||||
void F77_ctrmv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_ctbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_ctpmv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT);
|
||||
void F77_ctrsv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_ctbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_ctpsv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT);
|
||||
void F77_cgerc( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_cgeru( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_cher(FCHAR, FINT, const float *, const void *, FINT, void *, FINT);
|
||||
void F77_cher2(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_chpr(FCHAR, FINT, const float *, const void *, FINT, void *);
|
||||
void F77_chpr2(FCHAR, FINT, const float *, const void *, FINT, const void *, FINT, void *);
|
||||
|
||||
/* Double Complex Precision */
|
||||
|
||||
void F77_zgemv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||
void F77_zgbmv(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||
void F77_zhemv(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||
void F77_zhbmv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
|
||||
void F77_zhpmv(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT);
|
||||
void F77_ztrmv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_ztbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_ztpmv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT);
|
||||
void F77_ztrsv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_ztbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_ztpsv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT);
|
||||
void F77_zgerc( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_zgeru( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_zher(FCHAR, FINT, const double *, const void *, FINT, void *, FINT);
|
||||
void F77_zher2(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
|
||||
void F77_zhpr(FCHAR, FINT, const double *, const void *, FINT, void *);
|
||||
void F77_zhpr2(FCHAR, FINT, const double *, const void *, FINT, const void *, FINT, void *);
|
||||
|
||||
/*
|
||||
* Level 3 Fortran Prototypes
|
||||
*/
|
||||
|
||||
/* Single Precision */
|
||||
|
||||
void F77_sgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_ssymm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_ssyrk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_ssyr2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_strmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
|
||||
void F77_strsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
|
||||
|
||||
/* Double Precision */
|
||||
|
||||
void F77_dgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_dsymm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_dsyrk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_dsyr2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_dtrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
|
||||
void F77_dtrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
|
||||
|
||||
/* Single Complex Precision */
|
||||
|
||||
void F77_cgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_csymm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_chemm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_csyrk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_cherk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_csyr2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_cher2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
|
||||
void F77_ctrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
|
||||
void F77_ctrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
|
||||
|
||||
/* Double Complex Precision */
|
||||
|
||||
void F77_zgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_zsymm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_zhemm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_zsyrk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_zherk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_zsyr2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_zher2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
|
||||
void F77_ztrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
|
||||
void F77_ztrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* CBLAS_F77_H */
|
|
@ -0,0 +1,17 @@
|
|||
#ifndef F77_HEADER_INCLUDED
|
||||
#define F77_HEADER_INCLUDED
|
||||
|
||||
#ifndef F77_GLOBAL
|
||||
#if defined(F77_GLOBAL_PATTERN_LC) || defined(ADD_)
|
||||
#define F77_GLOBAL(lcname,UCNAME) lcname##_
|
||||
#elif defined(F77_GLOBAL_PATTERN_UC) || defined(UPPER)
|
||||
#define F77_GLOBAL(lcname,UCNAME) UCNAME
|
||||
#elif defined(F77_GLOBAL_PATTERN_MC) || defined(NOCHANGE)
|
||||
#define F77_GLOBAL(lcname,UCNAME) lcname
|
||||
#else
|
||||
#define F77_GLOBAL(lcname,UCNAME) lcname##_
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,190 @@
|
|||
/*
|
||||
* cblas_test.h
|
||||
* Written by Keita Teranishi
|
||||
*/
|
||||
#ifndef CBLAS_TEST_H
|
||||
#define CBLAS_TEST_H
|
||||
#include "cblas.h"
|
||||
#include "cblas_mangling.h"
|
||||
|
||||
#define TRUE 1
|
||||
#define PASSED 1
|
||||
#define TEST_ROW_MJR 1
|
||||
|
||||
#define FALSE 0
|
||||
#define FAILED 0
|
||||
#define TEST_COL_MJR 0
|
||||
|
||||
#define INVALID -1
|
||||
#define UNDEFINED -1
|
||||
|
||||
typedef struct { float real; float imag; } CBLAS_TEST_COMPLEX;
|
||||
typedef struct { double real; double imag; } CBLAS_TEST_ZOMPLEX;
|
||||
|
||||
#define F77_xerbla F77_GLOBAL(xerbla,XERBLA)
|
||||
/*
|
||||
* Level 1 BLAS
|
||||
*/
|
||||
#define F77_srotg F77_GLOBAL(srotgtest,SROTGTEST)
|
||||
#define F77_srotmg F77_GLOBAL(srotmgtest,SROTMGTEST)
|
||||
#define F77_srot F77_GLOBAL(srottest,SROTTEST)
|
||||
#define F77_srotm F77_GLOBAL(srotmtest,SROTMTEST)
|
||||
#define F77_drotg F77_GLOBAL(drotgtest,DROTGTEST)
|
||||
#define F77_drotmg F77_GLOBAL(drotmgtest,DROTMGTEST)
|
||||
#define F77_drot F77_GLOBAL(drottest,DROTTEST)
|
||||
#define F77_drotm F77_GLOBAL(drotmtest,DROTMTEST)
|
||||
#define F77_sswap F77_GLOBAL(sswaptest,SSWAPTEST)
|
||||
#define F77_scopy F77_GLOBAL(scopytest,SCOPYTEST)
|
||||
#define F77_saxpy F77_GLOBAL(saxpytest,SAXPYTEST)
|
||||
#define F77_isamax F77_GLOBAL(isamaxtest,ISAMAXTEST)
|
||||
#define F77_dswap F77_GLOBAL(dswaptest,DSWAPTEST)
|
||||
#define F77_dcopy F77_GLOBAL(dcopytest,DCOPYTEST)
|
||||
#define F77_daxpy F77_GLOBAL(daxpytest,DAXPYTEST)
|
||||
#define F77_idamax F77_GLOBAL(idamaxtest,IDAMAXTEST)
|
||||
#define F77_cswap F77_GLOBAL(cswaptest,CSWAPTEST)
|
||||
#define F77_ccopy F77_GLOBAL(ccopytest,CCOPYTEST)
|
||||
#define F77_caxpy F77_GLOBAL(caxpytest,CAXPYTEST)
|
||||
#define F77_icamax F77_GLOBAL(icamaxtest,ICAMAXTEST)
|
||||
#define F77_zswap F77_GLOBAL(zswaptest,ZSWAPTEST)
|
||||
#define F77_zcopy F77_GLOBAL(zcopytest,ZCOPYTEST)
|
||||
#define F77_zaxpy F77_GLOBAL(zaxpytest,ZAXPYTEST)
|
||||
#define F77_izamax F77_GLOBAL(izamaxtest,IZAMAXTEST)
|
||||
#define F77_sdot F77_GLOBAL(sdottest,SDOTTEST)
|
||||
#define F77_ddot F77_GLOBAL(ddottest,DDOTTEST)
|
||||
#define F77_dsdot F77_GLOBAL(dsdottest,DSDOTTEST)
|
||||
#define F77_sscal F77_GLOBAL(sscaltest,SSCALTEST)
|
||||
#define F77_dscal F77_GLOBAL(dscaltest,DSCALTEST)
|
||||
#define F77_cscal F77_GLOBAL(cscaltest,CSCALTEST)
|
||||
#define F77_zscal F77_GLOBAL(zscaltest,ZSCALTEST)
|
||||
#define F77_csscal F77_GLOBAL(csscaltest,CSSCALTEST)
|
||||
#define F77_zdscal F77_GLOBAL(zdscaltest,ZDSCALTEST)
|
||||
#define F77_cdotu F77_GLOBAL(cdotutest,CDOTUTEST)
|
||||
#define F77_cdotc F77_GLOBAL(cdotctest,CDOTCTEST)
|
||||
#define F77_zdotu F77_GLOBAL(zdotutest,ZDOTUTEST)
|
||||
#define F77_zdotc F77_GLOBAL(zdotctest,ZDOTCTEST)
|
||||
#define F77_snrm2 F77_GLOBAL(snrm2test,SNRM2TEST)
|
||||
#define F77_sasum F77_GLOBAL(sasumtest,SASUMTEST)
|
||||
#define F77_dnrm2 F77_GLOBAL(dnrm2test,DNRM2TEST)
|
||||
#define F77_dasum F77_GLOBAL(dasumtest,DASUMTEST)
|
||||
#define F77_scnrm2 F77_GLOBAL(scnrm2test,SCNRM2TEST)
|
||||
#define F77_scasum F77_GLOBAL(scasumtest,SCASUMTEST)
|
||||
#define F77_dznrm2 F77_GLOBAL(dznrm2test,DZNRM2TEST)
|
||||
#define F77_dzasum F77_GLOBAL(dzasumtest,DZASUMTEST)
|
||||
#define F77_sdsdot F77_GLOBAL(sdsdottest, SDSDOTTEST)
|
||||
/*
|
||||
* Level 2 BLAS
|
||||
*/
|
||||
#define F77_s2chke F77_GLOBAL(cs2chke,CS2CHKE)
|
||||
#define F77_d2chke F77_GLOBAL(cd2chke,CD2CHKE)
|
||||
#define F77_c2chke F77_GLOBAL(cc2chke,CC2CHKE)
|
||||
#define F77_z2chke F77_GLOBAL(cz2chke,CZ2CHKE)
|
||||
#define F77_ssymv F77_GLOBAL(cssymv,CSSYMV)
|
||||
#define F77_ssbmv F77_GLOBAL(cssbmv,CSSBMV)
|
||||
#define F77_sspmv F77_GLOBAL(csspmv,CSSPMV)
|
||||
#define F77_sger F77_GLOBAL(csger,CSGER)
|
||||
#define F77_ssyr F77_GLOBAL(cssyr,CSSYR)
|
||||
#define F77_sspr F77_GLOBAL(csspr,CSSPR)
|
||||
#define F77_ssyr2 F77_GLOBAL(cssyr2,CSSYR2)
|
||||
#define F77_sspr2 F77_GLOBAL(csspr2,CSSPR2)
|
||||
#define F77_dsymv F77_GLOBAL(cdsymv,CDSYMV)
|
||||
#define F77_dsbmv F77_GLOBAL(cdsbmv,CDSBMV)
|
||||
#define F77_dspmv F77_GLOBAL(cdspmv,CDSPMV)
|
||||
#define F77_dger F77_GLOBAL(cdger,CDGER)
|
||||
#define F77_dsyr F77_GLOBAL(cdsyr,CDSYR)
|
||||
#define F77_dspr F77_GLOBAL(cdspr,CDSPR)
|
||||
#define F77_dsyr2 F77_GLOBAL(cdsyr2,CDSYR2)
|
||||
#define F77_dspr2 F77_GLOBAL(cdspr2,CDSPR2)
|
||||
#define F77_chemv F77_GLOBAL(cchemv,CCHEMV)
|
||||
#define F77_chbmv F77_GLOBAL(cchbmv,CCHBMV)
|
||||
#define F77_chpmv F77_GLOBAL(cchpmv,CCHPMV)
|
||||
#define F77_cgeru F77_GLOBAL(ccgeru,CCGERU)
|
||||
#define F77_cgerc F77_GLOBAL(ccgerc,CCGERC)
|
||||
#define F77_cher F77_GLOBAL(ccher,CCHER)
|
||||
#define F77_chpr F77_GLOBAL(cchpr,CCHPR)
|
||||
#define F77_cher2 F77_GLOBAL(ccher2,CCHER2)
|
||||
#define F77_chpr2 F77_GLOBAL(cchpr2,CCHPR2)
|
||||
#define F77_zhemv F77_GLOBAL(czhemv,CZHEMV)
|
||||
#define F77_zhbmv F77_GLOBAL(czhbmv,CZHBMV)
|
||||
#define F77_zhpmv F77_GLOBAL(czhpmv,CZHPMV)
|
||||
#define F77_zgeru F77_GLOBAL(czgeru,CZGERU)
|
||||
#define F77_zgerc F77_GLOBAL(czgerc,CZGERC)
|
||||
#define F77_zher F77_GLOBAL(czher,CZHER)
|
||||
#define F77_zhpr F77_GLOBAL(czhpr,CZHPR)
|
||||
#define F77_zher2 F77_GLOBAL(czher2,CZHER2)
|
||||
#define F77_zhpr2 F77_GLOBAL(czhpr2,CZHPR2)
|
||||
#define F77_sgemv F77_GLOBAL(csgemv,CSGEMV)
|
||||
#define F77_sgbmv F77_GLOBAL(csgbmv,CSGBMV)
|
||||
#define F77_strmv F77_GLOBAL(cstrmv,CSTRMV)
|
||||
#define F77_stbmv F77_GLOBAL(cstbmv,CSTBMV)
|
||||
#define F77_stpmv F77_GLOBAL(cstpmv,CSTPMV)
|
||||
#define F77_strsv F77_GLOBAL(cstrsv,CSTRSV)
|
||||
#define F77_stbsv F77_GLOBAL(cstbsv,CSTBSV)
|
||||
#define F77_stpsv F77_GLOBAL(cstpsv,CSTPSV)
|
||||
#define F77_dgemv F77_GLOBAL(cdgemv,CDGEMV)
|
||||
#define F77_dgbmv F77_GLOBAL(cdgbmv,CDGBMV)
|
||||
#define F77_dtrmv F77_GLOBAL(cdtrmv,CDTRMV)
|
||||
#define F77_dtbmv F77_GLOBAL(cdtbmv,CDTBMV)
|
||||
#define F77_dtpmv F77_GLOBAL(cdtpmv,CDTPMV)
|
||||
#define F77_dtrsv F77_GLOBAL(cdtrsv,CDTRSV)
|
||||
#define F77_dtbsv F77_GLOBAL(cdtbsv,CDTBSV)
|
||||
#define F77_dtpsv F77_GLOBAL(cdtpsv,CDTPSV)
|
||||
#define F77_cgemv F77_GLOBAL(ccgemv,CCGEMV)
|
||||
#define F77_cgbmv F77_GLOBAL(ccgbmv,CCGBMV)
|
||||
#define F77_ctrmv F77_GLOBAL(cctrmv,CCTRMV)
|
||||
#define F77_ctbmv F77_GLOBAL(cctbmv,CCTPMV)
|
||||
#define F77_ctpmv F77_GLOBAL(cctpmv,CCTPMV)
|
||||
#define F77_ctrsv F77_GLOBAL(cctrsv,CCTRSV)
|
||||
#define F77_ctbsv F77_GLOBAL(cctbsv,CCTBSV)
|
||||
#define F77_ctpsv F77_GLOBAL(cctpsv,CCTPSV)
|
||||
#define F77_zgemv F77_GLOBAL(czgemv,CZGEMV)
|
||||
#define F77_zgbmv F77_GLOBAL(czgbmv,CZGBMV)
|
||||
#define F77_ztrmv F77_GLOBAL(cztrmv,CZTRMV)
|
||||
#define F77_ztbmv F77_GLOBAL(cztbmv,CZTBMV)
|
||||
#define F77_ztpmv F77_GLOBAL(cztpmv,CZTPMV)
|
||||
#define F77_ztrsv F77_GLOBAL(cztrsv,CZTRSV)
|
||||
#define F77_ztbsv F77_GLOBAL(cztbsv,CZTBSV)
|
||||
#define F77_ztpsv F77_GLOBAL(cztpsv,CZTPSV)
|
||||
/*
|
||||
* Level 3 BLAS
|
||||
*/
|
||||
#define F77_s3chke F77_GLOBAL(cs3chke,CS3CHKE)
|
||||
#define F77_d3chke F77_GLOBAL(cd3chke,CD3CHKE)
|
||||
#define F77_c3chke F77_GLOBAL(cc3chke,CC3CHKE)
|
||||
#define F77_z3chke F77_GLOBAL(cz3chke,CZ3CHKE)
|
||||
#define F77_chemm F77_GLOBAL(cchemm,CCHEMM)
|
||||
#define F77_cherk F77_GLOBAL(ccherk,CCHERK)
|
||||
#define F77_cher2k F77_GLOBAL(ccher2k,CCHER2K)
|
||||
#define F77_zhemm F77_GLOBAL(czhemm,CZHEMM)
|
||||
#define F77_zherk F77_GLOBAL(czherk,CZHERK)
|
||||
#define F77_zher2k F77_GLOBAL(czher2k,CZHER2K)
|
||||
#define F77_sgemm F77_GLOBAL(csgemm,CSGEMM)
|
||||
#define F77_ssymm F77_GLOBAL(cssymm,CSSYMM)
|
||||
#define F77_ssyrk F77_GLOBAL(cssyrk,CSSYRK)
|
||||
#define F77_ssyr2k F77_GLOBAL(cssyr2k,CSSYR2K)
|
||||
#define F77_strmm F77_GLOBAL(cstrmm,CSTRMM)
|
||||
#define F77_strsm F77_GLOBAL(cstrsm,CSTRSM)
|
||||
#define F77_dgemm F77_GLOBAL(cdgemm,CDGEMM)
|
||||
#define F77_dsymm F77_GLOBAL(cdsymm,CDSYMM)
|
||||
#define F77_dsyrk F77_GLOBAL(cdsyrk,CDSYRK)
|
||||
#define F77_dsyr2k F77_GLOBAL(cdsyr2k,CDSYR2K)
|
||||
#define F77_dtrmm F77_GLOBAL(cdtrmm,CDTRMM)
|
||||
#define F77_dtrsm F77_GLOBAL(cdtrsm,CDTRSM)
|
||||
#define F77_cgemm F77_GLOBAL(ccgemm,CCGEMM)
|
||||
#define F77_csymm F77_GLOBAL(ccsymm,CCSYMM)
|
||||
#define F77_csyrk F77_GLOBAL(ccsyrk,CCSYRK)
|
||||
#define F77_csyr2k F77_GLOBAL(ccsyr2k,CCSYR2K)
|
||||
#define F77_ctrmm F77_GLOBAL(cctrmm,CCTRMM)
|
||||
#define F77_ctrsm F77_GLOBAL(cctrsm,CCTRSM)
|
||||
#define F77_zgemm F77_GLOBAL(czgemm,CZGEMM)
|
||||
#define F77_zsymm F77_GLOBAL(czsymm,CZSYMM)
|
||||
#define F77_zsyrk F77_GLOBAL(czsyrk,CZSYRK)
|
||||
#define F77_zsyr2k F77_GLOBAL(czsyr2k,CZSYR2K)
|
||||
#define F77_ztrmm F77_GLOBAL(cztrmm,CZTRMM)
|
||||
#define F77_ztrsm F77_GLOBAL(cztrsm, CZTRSM)
|
||||
|
||||
void get_transpose_type(char *type, CBLAS_TRANSPOSE *trans);
|
||||
void get_uplo_type(char *type, CBLAS_UPLO *uplo);
|
||||
void get_diag_type(char *type, CBLAS_DIAG *diag);
|
||||
void get_side_type(char *type, CBLAS_SIDE *side);
|
||||
|
||||
#endif /* CBLAS_TEST_H */
|
|
@ -0,0 +1,168 @@
|
|||
# This Makefile compiles the CBLAS routines
|
||||
#
|
||||
# Error handling routines for level 2 & 3
|
||||
|
||||
set (ERRHAND cblas_globals.c cblas_xerbla.c xerbla.c)
|
||||
|
||||
#
|
||||
#
|
||||
# CBLAS routines
|
||||
#
|
||||
# Level 1
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# All object files for single real precision
|
||||
#
|
||||
set (SLEV1 cblas_srotg.c cblas_srotmg.c cblas_srot.c cblas_srotm.c
|
||||
cblas_sswap.c cblas_sscal.c cblas_scopy.c cblas_saxpy.c
|
||||
cblas_sdot.c cblas_sdsdot.c cblas_snrm2.c cblas_sasum.c
|
||||
cblas_isamax.c sdotsub.f sdsdotsub.f snrm2sub.f sasumsub.f
|
||||
isamaxsub.f)
|
||||
#
|
||||
# All object files for double real precision
|
||||
#
|
||||
set (DLEV1 cblas_drotg.c cblas_drotmg.c cblas_drot.c cblas_drotm.c
|
||||
cblas_dswap.c cblas_dscal.c cblas_dcopy.c cblas_daxpy.c
|
||||
cblas_ddot.c cblas_dsdot.c cblas_dnrm2.c cblas_dasum.c
|
||||
cblas_idamax.c ddotsub.f dsdotsub.f dnrm2sub.f
|
||||
dasumsub.f idamaxsub.f)
|
||||
|
||||
#
|
||||
# All object files for single complex precision
|
||||
#
|
||||
set (CLEV1 cblas_cswap.c cblas_cscal.c cblas_csscal.c cblas_ccopy.c
|
||||
cblas_caxpy.c cblas_cdotu_sub.c cblas_cdotc_sub.c
|
||||
cblas_icamax.c cdotcsub.f cdotusub.f icamaxsub.f)
|
||||
|
||||
#
|
||||
# All object files for double complex precision
|
||||
#
|
||||
set (ZLEV1 cblas_zswap.c cblas_zscal.c cblas_zdscal.c cblas_zcopy.c
|
||||
cblas_zaxpy.c cblas_zdotu_sub.c cblas_zdotc_sub.c cblas_dznrm2.c
|
||||
cblas_dzasum.c cblas_izamax.c zdotcsub.f zdotusub.f
|
||||
dzasumsub.f dznrm2sub.f izamaxsub.f)
|
||||
|
||||
|
||||
#
|
||||
# Common files for single complex precision
|
||||
#
|
||||
set (SCLEV1 cblas_scasum.c scasumsub.f cblas_scnrm2.c scnrm2sub.f)
|
||||
|
||||
|
||||
#
|
||||
# All object files
|
||||
#
|
||||
set (ALEV1 ${slev1} ${dlev1} ${clev1} ${zlev1} ${sclev1})
|
||||
|
||||
|
||||
#
|
||||
#
|
||||
# CBLAS routines
|
||||
#
|
||||
# Level 2
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# All object files for single real precision
|
||||
#
|
||||
set (SLEV2 cblas_sgemv.c cblas_sgbmv.c cblas_sger.c cblas_ssbmv.c cblas_sspmv.c
|
||||
cblas_sspr.c cblas_sspr2.c cblas_ssymv.c cblas_ssyr.c cblas_ssyr2.c
|
||||
cblas_stbmv.c cblas_stbsv.c cblas_stpmv.c cblas_stpsv.c cblas_strmv.c
|
||||
cblas_strsv.c)
|
||||
|
||||
|
||||
#
|
||||
# All object files for double real precision
|
||||
#
|
||||
set (DLEV2 cblas_dgemv.c cblas_dgbmv.c cblas_dger.c cblas_dsbmv.c cblas_dspmv.c
|
||||
cblas_dspr.c cblas_dspr2.c cblas_dsymv.c cblas_dsyr.c cblas_dsyr2.c
|
||||
cblas_dtbmv.c cblas_dtbsv.c cblas_dtpmv.c cblas_dtpsv.c cblas_dtrmv.c
|
||||
cblas_dtrsv.c)
|
||||
|
||||
#
|
||||
# All object files for single complex precision
|
||||
#
|
||||
set (CLEV2 cblas_cgemv.c cblas_cgbmv.c cblas_chemv.c cblas_chbmv.c cblas_chpmv.c
|
||||
cblas_ctrmv.c cblas_ctbmv.c cblas_ctpmv.c cblas_ctrsv.c cblas_ctbsv.c
|
||||
cblas_ctpsv.c cblas_cgeru.c cblas_cgerc.c cblas_cher.c cblas_cher2.c
|
||||
cblas_chpr.c cblas_chpr2.c)
|
||||
|
||||
#
|
||||
# All object files for double complex precision
|
||||
#
|
||||
set (ZLEV2 cblas_zgemv.c cblas_zgbmv.c cblas_zhemv.c cblas_zhbmv.c cblas_zhpmv.c
|
||||
cblas_ztrmv.c cblas_ztbmv.c cblas_ztpmv.c cblas_ztrsv.c cblas_ztbsv.c
|
||||
cblas_ztpsv.c cblas_zgeru.c cblas_zgerc.c cblas_zher.c cblas_zher2.c
|
||||
cblas_zhpr.c cblas_zhpr2.c)
|
||||
#
|
||||
# All object files
|
||||
#
|
||||
set (AVEL2 ${slev2} ${dlev2} ${clev2} ${zlev2})
|
||||
|
||||
#
|
||||
#
|
||||
# CBLAS routines
|
||||
#
|
||||
# Level 3
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# All object files for single real precision
|
||||
#
|
||||
set (SLEV3 cblas_sgemm.c cblas_ssymm.c cblas_ssyrk.c cblas_ssyr2k.c cblas_strmm.c
|
||||
cblas_strsm.c)
|
||||
#
|
||||
# All object files for double real precision
|
||||
#
|
||||
set (DLEV3 cblas_dgemm.c cblas_dsymm.c cblas_dsyrk.c cblas_dsyr2k.c cblas_dtrmm.c
|
||||
cblas_dtrsm.c)
|
||||
#
|
||||
# All object files for single complex precision
|
||||
#
|
||||
set (CLEV3 cblas_cgemm.c cblas_csymm.c cblas_chemm.c cblas_cherk.c
|
||||
cblas_cher2k.c cblas_ctrmm.c cblas_ctrsm.c cblas_csyrk.c
|
||||
cblas_csyr2k.c)
|
||||
#
|
||||
# All object files for double complex precision
|
||||
#
|
||||
set (ZLEV3 cblas_zgemm.c cblas_zsymm.c cblas_zhemm.c cblas_zherk.c
|
||||
cblas_zher2k.c cblas_ztrmm.c cblas_ztrsm.c cblas_zsyrk.c
|
||||
cblas_zsyr2k.c)
|
||||
#
|
||||
# All object files
|
||||
#
|
||||
set (ALEV3 ${slev3} ${dlev3} ${clev3} ${zlev3})
|
||||
|
||||
# default build all of it
|
||||
set(ALLOBJ ${SCLEV1} ${SLEV1} ${SLEV2} ${SLEV3} ${ERRHAND}
|
||||
${DLEV1} ${DLEV2} ${DLEV3}
|
||||
${CLEV1} ${CLEV2} ${CLEV3}
|
||||
${ZLEV1} ${ZLEV2} ${ZLEV3} )
|
||||
|
||||
# Single real precision
|
||||
if(CBLAS_SINGLE)
|
||||
set(ALLOBJ ${SCLEV1} ${SLEV1} ${SLEV2} ${SLEV3} ${ERRHAND})
|
||||
endif(CBLAS_SINGLE)
|
||||
|
||||
# Double real precision
|
||||
if(CBLAS_DOUBLE)
|
||||
set(ALLOBJ ${DLEV1} ${DLEV2} ${DLEV3} ${ERRHAND})
|
||||
endif(CBLAS_DOUBLE)
|
||||
|
||||
# Single complex precision
|
||||
if (CBLAS_COMPLEX)
|
||||
set(ALLOBJ ${CLEV1} ${SCLEV1} ${CLEV2} ${CLEV3} ${ERRHAND})
|
||||
endif(CBLAS_COMPLEX)
|
||||
|
||||
# Double complex precision
|
||||
if (CBLAS_COMPLEX16)
|
||||
set(ALLOBJ ${ZLEV1} ${ZLEV2} ${ZLEV3} ${ERRHAND})
|
||||
endif(CBLAS_COMPLEX16)
|
||||
|
||||
add_library(cblas ${ALLOBJ})
|
||||
target_link_libraries(cblas ${BLAS_LIBRARIES} )
|
||||
lapack_install_library(cblas)
|
|
@ -0,0 +1,249 @@
|
|||
# This Makefile compiles the CBLAS routines
|
||||
#
|
||||
include ../../make.inc
|
||||
|
||||
#
|
||||
# Erase all object and archive files
|
||||
#
|
||||
all: cblaslib
|
||||
|
||||
clean:
|
||||
rm -f *.o a.out core
|
||||
|
||||
# Error handling routines for level 2 & 3
|
||||
|
||||
errhand = cblas_globals.o cblas_xerbla.o xerbla.o
|
||||
|
||||
# Object files of all routines
|
||||
alev = $(alev1) $(alev2) $(alev3) $(errhand)
|
||||
#
|
||||
#
|
||||
# CBLAS routines
|
||||
#
|
||||
# Level 1
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# All object files for single real precision
|
||||
#
|
||||
slev1 = cblas_srotg.o cblas_srotmg.o cblas_srot.o cblas_srotm.o \
|
||||
cblas_sswap.o cblas_sscal.o cblas_scopy.o cblas_saxpy.o \
|
||||
cblas_sdot.o cblas_sdsdot.o cblas_snrm2.o cblas_sasum.o \
|
||||
cblas_isamax.o sdotsub.o sdsdotsub.o snrm2sub.o sasumsub.o \
|
||||
isamaxsub.o
|
||||
#
|
||||
# All object files for double real precision
|
||||
#
|
||||
dlev1 = cblas_drotg.o cblas_drotmg.o cblas_drot.o cblas_drotm.o \
|
||||
cblas_dswap.o cblas_dscal.o cblas_dcopy.o cblas_daxpy.o \
|
||||
cblas_ddot.o cblas_dsdot.o cblas_dnrm2.o cblas_dasum.o \
|
||||
cblas_idamax.o ddotsub.o dsdotsub.o dnrm2sub.o \
|
||||
dasumsub.o idamaxsub.o
|
||||
|
||||
#
|
||||
# All object files for single complex precision
|
||||
#
|
||||
clev1 = cblas_cswap.o cblas_cscal.o cblas_csscal.o cblas_ccopy.o \
|
||||
cblas_caxpy.o cblas_cdotu_sub.o cblas_cdotc_sub.o \
|
||||
cblas_icamax.o cdotcsub.o cdotusub.o icamaxsub.o
|
||||
|
||||
#
|
||||
# All object files for double complex precision
|
||||
#
|
||||
zlev1 = cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \
|
||||
cblas_zaxpy.o cblas_zdotu_sub.o cblas_zdotc_sub.o cblas_dznrm2.o \
|
||||
cblas_dzasum.o cblas_izamax.o zdotcsub.o zdotusub.o \
|
||||
dzasumsub.o dznrm2sub.o izamaxsub.o
|
||||
|
||||
#
|
||||
# Common files for single / complex precision
|
||||
#
|
||||
sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o
|
||||
|
||||
#
|
||||
# All object files
|
||||
#
|
||||
alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1)
|
||||
|
||||
|
||||
#
|
||||
# Make an archive file
|
||||
#
|
||||
|
||||
# Single real precision
|
||||
slib1: $(slev1) $(sclev1)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev1) $(sclev1)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# Double real precision
|
||||
dlib1: $(dlev1)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev1)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# Single complex precision
|
||||
clib1: $(clev1) $(sclev1)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev1) $(sclev1)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# Double complex precision
|
||||
zlib1: $(zlev1)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev1)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# All precisions
|
||||
all1: $(alev1)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev1)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
#
|
||||
#
|
||||
# CBLAS routines
|
||||
#
|
||||
# Level 2
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# All object files for single real precision
|
||||
#
|
||||
slev2 = cblas_sgemv.o cblas_sgbmv.o cblas_sger.o cblas_ssbmv.o cblas_sspmv.o \
|
||||
cblas_sspr.o cblas_sspr2.o cblas_ssymv.o cblas_ssyr.o cblas_ssyr2.o \
|
||||
cblas_stbmv.o cblas_stbsv.o cblas_stpmv.o cblas_stpsv.o cblas_strmv.o \
|
||||
cblas_strsv.o
|
||||
|
||||
#
|
||||
# All object files for double real precision
|
||||
#
|
||||
dlev2 = cblas_dgemv.o cblas_dgbmv.o cblas_dger.o cblas_dsbmv.o cblas_dspmv.o \
|
||||
cblas_dspr.o cblas_dspr2.o cblas_dsymv.o cblas_dsyr.o cblas_dsyr2.o \
|
||||
cblas_dtbmv.o cblas_dtbsv.o cblas_dtpmv.o cblas_dtpsv.o cblas_dtrmv.o \
|
||||
cblas_dtrsv.o
|
||||
|
||||
#
|
||||
# All object files for single complex precision
|
||||
#
|
||||
clev2 = cblas_cgemv.o cblas_cgbmv.o cblas_chemv.o cblas_chbmv.o cblas_chpmv.o \
|
||||
cblas_ctrmv.o cblas_ctbmv.o cblas_ctpmv.o cblas_ctrsv.o cblas_ctbsv.o \
|
||||
cblas_ctpsv.o cblas_cgeru.o cblas_cgerc.o cblas_cher.o cblas_cher2.o \
|
||||
cblas_chpr.o cblas_chpr2.o
|
||||
|
||||
#
|
||||
# All object files for double complex precision
|
||||
#
|
||||
zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \
|
||||
cblas_ztrmv.o cblas_ztbmv.o cblas_ztpmv.o cblas_ztrsv.o cblas_ztbsv.o \
|
||||
cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \
|
||||
cblas_zhpr.o cblas_zhpr2.o
|
||||
#
|
||||
# All object files
|
||||
#
|
||||
alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2)
|
||||
|
||||
#
|
||||
# Make an archive file
|
||||
#
|
||||
|
||||
# Single real precision
|
||||
slib2: $(slev2) $(errhand)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev2) $(errhand)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# Double real precision
|
||||
dlib2: $(dlev2) $(errhand)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev2) $(errhand)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# Single complex precision
|
||||
clib2: $(clev2) $(errhand)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev2) $(errhand)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# Double complex precision
|
||||
zlib2: $(zlev2) $(errhand)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev2) $(errhand)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# All precisions
|
||||
all2: $(alev2) $(errhand)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev2) $(errhand)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
#
|
||||
#
|
||||
# CBLAS routines
|
||||
#
|
||||
# Level 3
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# All object files for single real precision
|
||||
#
|
||||
slev3 = cblas_sgemm.o cblas_ssymm.o cblas_ssyrk.o cblas_ssyr2k.o cblas_strmm.o\
|
||||
cblas_strsm.o
|
||||
|
||||
#
|
||||
# All object files for double real precision
|
||||
#
|
||||
dlev3 = cblas_dgemm.o cblas_dsymm.o cblas_dsyrk.o cblas_dsyr2k.o cblas_dtrmm.o\
|
||||
cblas_dtrsm.o
|
||||
|
||||
#
|
||||
# All object files for single complex precision
|
||||
#
|
||||
clev3 = cblas_cgemm.o cblas_csymm.o cblas_chemm.o cblas_cherk.o\
|
||||
cblas_cher2k.o cblas_ctrmm.o cblas_ctrsm.o cblas_csyrk.o\
|
||||
cblas_csyr2k.o
|
||||
#
|
||||
# All object files for double complex precision
|
||||
#
|
||||
zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o\
|
||||
cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o\
|
||||
cblas_zsyr2k.o
|
||||
#
|
||||
# All object files
|
||||
#
|
||||
alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3)
|
||||
|
||||
#
|
||||
# Make an archive file
|
||||
#
|
||||
|
||||
# Single real precision
|
||||
slib3: $(slev3) $(errhand)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev3) $(errhand)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# Double real precision
|
||||
dlib3: $(dlev3) $(errhand)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev3) $(errhand)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# Single complex precision
|
||||
clib3: $(clev3) $(errhand)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev3) $(errhand)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# Single complex precision
|
||||
zlib3: $(zlev3) $(errhand)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev3) $(errhand)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# All precisions
|
||||
all3: $(alev3) $(errhand)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev3)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
# All levels and precisions
|
||||
cblaslib: $(alev)
|
||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev)
|
||||
$(RANLIB) $(CBLASLIB)
|
||||
|
||||
FRC:
|
||||
@FRC=$(FRC)
|
||||
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) -I ../include -o $@ $<
|
||||
|
||||
.f.o:
|
||||
$(FORTRAN) $(OPTS) -c $< -o $@
|
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* cblas_caxpy.c
|
||||
*
|
||||
* The program is a C interface to caxpy.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_caxpy( const int N, const void *alpha, const void *X,
|
||||
const int incX, void *Y, const int incY)
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
F77_caxpy( &F77_N, alpha, X, &F77_incX, Y, &F77_incY);
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* cblas_ccopy.c
|
||||
*
|
||||
* The program is a C interface to ccopy.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_ccopy( const int N, const void *X,
|
||||
const int incX, void *Y, const int incY)
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
F77_ccopy( &F77_N, X, &F77_incX, Y, &F77_incY);
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* cblas_cdotc_sub.c
|
||||
*
|
||||
* The program is a C interface to cdotc.
|
||||
* It calls the fortran wrapper before calling cdotc.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cdotc_sub( const int N, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *dotc)
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
F77_cdotc_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotc);
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* cblas_cdotu_sub.c
|
||||
*
|
||||
* The program is a C interface to cdotu.
|
||||
* It calls the fortran wrapper before calling cdotu.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cdotu_sub( const int N, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *dotu)
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
F77_cdotu_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotu);
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,165 @@
|
|||
/*
|
||||
* cblas_cgbmv.c
|
||||
* The program is a C interface of cgbmv
|
||||
*
|
||||
* Keita Teranishi 5/20/98
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cgbmv(const CBLAS_LAYOUT layout,
|
||||
const CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||
const int KL, const int KU,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *X, const int incX, const void *beta,
|
||||
void *Y, const int incY)
|
||||
{
|
||||
char TA;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_TA;
|
||||
#else
|
||||
#define F77_TA &TA
|
||||
#endif
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||
F77_INT F77_KL=KL,F77_KU=KU;
|
||||
#else
|
||||
#define F77_M M
|
||||
#define F77_N N
|
||||
#define F77_lda lda
|
||||
#define F77_KL KL
|
||||
#define F77_KU KU
|
||||
#define F77_incX incx
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
int n=0, i=0, incx=incX;
|
||||
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
|
||||
float ALPHA[2],BETA[2];
|
||||
int tincY, tincx;
|
||||
float *x=(float *)X, *y=(float *)Y, *st=0, *tx=0;
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (TransA == CblasNoTrans) TA = 'N';
|
||||
else if (TransA == CblasTrans) TA = 'T';
|
||||
else if (TransA == CblasConjTrans) TA = 'C';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cgbmv","Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
#endif
|
||||
F77_cgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, alpha,
|
||||
A, &F77_lda, X, &F77_incX, beta, Y, &F77_incY);
|
||||
}
|
||||
else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if (TransA == CblasNoTrans) TA = 'T';
|
||||
else if (TransA == CblasTrans) TA = 'N';
|
||||
else if (TransA == CblasConjTrans)
|
||||
{
|
||||
ALPHA[0]= *alp;
|
||||
ALPHA[1]= -alp[1];
|
||||
BETA[0]= *bet;
|
||||
BETA[1]= -bet[1];
|
||||
TA = 'N';
|
||||
if (M > 0)
|
||||
{
|
||||
n = M << 1;
|
||||
x = malloc(n*sizeof(float));
|
||||
tx = x;
|
||||
|
||||
if( incX > 0 ) {
|
||||
i = incX << 1 ;
|
||||
tincx = 2;
|
||||
st= x+n;
|
||||
} else {
|
||||
i = incX *(-2);
|
||||
tincx = -2;
|
||||
st = x-2;
|
||||
x +=(n-2);
|
||||
}
|
||||
do
|
||||
{
|
||||
*x = *xx;
|
||||
x[1] = -xx[1];
|
||||
x += tincx ;
|
||||
xx += i;
|
||||
}
|
||||
while (x != st);
|
||||
x=tx;
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_incX = 1;
|
||||
#else
|
||||
incx = 1;
|
||||
#endif
|
||||
|
||||
if( incY > 0 )
|
||||
tincY = incY;
|
||||
else
|
||||
tincY = -incY;
|
||||
|
||||
y++;
|
||||
|
||||
if (N > 0)
|
||||
{
|
||||
i = tincY << 1;
|
||||
n = i * N ;
|
||||
st = y + n;
|
||||
do {
|
||||
*y = -(*y);
|
||||
y += i;
|
||||
} while(y != st);
|
||||
y -= n;
|
||||
}
|
||||
}
|
||||
else x = (float *) X;
|
||||
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cgbmv","Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
#endif
|
||||
if (TransA == CblasConjTrans)
|
||||
F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, ALPHA,
|
||||
A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY);
|
||||
else
|
||||
F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, alpha,
|
||||
A ,&F77_lda, x,&F77_incX, beta, Y, &F77_incY);
|
||||
if (TransA == CblasConjTrans)
|
||||
{
|
||||
if (x != X) free(x);
|
||||
if (N > 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
*y = -(*y);
|
||||
y += i;
|
||||
}
|
||||
while (y != st);
|
||||
}
|
||||
}
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_cgbmv", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
}
|
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
*
|
||||
* cblas_cgemm.c
|
||||
* This program is a C interface to cgemm.
|
||||
* Written by Keita Teranishi
|
||||
* 4/8/1998
|
||||
*
|
||||
*/
|
||||
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA,
|
||||
const CBLAS_TRANSPOSE TransB, const int M, const int N,
|
||||
const int K, const void *alpha, const void *A,
|
||||
const int lda, const void *B, const int ldb,
|
||||
const void *beta, void *C, const int ldc)
|
||||
{
|
||||
char TA, TB;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_TA, F77_TB;
|
||||
#else
|
||||
#define F77_TA &TA
|
||||
#define F77_TB &TB
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_M=M, F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb;
|
||||
F77_INT F77_ldc=ldc;
|
||||
#else
|
||||
#define F77_M M
|
||||
#define F77_N N
|
||||
#define F77_K K
|
||||
#define F77_lda lda
|
||||
#define F77_ldb ldb
|
||||
#define F77_ldc ldc
|
||||
#endif
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
CBLAS_CallFromC = 1;
|
||||
|
||||
if( layout == CblasColMajor )
|
||||
{
|
||||
if(TransA == CblasTrans) TA='T';
|
||||
else if ( TransA == CblasConjTrans ) TA='C';
|
||||
else if ( TransA == CblasNoTrans ) TA='N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cgemm", "Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if(TransB == CblasTrans) TB='T';
|
||||
else if ( TransB == CblasConjTrans ) TB='C';
|
||||
else if ( TransB == CblasNoTrans ) TB='N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
F77_TB = C2F_CHAR(&TB);
|
||||
#endif
|
||||
|
||||
F77_cgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, alpha, A,
|
||||
&F77_lda, B, &F77_ldb, beta, C, &F77_ldc);
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if(TransA == CblasTrans) TB='T';
|
||||
else if ( TransA == CblasConjTrans ) TB='C';
|
||||
else if ( TransA == CblasNoTrans ) TB='N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cgemm", "Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if(TransB == CblasTrans) TA='T';
|
||||
else if ( TransB == CblasConjTrans ) TA='C';
|
||||
else if ( TransB == CblasNoTrans ) TA='N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
F77_TB = C2F_CHAR(&TB);
|
||||
#endif
|
||||
|
||||
F77_cgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, alpha, B,
|
||||
&F77_ldb, A, &F77_lda, beta, C, &F77_ldc);
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_cgemm", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,162 @@
|
|||
/*
|
||||
* cblas_cgemv.c
|
||||
* The program is a C interface of cgemv
|
||||
*
|
||||
* Keita Teranishi 5/20/98
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cgemv(const CBLAS_LAYOUT layout,
|
||||
const CBLAS_TRANSPOSE TransA, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *X, const int incX, const void *beta,
|
||||
void *Y, const int incY)
|
||||
{
|
||||
char TA;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_TA;
|
||||
#else
|
||||
#define F77_TA &TA
|
||||
#endif
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_M M
|
||||
#define F77_N N
|
||||
#define F77_lda lda
|
||||
#define F77_incX incx
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
int n=0, i=0, incx=incX;
|
||||
const float *xx= (const float *)X;
|
||||
float ALPHA[2],BETA[2];
|
||||
int tincY, tincx;
|
||||
float *x=(float *)X, *y=(float *)Y, *st=0, *tx=0;
|
||||
const float *stx = x;
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (TransA == CblasNoTrans) TA = 'N';
|
||||
else if (TransA == CblasTrans) TA = 'T';
|
||||
else if (TransA == CblasConjTrans) TA = 'C';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cgemv","Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
#endif
|
||||
F77_cgemv(F77_TA, &F77_M, &F77_N, alpha, A, &F77_lda, X, &F77_incX,
|
||||
beta, Y, &F77_incY);
|
||||
}
|
||||
else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
|
||||
if (TransA == CblasNoTrans) TA = 'T';
|
||||
else if (TransA == CblasTrans) TA = 'N';
|
||||
else if (TransA == CblasConjTrans)
|
||||
{
|
||||
ALPHA[0]= *( (const float *) alpha );
|
||||
ALPHA[1]= -( *( (const float *) alpha+1) );
|
||||
BETA[0]= *( (const float *) beta );
|
||||
BETA[1]= -( *( (const float *) beta+1 ) );
|
||||
TA = 'N';
|
||||
if (M > 0)
|
||||
{
|
||||
n = M << 1;
|
||||
x = malloc(n*sizeof(float));
|
||||
tx = x;
|
||||
if( incX > 0 ) {
|
||||
i = incX << 1 ;
|
||||
tincx = 2;
|
||||
st= x+n;
|
||||
} else {
|
||||
i = incX *(-2);
|
||||
tincx = -2;
|
||||
st = x-2;
|
||||
x +=(n-2);
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
*x = *xx;
|
||||
x[1] = -xx[1];
|
||||
x += tincx ;
|
||||
xx += i;
|
||||
}
|
||||
while (x != st);
|
||||
x=tx;
|
||||
|
||||
F77_incX = 1;
|
||||
|
||||
if(incY > 0)
|
||||
tincY = incY;
|
||||
else
|
||||
tincY = -incY;
|
||||
|
||||
y++;
|
||||
|
||||
if (N > 0)
|
||||
{
|
||||
i = tincY << 1;
|
||||
n = i * N ;
|
||||
st = y + n;
|
||||
do {
|
||||
*y = -(*y);
|
||||
y += i;
|
||||
} while(y != st);
|
||||
y -= n;
|
||||
}
|
||||
stx = x;
|
||||
}
|
||||
else stx = (const float *)X;
|
||||
}
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cgemv","Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
#endif
|
||||
if (TransA == CblasConjTrans)
|
||||
F77_cgemv(F77_TA, &F77_N, &F77_M, ALPHA, A, &F77_lda, stx,
|
||||
&F77_incX, BETA, Y, &F77_incY);
|
||||
else
|
||||
F77_cgemv(F77_TA, &F77_N, &F77_M, alpha, A, &F77_lda, x,
|
||||
&F77_incX, beta, Y, &F77_incY);
|
||||
|
||||
if (TransA == CblasConjTrans)
|
||||
{
|
||||
if (x != (const float *)X) free(x);
|
||||
if (N > 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
*y = -(*y);
|
||||
y += i;
|
||||
}
|
||||
while (y != st);
|
||||
}
|
||||
}
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_cgemv", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
/*
|
||||
* cblas_cgerc.c
|
||||
* The program is a C interface to cgerc.
|
||||
*
|
||||
* Keita Teranishi 5/20/98
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cgerc(const CBLAS_LAYOUT layout, const int M, const int N,
|
||||
const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *A, const int lda)
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_M M
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incy
|
||||
#define F77_lda lda
|
||||
#endif
|
||||
|
||||
int n, i, tincy, incy=incY;
|
||||
float *y=(float *)Y, *yy=(float *)Y, *ty, *st;
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
F77_cgerc( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A,
|
||||
&F77_lda);
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if (N > 0)
|
||||
{
|
||||
n = N << 1;
|
||||
y = malloc(n*sizeof(float));
|
||||
|
||||
ty = y;
|
||||
if( incY > 0 ) {
|
||||
i = incY << 1;
|
||||
tincy = 2;
|
||||
st= y+n;
|
||||
} else {
|
||||
i = incY *(-2);
|
||||
tincy = -2;
|
||||
st = y-2;
|
||||
y +=(n-2);
|
||||
}
|
||||
do
|
||||
{
|
||||
*y = *yy;
|
||||
y[1] = -yy[1];
|
||||
y += tincy ;
|
||||
yy += i;
|
||||
}
|
||||
while (y != st);
|
||||
y = ty;
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_incY = 1;
|
||||
#else
|
||||
incy = 1;
|
||||
#endif
|
||||
}
|
||||
else y = (float *) Y;
|
||||
|
||||
F77_cgeru( &F77_N, &F77_M, alpha, y, &F77_incY, X, &F77_incX, A,
|
||||
&F77_lda);
|
||||
if(Y!=y)
|
||||
free(y);
|
||||
|
||||
} else cblas_xerbla(1, "cblas_cgerc", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* cblas_cgeru.c
|
||||
* The program is a C interface to cgeru.
|
||||
*
|
||||
* Keita Teranishi 5/20/98
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cgeru(const CBLAS_LAYOUT layout, const int M, const int N,
|
||||
const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *A, const int lda)
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_M M
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#define F77_lda lda
|
||||
#endif
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
F77_cgeru( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A,
|
||||
&F77_lda);
|
||||
}
|
||||
else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
F77_cgeru( &F77_N, &F77_M, alpha, Y, &F77_incY, X, &F77_incX, A,
|
||||
&F77_lda);
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_cgeru","Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,159 @@
|
|||
/*
|
||||
* cblas_chbmv.c
|
||||
* The program is a C interface to chbmv
|
||||
*
|
||||
* Keita Teranishi 5/18/98
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
void cblas_chbmv(const CBLAS_LAYOUT layout,
|
||||
const CBLAS_UPLO Uplo,const int N,const int K,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *X, const int incX, const void *beta,
|
||||
void *Y, const int incY)
|
||||
{
|
||||
char UL;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_UL;
|
||||
#else
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_K K
|
||||
#define F77_lda lda
|
||||
#define F77_incX incx
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
int n, i=0, incx=incX;
|
||||
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
|
||||
float ALPHA[2],BETA[2];
|
||||
int tincY, tincx;
|
||||
float *x=(float *)X, *y=(float *)Y, *st=0, *tx;
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (Uplo == CblasLower) UL = 'L';
|
||||
else if (Uplo == CblasUpper) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chbmv","Illegal Uplo setting, %d\n",Uplo );
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
F77_chbmv(F77_UL, &F77_N, &F77_K, alpha, A, &F77_lda, X,
|
||||
&F77_incX, beta, Y, &F77_incY);
|
||||
}
|
||||
else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
ALPHA[0]= *alp;
|
||||
ALPHA[1]= -alp[1];
|
||||
BETA[0]= *bet;
|
||||
BETA[1]= -bet[1];
|
||||
|
||||
if (N > 0)
|
||||
{
|
||||
n = N << 1;
|
||||
x = malloc(n*sizeof(float));
|
||||
|
||||
tx = x;
|
||||
if( incX > 0 ) {
|
||||
i = incX << 1 ;
|
||||
tincx = 2;
|
||||
st= x+n;
|
||||
} else {
|
||||
i = incX *(-2);
|
||||
tincx = -2;
|
||||
st = x-2;
|
||||
x +=(n-2);
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
*x = *xx;
|
||||
x[1] = -xx[1];
|
||||
x += tincx ;
|
||||
xx += i;
|
||||
}
|
||||
while (x != st);
|
||||
x=tx;
|
||||
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_incX = 1;
|
||||
#else
|
||||
incx = 1;
|
||||
#endif
|
||||
|
||||
if(incY > 0)
|
||||
tincY = incY;
|
||||
else
|
||||
tincY = -incY;
|
||||
y++;
|
||||
|
||||
i = tincY << 1;
|
||||
n = i * N ;
|
||||
st = y + n;
|
||||
do {
|
||||
*y = -(*y);
|
||||
y += i;
|
||||
} while(y != st);
|
||||
y -= n;
|
||||
} else
|
||||
x = (float *) X;
|
||||
|
||||
if (Uplo == CblasUpper) UL = 'L';
|
||||
else if (Uplo == CblasLower) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chbmv","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
F77_chbmv(F77_UL, &F77_N, &F77_K, ALPHA,
|
||||
A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY);
|
||||
}
|
||||
else
|
||||
{
|
||||
cblas_xerbla(1, "cblas_chbmv","Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if ( layout == CblasRowMajor )
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if(X!=x)
|
||||
free(x);
|
||||
if (N > 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
*y = -(*y);
|
||||
y += i;
|
||||
}
|
||||
while (y != st);
|
||||
}
|
||||
}
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
*
|
||||
* cblas_chemm.c
|
||||
* This program is a C interface to chemm.
|
||||
* Written by Keita Teranishi
|
||||
* 4/8/1998
|
||||
*
|
||||
*/
|
||||
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_chemm(const CBLAS_LAYOUT layout, const CBLAS_SIDE Side,
|
||||
const CBLAS_UPLO Uplo, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const void *beta,
|
||||
void *C, const int ldc)
|
||||
{
|
||||
char SD, UL;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_SD, F77_UL;
|
||||
#else
|
||||
#define F77_SD &SD
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb;
|
||||
F77_INT F77_ldc=ldc;
|
||||
#else
|
||||
#define F77_M M
|
||||
#define F77_N N
|
||||
#define F77_lda lda
|
||||
#define F77_ldb ldb
|
||||
#define F77_ldc ldc
|
||||
#endif
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
CBLAS_CallFromC = 1;
|
||||
|
||||
if( layout == CblasColMajor )
|
||||
{
|
||||
if( Side == CblasRight) SD='R';
|
||||
else if ( Side == CblasLeft ) SD='L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chemm", "Illegal Side setting, %d\n", Side);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if( Uplo == CblasUpper) UL='U';
|
||||
else if ( Uplo == CblasLower ) UL='L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_chemm", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_SD = C2F_CHAR(&SD);
|
||||
#endif
|
||||
|
||||
F77_chemm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda,
|
||||
B, &F77_ldb, beta, C, &F77_ldc);
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if( Side == CblasRight) SD='L';
|
||||
else if ( Side == CblasLeft ) SD='R';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chemm", "Illegal Side setting, %d\n", Side);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if( Uplo == CblasUpper) UL='L';
|
||||
else if ( Uplo == CblasLower ) UL='U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_chemm", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_SD = C2F_CHAR(&SD);
|
||||
#endif
|
||||
|
||||
F77_chemm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A,
|
||||
&F77_lda, B, &F77_ldb, beta, C, &F77_ldc);
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_chemm", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
* cblas_chemv.c
|
||||
* The program is a C interface to chemv
|
||||
*
|
||||
* Keita Teranishi 5/18/98
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_chemv(const CBLAS_LAYOUT layout,
|
||||
const CBLAS_UPLO Uplo, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *X, const int incX, const void *beta,
|
||||
void *Y, const int incY)
|
||||
{
|
||||
char UL;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_UL;
|
||||
#else
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_lda lda
|
||||
#define F77_incX incx
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
int n=0, i=0, incx=incX;
|
||||
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
|
||||
float ALPHA[2],BETA[2];
|
||||
int tincY, tincx;
|
||||
float *x=(float *)X, *y=(float *)Y, *st=0, *tx;
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (Uplo == CblasUpper) UL = 'U';
|
||||
else if (Uplo == CblasLower) UL = 'L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chemv","Illegal Uplo setting, %d\n",Uplo );
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
F77_chemv(F77_UL, &F77_N, alpha, A, &F77_lda, X, &F77_incX,
|
||||
beta, Y, &F77_incY);
|
||||
}
|
||||
else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
ALPHA[0]= *alp;
|
||||
ALPHA[1]= -alp[1];
|
||||
BETA[0]= *bet;
|
||||
BETA[1]= -bet[1];
|
||||
|
||||
if (N > 0)
|
||||
{
|
||||
n = N << 1;
|
||||
x = malloc(n*sizeof(float));
|
||||
|
||||
tx = x;
|
||||
if( incX > 0 ) {
|
||||
i = incX << 1 ;
|
||||
tincx = 2;
|
||||
st= x+n;
|
||||
} else {
|
||||
i = incX *(-2);
|
||||
tincx = -2;
|
||||
st = x-2;
|
||||
x +=(n-2);
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
*x = *xx;
|
||||
x[1] = -xx[1];
|
||||
x += tincx ;
|
||||
xx += i;
|
||||
}
|
||||
while (x != st);
|
||||
x=tx;
|
||||
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_incX = 1;
|
||||
#else
|
||||
incx = 1;
|
||||
#endif
|
||||
|
||||
if(incY > 0)
|
||||
tincY = incY;
|
||||
else
|
||||
tincY = -incY;
|
||||
y++;
|
||||
|
||||
i = tincY << 1;
|
||||
n = i * N ;
|
||||
st = y + n;
|
||||
do {
|
||||
*y = -(*y);
|
||||
y += i;
|
||||
} while(y != st);
|
||||
y -= n;
|
||||
} else
|
||||
x = (float *) X;
|
||||
|
||||
|
||||
if (Uplo == CblasUpper) UL = 'L';
|
||||
else if (Uplo == CblasLower) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chemv","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
F77_chemv(F77_UL, &F77_N, ALPHA, A, &F77_lda, x, &F77_incX,
|
||||
BETA, Y, &F77_incY);
|
||||
}
|
||||
else
|
||||
{
|
||||
cblas_xerbla(1, "cblas_chemv","Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if ( layout == CblasRowMajor )
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if ( X != x )
|
||||
free(x);
|
||||
if (N > 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
*y = -(*y);
|
||||
y += i;
|
||||
}
|
||||
while (y != st);
|
||||
}
|
||||
}
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,116 @@
|
|||
/*
|
||||
* cblas_cher.c
|
||||
* The program is a C interface to cher.
|
||||
*
|
||||
* Keita Teranishi 5/20/98
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cher(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const int N, const float alpha, const void *X, const int incX
|
||||
,void *A, const int lda)
|
||||
{
|
||||
char UL;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_UL;
|
||||
#else
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_lda=lda, F77_incX=incX;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_lda lda
|
||||
#define F77_incX incx
|
||||
#endif
|
||||
int n, i, tincx, incx=incX;
|
||||
float *x=(float *)X, *xx=(float *)X, *tx, *st;
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (Uplo == CblasLower) UL = 'L';
|
||||
else if (Uplo == CblasUpper) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cher","Illegal Uplo setting, %d\n",Uplo );
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
|
||||
F77_cher(F77_UL, &F77_N, &alpha, X, &F77_incX, A, &F77_lda);
|
||||
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if (Uplo == CblasUpper) UL = 'L';
|
||||
else if (Uplo == CblasLower) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cher","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
if (N > 0)
|
||||
{
|
||||
n = N << 1;
|
||||
x = malloc(n*sizeof(float));
|
||||
tx = x;
|
||||
if( incX > 0 ) {
|
||||
i = incX << 1 ;
|
||||
tincx = 2;
|
||||
st= x+n;
|
||||
} else {
|
||||
i = incX *(-2);
|
||||
tincx = -2;
|
||||
st = x-2;
|
||||
x +=(n-2);
|
||||
}
|
||||
do
|
||||
{
|
||||
*x = *xx;
|
||||
x[1] = -xx[1];
|
||||
x += tincx ;
|
||||
xx += i;
|
||||
}
|
||||
while (x != st);
|
||||
x=tx;
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_incX = 1;
|
||||
#else
|
||||
incx = 1;
|
||||
#endif
|
||||
}
|
||||
else x = (float *) X;
|
||||
F77_cher(F77_UL, &F77_N, &alpha, x, &F77_incX, A, &F77_lda);
|
||||
} else
|
||||
{
|
||||
cblas_xerbla(1, "cblas_cher","Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if(X!=x)
|
||||
free(x);
|
||||
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* cblas_cher2.c
|
||||
* The program is a C interface to cher2.
|
||||
*
|
||||
* Keita Teranishi 3/23/98
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cher2(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const int N, const void *alpha, const void *X, const int incX,
|
||||
const void *Y, const int incY, void *A, const int lda)
|
||||
{
|
||||
char UL;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_UL;
|
||||
#else
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_lda lda
|
||||
#define F77_incX incx
|
||||
#define F77_incY incy
|
||||
#endif
|
||||
int n, i, j, tincx, tincy, incx=incX, incy=incY;
|
||||
float *x=(float *)X, *xx=(float *)X, *y=(float *)Y,
|
||||
*yy=(float *)Y, *tx, *ty, *stx, *sty;
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (Uplo == CblasLower) UL = 'L';
|
||||
else if (Uplo == CblasUpper) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cher2","Illegal Uplo setting, %d\n",Uplo );
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
|
||||
F77_cher2(F77_UL, &F77_N, alpha, X, &F77_incX,
|
||||
Y, &F77_incY, A, &F77_lda);
|
||||
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if (Uplo == CblasUpper) UL = 'L';
|
||||
else if (Uplo == CblasLower) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cher2","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
if (N > 0)
|
||||
{
|
||||
n = N << 1;
|
||||
x = malloc(n*sizeof(float));
|
||||
y = malloc(n*sizeof(float));
|
||||
tx = x;
|
||||
ty = y;
|
||||
if( incX > 0 ) {
|
||||
i = incX << 1 ;
|
||||
tincx = 2;
|
||||
stx= x+n;
|
||||
} else {
|
||||
i = incX *(-2);
|
||||
tincx = -2;
|
||||
stx = x-2;
|
||||
x +=(n-2);
|
||||
}
|
||||
|
||||
if( incY > 0 ) {
|
||||
j = incY << 1;
|
||||
tincy = 2;
|
||||
sty= y+n;
|
||||
} else {
|
||||
j = incY *(-2);
|
||||
tincy = -2;
|
||||
sty = y-2;
|
||||
y +=(n-2);
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
*x = *xx;
|
||||
x[1] = -xx[1];
|
||||
x += tincx ;
|
||||
xx += i;
|
||||
}
|
||||
while (x != stx);
|
||||
|
||||
do
|
||||
{
|
||||
*y = *yy;
|
||||
y[1] = -yy[1];
|
||||
y += tincy ;
|
||||
yy += j;
|
||||
}
|
||||
while (y != sty);
|
||||
|
||||
x=tx;
|
||||
y=ty;
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_incX = 1;
|
||||
F77_incY = 1;
|
||||
#else
|
||||
incx = 1;
|
||||
incy = 1;
|
||||
#endif
|
||||
} else
|
||||
{
|
||||
x = (float *) X;
|
||||
y = (float *) Y;
|
||||
}
|
||||
F77_cher2(F77_UL, &F77_N, alpha, y, &F77_incY, x,
|
||||
&F77_incX, A, &F77_lda);
|
||||
} else
|
||||
{
|
||||
cblas_xerbla(1, "cblas_cher2","Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if(X!=x)
|
||||
free(x);
|
||||
if(Y!=y)
|
||||
free(y);
|
||||
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
*
|
||||
* cblas_cher2k.c
|
||||
* This program is a C interface to cher2k.
|
||||
* Written by Keita Teranishi
|
||||
* 4/8/1998
|
||||
*
|
||||
*/
|
||||
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cher2k(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const float beta,
|
||||
void *C, const int ldc)
|
||||
{
|
||||
char UL, TR;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_TR, F77_UL;
|
||||
#else
|
||||
#define F77_TR &TR
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb;
|
||||
F77_INT F77_ldc=ldc;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_K K
|
||||
#define F77_lda lda
|
||||
#define F77_ldb ldb
|
||||
#define F77_ldc ldc
|
||||
#endif
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
float ALPHA[2];
|
||||
const float *alp=(float *)alpha;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
if( layout == CblasColMajor )
|
||||
{
|
||||
|
||||
if( Uplo == CblasUpper) UL='U';
|
||||
else if ( Uplo == CblasLower ) UL='L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cher2k", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if( Trans == CblasTrans) TR ='T';
|
||||
else if ( Trans == CblasConjTrans ) TR='C';
|
||||
else if ( Trans == CblasNoTrans ) TR='N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_cher2k", "Illegal Trans setting, %d\n", Trans);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TR = C2F_CHAR(&TR);
|
||||
#endif
|
||||
|
||||
F77_cher2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc);
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
|
||||
if( Uplo == CblasUpper) UL='L';
|
||||
else if ( Uplo == CblasLower ) UL='U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cher2k", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if( Trans == CblasTrans) TR ='N';
|
||||
else if ( Trans == CblasConjTrans ) TR='N';
|
||||
else if ( Trans == CblasNoTrans ) TR='C';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_cher2k", "Illegal Trans setting, %d\n", Trans);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TR = C2F_CHAR(&TR);
|
||||
#endif
|
||||
|
||||
ALPHA[0]= *alp;
|
||||
ALPHA[1]= -alp[1];
|
||||
F77_cher2k(F77_UL,F77_TR, &F77_N, &F77_K, ALPHA, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc);
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_cher2k", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,105 @@
|
|||
/*
|
||||
*
|
||||
* cblas_cherk.c
|
||||
* This program is a C interface to cherk.
|
||||
* Written by Keita Teranishi
|
||||
* 4/8/1998
|
||||
*
|
||||
*/
|
||||
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cherk(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const float alpha, const void *A, const int lda,
|
||||
const float beta, void *C, const int ldc)
|
||||
{
|
||||
char UL, TR;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_TR, F77_UL;
|
||||
#else
|
||||
#define F77_TR &TR
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_K=K, F77_lda=lda;
|
||||
F77_INT F77_ldc=ldc;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_K K
|
||||
#define F77_lda lda
|
||||
#define F77_ldc ldc
|
||||
#endif
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
CBLAS_CallFromC = 1;
|
||||
|
||||
if( layout == CblasColMajor )
|
||||
{
|
||||
if( Uplo == CblasUpper) UL='U';
|
||||
else if ( Uplo == CblasLower ) UL='L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_cherk", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if( Trans == CblasTrans) TR ='T';
|
||||
else if ( Trans == CblasConjTrans ) TR='C';
|
||||
else if ( Trans == CblasNoTrans ) TR='N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_cherk", "Illegal Trans setting, %d\n", Trans);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TR = C2F_CHAR(&TR);
|
||||
#endif
|
||||
|
||||
F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda,
|
||||
&beta, C, &F77_ldc);
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if( Uplo == CblasUpper) UL='L';
|
||||
else if ( Uplo == CblasLower ) UL='U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_cherk", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if( Trans == CblasTrans) TR ='N';
|
||||
else if ( Trans == CblasConjTrans ) TR='N';
|
||||
else if ( Trans == CblasNoTrans ) TR='C';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_cherk", "Illegal Trans setting, %d\n", Trans);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_SD = C2F_CHAR(&SD);
|
||||
#endif
|
||||
|
||||
F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda,
|
||||
&beta, C, &F77_ldc);
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_cherk", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,160 @@
|
|||
/*
|
||||
* cblas_chpmv.c
|
||||
* The program is a C interface of chpmv
|
||||
*
|
||||
* Keita Teranishi 5/18/98
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_chpmv(const CBLAS_LAYOUT layout,
|
||||
const CBLAS_UPLO Uplo,const int N,
|
||||
const void *alpha, const void *AP,
|
||||
const void *X, const int incX, const void *beta,
|
||||
void *Y, const int incY)
|
||||
{
|
||||
char UL;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_UL;
|
||||
#else
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incx
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
int n, i=0, incx=incX;
|
||||
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
|
||||
float ALPHA[2],BETA[2];
|
||||
int tincY, tincx;
|
||||
float *x=(float *)X, *y=(float *)Y, *st=0, *tx;
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (Uplo == CblasLower) UL = 'L';
|
||||
else if (Uplo == CblasUpper) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chpmv","Illegal Uplo setting, %d\n",Uplo );
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
F77_chpmv(F77_UL, &F77_N, alpha, AP, X,
|
||||
&F77_incX, beta, Y, &F77_incY);
|
||||
}
|
||||
else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
ALPHA[0]= *alp;
|
||||
ALPHA[1]= -alp[1];
|
||||
BETA[0]= *bet;
|
||||
BETA[1]= -bet[1];
|
||||
|
||||
if (N > 0)
|
||||
{
|
||||
n = N << 1;
|
||||
x = malloc(n*sizeof(float));
|
||||
|
||||
tx = x;
|
||||
if( incX > 0 ) {
|
||||
i = incX << 1;
|
||||
tincx = 2;
|
||||
st= x+n;
|
||||
} else {
|
||||
i = incX *(-2);
|
||||
tincx = -2;
|
||||
st = x-2;
|
||||
x +=(n-2);
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
*x = *xx;
|
||||
x[1] = -xx[1];
|
||||
x += tincx ;
|
||||
xx += i;
|
||||
}
|
||||
while (x != st);
|
||||
x=tx;
|
||||
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_incX = 1;
|
||||
#else
|
||||
incx = 1;
|
||||
#endif
|
||||
|
||||
if(incY > 0)
|
||||
tincY = incY;
|
||||
else
|
||||
tincY = -incY;
|
||||
y++;
|
||||
|
||||
i = tincY << 1;
|
||||
n = i * N ;
|
||||
st = y + n;
|
||||
do {
|
||||
*y = -(*y);
|
||||
y += i;
|
||||
} while(y != st);
|
||||
y -= n;
|
||||
} else
|
||||
x = (float *) X;
|
||||
|
||||
|
||||
if (Uplo == CblasUpper) UL = 'L';
|
||||
else if (Uplo == CblasLower) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chpmv","Illegal Uplo setting, %d\n", Uplo );
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
|
||||
F77_chpmv(F77_UL, &F77_N, ALPHA,
|
||||
AP, x, &F77_incX, BETA, Y, &F77_incY);
|
||||
}
|
||||
else
|
||||
{
|
||||
cblas_xerbla(1, "cblas_chpmv","Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if ( layout == CblasRowMajor )
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if(X!=x)
|
||||
free(x);
|
||||
if (N > 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
*y = -(*y);
|
||||
y += i;
|
||||
}
|
||||
while (y != st);
|
||||
}
|
||||
}
|
||||
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* cblas_chpr.c
|
||||
* The program is a C interface to chpr.
|
||||
*
|
||||
* Keita Teranishi 3/23/98
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_chpr(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const int N, const float alpha, const void *X,
|
||||
const int incX, void *A)
|
||||
{
|
||||
char UL;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_UL;
|
||||
#else
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incx
|
||||
#endif
|
||||
int n, i, tincx, incx=incX;
|
||||
float *x=(float *)X, *xx=(float *)X, *tx, *st;
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (Uplo == CblasLower) UL = 'L';
|
||||
else if (Uplo == CblasUpper) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chpr","Illegal Uplo setting, %d\n",Uplo );
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
|
||||
F77_chpr(F77_UL, &F77_N, &alpha, X, &F77_incX, A);
|
||||
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if (Uplo == CblasUpper) UL = 'L';
|
||||
else if (Uplo == CblasLower) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chpr","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
if (N > 0)
|
||||
{
|
||||
n = N << 1;
|
||||
x = malloc(n*sizeof(float));
|
||||
tx = x;
|
||||
if( incX > 0 ) {
|
||||
i = incX << 1;
|
||||
tincx = 2;
|
||||
st= x+n;
|
||||
} else {
|
||||
i = incX *(-2);
|
||||
tincx = -2;
|
||||
st = x-2;
|
||||
x +=(n-2);
|
||||
}
|
||||
do
|
||||
{
|
||||
*x = *xx;
|
||||
x[1] = -xx[1];
|
||||
x += tincx ;
|
||||
xx += i;
|
||||
}
|
||||
while (x != st);
|
||||
x=tx;
|
||||
#ifdef F77_INT
|
||||
F77_incX = 1;
|
||||
#else
|
||||
incx = 1;
|
||||
#endif
|
||||
}
|
||||
else x = (float *) X;
|
||||
|
||||
F77_chpr(F77_UL, &F77_N, &alpha, x, &F77_incX, A);
|
||||
|
||||
} else
|
||||
{
|
||||
cblas_xerbla(1, "cblas_chpr","Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if(X!=x)
|
||||
free(x);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,149 @@
|
|||
/*
|
||||
* cblas_chpr2.c
|
||||
* The program is a C interface to chpr2.
|
||||
*
|
||||
* Keita Teranishi 5/20/98
|
||||
*
|
||||
*/
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_chpr2(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const int N,const void *alpha, const void *X,
|
||||
const int incX,const void *Y, const int incY, void *Ap)
|
||||
|
||||
{
|
||||
char UL;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_UL;
|
||||
#else
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incx
|
||||
#define F77_incY incy
|
||||
#endif
|
||||
int n, i, j, tincx, tincy, incx=incX, incy=incY;
|
||||
float *x=(float *)X, *xx=(float *)X, *y=(float *)Y,
|
||||
*yy=(float *)Y, *tx, *ty, *stx, *sty;
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (Uplo == CblasLower) UL = 'L';
|
||||
else if (Uplo == CblasUpper) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chpr2","Illegal Uplo setting, %d\n",Uplo );
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
|
||||
F77_chpr2(F77_UL, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, Ap);
|
||||
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if (Uplo == CblasUpper) UL = 'L';
|
||||
else if (Uplo == CblasLower) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_chpr2","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
#endif
|
||||
if (N > 0)
|
||||
{
|
||||
n = N << 1;
|
||||
x = malloc(n*sizeof(float));
|
||||
y = malloc(n*sizeof(float));
|
||||
tx = x;
|
||||
ty = y;
|
||||
if( incX > 0 ) {
|
||||
i = incX << 1 ;
|
||||
tincx = 2;
|
||||
stx= x+n;
|
||||
} else {
|
||||
i = incX *(-2);
|
||||
tincx = -2;
|
||||
stx = x-2;
|
||||
x +=(n-2);
|
||||
}
|
||||
|
||||
if( incY > 0 ) {
|
||||
j = incY << 1;
|
||||
tincy = 2;
|
||||
sty= y+n;
|
||||
} else {
|
||||
j = incY *(-2);
|
||||
tincy = -2;
|
||||
sty = y-2;
|
||||
y +=(n-2);
|
||||
}
|
||||
|
||||
do
|
||||
{
|
||||
*x = *xx;
|
||||
x[1] = -xx[1];
|
||||
x += tincx ;
|
||||
xx += i;
|
||||
}
|
||||
while (x != stx);
|
||||
do
|
||||
{
|
||||
*y = *yy;
|
||||
y[1] = -yy[1];
|
||||
y += tincy ;
|
||||
yy += j;
|
||||
}
|
||||
while (y != sty);
|
||||
|
||||
x=tx;
|
||||
y=ty;
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_incX = 1;
|
||||
F77_incY = 1;
|
||||
#else
|
||||
incx = 1;
|
||||
incy = 1;
|
||||
#endif
|
||||
|
||||
} else
|
||||
{
|
||||
x = (float *) X;
|
||||
y = (void *) Y;
|
||||
}
|
||||
F77_chpr2(F77_UL, &F77_N, alpha, y, &F77_incY, x, &F77_incX, Ap);
|
||||
} else
|
||||
{
|
||||
cblas_xerbla(1, "cblas_chpr2","Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if(X!=x)
|
||||
free(x);
|
||||
if(Y!=y)
|
||||
free(y);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
* cblas_cscal.c
|
||||
*
|
||||
* The program is a C interface to cscal.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cscal( const int N, const void *alpha, void *X,
|
||||
const int incX)
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#endif
|
||||
F77_cscal( &F77_N, alpha, X, &F77_incX);
|
||||
}
|
|
@ -0,0 +1,21 @@
|
|||
/*
|
||||
* cblas_csscal.c
|
||||
*
|
||||
* The program is a C interface to csscal.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_csscal( const int N, const float alpha, void *X,
|
||||
const int incX)
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#endif
|
||||
F77_csscal( &F77_N, &alpha, X, &F77_incX);
|
||||
}
|
|
@ -0,0 +1,22 @@
|
|||
/*
|
||||
* cblas_cswap.c
|
||||
*
|
||||
* The program is a C interface to cswap.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_cswap( const int N, void *X, const int incX, void *Y,
|
||||
const int incY)
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
F77_cswap( &F77_N, X, &F77_incX, Y, &F77_incY);
|
||||
}
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
*
|
||||
* cblas_csymm.c
|
||||
* This program is a C interface to csymm.
|
||||
* Written by Keita Teranishi
|
||||
* 4/8/1998
|
||||
*
|
||||
*/
|
||||
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_csymm(const CBLAS_LAYOUT layout, const CBLAS_SIDE Side,
|
||||
const CBLAS_UPLO Uplo, const int M, const int N,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const void *beta,
|
||||
void *C, const int ldc)
|
||||
{
|
||||
char SD, UL;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_SD, F77_UL;
|
||||
#else
|
||||
#define F77_SD &SD
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb;
|
||||
F77_INT F77_ldc=ldc;
|
||||
#else
|
||||
#define F77_M M
|
||||
#define F77_N N
|
||||
#define F77_lda lda
|
||||
#define F77_ldb ldb
|
||||
#define F77_ldc ldc
|
||||
#endif
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
CBLAS_CallFromC = 1;
|
||||
|
||||
if( layout == CblasColMajor )
|
||||
{
|
||||
if( Side == CblasRight) SD='R';
|
||||
else if ( Side == CblasLeft ) SD='L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_csymm", "Illegal Side setting, %d\n", Side);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if( Uplo == CblasUpper) UL='U';
|
||||
else if ( Uplo == CblasLower ) UL='L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_csymm", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_SD = C2F_CHAR(&SD);
|
||||
#endif
|
||||
|
||||
F77_csymm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda,
|
||||
B, &F77_ldb, beta, C, &F77_ldc);
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if( Side == CblasRight) SD='L';
|
||||
else if ( Side == CblasLeft ) SD='R';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_csymm", "Illegal Side setting, %d\n", Side);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if( Uplo == CblasUpper) UL='L';
|
||||
else if ( Uplo == CblasLower ) UL='U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_csymm", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_SD = C2F_CHAR(&SD);
|
||||
#endif
|
||||
|
||||
F77_csymm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A, &F77_lda,
|
||||
B, &F77_ldb, beta, C, &F77_ldc);
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_csymm", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
*
|
||||
* cblas_csyr2k.c
|
||||
* This program is a C interface to csyr2k.
|
||||
* Written by Keita Teranishi
|
||||
* 4/8/1998
|
||||
*
|
||||
*/
|
||||
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_csyr2k(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *B, const int ldb, const void *beta,
|
||||
void *C, const int ldc)
|
||||
{
|
||||
char UL, TR;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_TR, F77_UL;
|
||||
#else
|
||||
#define F77_TR &TR
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb;
|
||||
F77_INT F77_ldc=ldc;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_K K
|
||||
#define F77_lda lda
|
||||
#define F77_ldb ldb
|
||||
#define F77_ldc ldc
|
||||
#endif
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
CBLAS_CallFromC = 1;
|
||||
|
||||
if( layout == CblasColMajor )
|
||||
{
|
||||
|
||||
if( Uplo == CblasUpper) UL='U';
|
||||
else if ( Uplo == CblasLower ) UL='L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_csyr2k", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if( Trans == CblasTrans) TR ='T';
|
||||
else if ( Trans == CblasConjTrans ) TR='C';
|
||||
else if ( Trans == CblasNoTrans ) TR='N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_csyr2k", "Illegal Trans setting, %d\n", Trans);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TR = C2F_CHAR(&TR);
|
||||
#endif
|
||||
|
||||
F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda,
|
||||
B, &F77_ldb, beta, C, &F77_ldc);
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if( Uplo == CblasUpper) UL='L';
|
||||
else if ( Uplo == CblasLower ) UL='U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_csyr2k", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if( Trans == CblasTrans) TR ='N';
|
||||
else if ( Trans == CblasConjTrans ) TR='N';
|
||||
else if ( Trans == CblasNoTrans ) TR='T';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_csyr2k", "Illegal Trans setting, %d\n", Trans);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TR = C2F_CHAR(&TR);
|
||||
#endif
|
||||
|
||||
F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, beta, C, &F77_ldc);
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_csyr2k", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
*
|
||||
* cblas_csyrk.c
|
||||
* This program is a C interface to csyrk.
|
||||
* Written by Keita Teranishi
|
||||
* 4/8/1998
|
||||
*
|
||||
*/
|
||||
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_csyrk(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const CBLAS_TRANSPOSE Trans, const int N, const int K,
|
||||
const void *alpha, const void *A, const int lda,
|
||||
const void *beta, void *C, const int ldc)
|
||||
{
|
||||
char UL, TR;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_TR, F77_UL;
|
||||
#else
|
||||
#define F77_TR &TR
|
||||
#define F77_UL &UL
|
||||
#endif
|
||||
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_K=K, F77_lda=lda;
|
||||
F77_INT F77_ldc=ldc;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_K K
|
||||
#define F77_lda lda
|
||||
#define F77_ldc ldc
|
||||
#endif
|
||||
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
CBLAS_CallFromC = 1;
|
||||
|
||||
if( layout == CblasColMajor )
|
||||
{
|
||||
|
||||
if( Uplo == CblasUpper) UL='U';
|
||||
else if ( Uplo == CblasLower ) UL='L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_csyrk", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if( Trans == CblasTrans) TR ='T';
|
||||
else if ( Trans == CblasConjTrans ) TR='C';
|
||||
else if ( Trans == CblasNoTrans ) TR='N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_csyrk", "Illegal Trans setting, %d\n", Trans);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TR = C2F_CHAR(&TR);
|
||||
#endif
|
||||
|
||||
F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda,
|
||||
beta, C, &F77_ldc);
|
||||
} else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if( Uplo == CblasUpper) UL='L';
|
||||
else if ( Uplo == CblasLower ) UL='U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_csyrk", "Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if( Trans == CblasTrans) TR ='N';
|
||||
else if ( Trans == CblasConjTrans ) TR='N';
|
||||
else if ( Trans == CblasNoTrans ) TR='T';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_csyrk", "Illegal Trans setting, %d\n", Trans);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TR = C2F_CHAR(&TR);
|
||||
#endif
|
||||
|
||||
F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda,
|
||||
beta, C, &F77_ldc);
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_csyrk", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
|
@ -0,0 +1,158 @@
|
|||
/*
|
||||
* cblas_ctbmv.c
|
||||
* The program is a C interface to ctbmv.
|
||||
*
|
||||
* Keita Teranishi 5/20/98
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_ctbmv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag,
|
||||
const int N, const int K, const void *A, const int lda,
|
||||
void *X, const int incX)
|
||||
{
|
||||
char TA;
|
||||
char UL;
|
||||
char DI;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_TA, F77_UL, F77_DI;
|
||||
#else
|
||||
#define F77_TA &TA
|
||||
#define F77_UL &UL
|
||||
#define F77_DI &DI
|
||||
#endif
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_K K
|
||||
#define F77_lda lda
|
||||
#define F77_incX incX
|
||||
#endif
|
||||
int n, i=0, tincX;
|
||||
float *st=0, *x=(float *)X;
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (Uplo == CblasUpper) UL = 'U';
|
||||
else if (Uplo == CblasLower) UL = 'L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if (TransA == CblasNoTrans) TA = 'N';
|
||||
else if (TransA == CblasTrans) TA = 'T';
|
||||
else if (TransA == CblasConjTrans) TA = 'C';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_ctbmv","Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if (Diag == CblasUnit) DI = 'U';
|
||||
else if (Diag == CblasNonUnit) DI = 'N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(4, "cblas_ctbmv","Illegal Diag setting, %d\n", Diag);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
F77_DI = C2F_CHAR(&DI);
|
||||
#endif
|
||||
F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
|
||||
&F77_incX);
|
||||
}
|
||||
else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if (Uplo == CblasUpper) UL = 'L';
|
||||
else if (Uplo == CblasLower) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (TransA == CblasNoTrans) TA = 'T';
|
||||
else if (TransA == CblasTrans) TA = 'N';
|
||||
else if (TransA == CblasConjTrans)
|
||||
{
|
||||
TA = 'N';
|
||||
if ( N > 0)
|
||||
{
|
||||
if(incX > 0)
|
||||
tincX = incX;
|
||||
else
|
||||
tincX = -incX;
|
||||
i = tincX << 1;
|
||||
n = i * N;
|
||||
x++;
|
||||
st = x + n;
|
||||
do
|
||||
{
|
||||
*x = -(*x);
|
||||
x+= i;
|
||||
}
|
||||
while (x != st);
|
||||
x -= n;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_ctbmv","Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (Diag == CblasUnit) DI = 'U';
|
||||
else if (Diag == CblasNonUnit) DI = 'N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(4, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
F77_DI = C2F_CHAR(&DI);
|
||||
#endif
|
||||
|
||||
F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
|
||||
&F77_incX);
|
||||
|
||||
if (TransA == CblasConjTrans)
|
||||
{
|
||||
if (N > 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
*x = -(*x);
|
||||
x += i;
|
||||
}
|
||||
while (x != st);
|
||||
}
|
||||
}
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_ctbmv", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,162 @@
|
|||
/*
|
||||
* cblas_ctbsv.c
|
||||
* The program is a C interface to ctbsv.
|
||||
*
|
||||
* Keita Teranishi 3/23/98
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_ctbsv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag,
|
||||
const int N, const int K, const void *A, const int lda,
|
||||
void *X, const int incX)
|
||||
{
|
||||
char TA;
|
||||
char UL;
|
||||
char DI;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_TA, F77_UL, F77_DI;
|
||||
#else
|
||||
#define F77_TA &TA
|
||||
#define F77_UL &UL
|
||||
#define F77_DI &DI
|
||||
#endif
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_K K
|
||||
#define F77_lda lda
|
||||
#define F77_incX incX
|
||||
#endif
|
||||
int n, i=0, tincX;
|
||||
float *st=0,*x=(float *)X;
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (Uplo == CblasUpper) UL = 'U';
|
||||
else if (Uplo == CblasLower) UL = 'L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_ctbsv","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if (TransA == CblasNoTrans) TA = 'N';
|
||||
else if (TransA == CblasTrans) TA = 'T';
|
||||
else if (TransA == CblasConjTrans) TA = 'C';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_ctbsv","Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if (Diag == CblasUnit) DI = 'U';
|
||||
else if (Diag == CblasNonUnit) DI = 'N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(4, "cblas_ctbsv","Illegal Diag setting, %d\n", Diag);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
F77_DI = C2F_CHAR(&DI);
|
||||
#endif
|
||||
F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
|
||||
&F77_incX);
|
||||
}
|
||||
else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if (Uplo == CblasUpper) UL = 'L';
|
||||
else if (Uplo == CblasLower) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_ctbsv","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (TransA == CblasNoTrans) TA = 'T';
|
||||
else if (TransA == CblasTrans) TA = 'N';
|
||||
else if (TransA == CblasConjTrans)
|
||||
{
|
||||
TA = 'N';
|
||||
if ( N > 0)
|
||||
{
|
||||
if ( incX > 0 )
|
||||
tincX = incX;
|
||||
else
|
||||
tincX = -incX;
|
||||
|
||||
n = N*2*(tincX);
|
||||
|
||||
x++;
|
||||
|
||||
st=x+n;
|
||||
|
||||
i = tincX << 1;
|
||||
do
|
||||
{
|
||||
*x = -(*x);
|
||||
x+=i;
|
||||
}
|
||||
while (x != st);
|
||||
x -= n;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_ctbsv","Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (Diag == CblasUnit) DI = 'U';
|
||||
else if (Diag == CblasNonUnit) DI = 'N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(4, "cblas_ctbsv","Illegal Diag setting, %d\n", Diag);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
F77_DI = C2F_CHAR(&DI);
|
||||
#endif
|
||||
|
||||
F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
|
||||
&F77_incX);
|
||||
|
||||
if (TransA == CblasConjTrans)
|
||||
{
|
||||
if (N > 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
*x = -(*x);
|
||||
x+= i;
|
||||
}
|
||||
while (x != st);
|
||||
}
|
||||
}
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_ctbsv", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
|
@ -0,0 +1,152 @@
|
|||
/*
|
||||
* cblas_ctpmv.c
|
||||
* The program is a C interface to ctpmv.
|
||||
*
|
||||
* Keita Teranishi 5/20/98
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_ctpmv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
|
||||
const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag,
|
||||
const int N, const void *Ap, void *X, const int incX)
|
||||
{
|
||||
char TA;
|
||||
char UL;
|
||||
char DI;
|
||||
#ifdef F77_CHAR
|
||||
F77_CHAR F77_TA, F77_UL, F77_DI;
|
||||
#else
|
||||
#define F77_TA &TA
|
||||
#define F77_UL &UL
|
||||
#define F77_DI &DI
|
||||
#endif
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX;
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#endif
|
||||
int n, i=0, tincX;
|
||||
float *st=0,*x=(float *)X;
|
||||
extern int CBLAS_CallFromC;
|
||||
extern int RowMajorStrg;
|
||||
RowMajorStrg = 0;
|
||||
|
||||
CBLAS_CallFromC = 1;
|
||||
if (layout == CblasColMajor)
|
||||
{
|
||||
if (Uplo == CblasUpper) UL = 'U';
|
||||
else if (Uplo == CblasLower) UL = 'L';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_ctpmv","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if (TransA == CblasNoTrans) TA = 'N';
|
||||
else if (TransA == CblasTrans) TA = 'T';
|
||||
else if (TransA == CblasConjTrans) TA = 'C';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_ctpmv","Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
if (Diag == CblasUnit) DI = 'U';
|
||||
else if (Diag == CblasNonUnit) DI = 'N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(4, "cblas_ctpmv","Illegal Diag setting, %d\n", Diag);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
F77_DI = C2F_CHAR(&DI);
|
||||
#endif
|
||||
F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX);
|
||||
}
|
||||
else if (layout == CblasRowMajor)
|
||||
{
|
||||
RowMajorStrg = 1;
|
||||
if (Uplo == CblasUpper) UL = 'L';
|
||||
else if (Uplo == CblasLower) UL = 'U';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(2, "cblas_ctpmv","Illegal Uplo setting, %d\n", Uplo);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (TransA == CblasNoTrans) TA = 'T';
|
||||
else if (TransA == CblasTrans) TA = 'N';
|
||||
else if (TransA == CblasConjTrans)
|
||||
{
|
||||
TA = 'N';
|
||||
if ( N > 0)
|
||||
{
|
||||
if(incX > 0)
|
||||
tincX = incX;
|
||||
else
|
||||
tincX = -incX;
|
||||
i = tincX << 1;
|
||||
n = i * N;
|
||||
x++;
|
||||
st = x + n;
|
||||
do
|
||||
{
|
||||
*x = -(*x);
|
||||
x += i;
|
||||
}
|
||||
while (x != st);
|
||||
x -= n;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cblas_xerbla(3, "cblas_ctpmv","Illegal TransA setting, %d\n", TransA);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (Diag == CblasUnit) DI = 'U';
|
||||
else if (Diag == CblasNonUnit) DI = 'N';
|
||||
else
|
||||
{
|
||||
cblas_xerbla(4, "cblas_ctpmv","Illegal Diag setting, %d\n", Diag);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
||||
#ifdef F77_CHAR
|
||||
F77_UL = C2F_CHAR(&UL);
|
||||
F77_TA = C2F_CHAR(&TA);
|
||||
F77_DI = C2F_CHAR(&DI);
|
||||
#endif
|
||||
|
||||
F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX);
|
||||
if (TransA == CblasConjTrans)
|
||||
{
|
||||
if (N > 0)
|
||||
{
|
||||
do
|
||||
{
|
||||
*x = -(*x);
|
||||
x += i;
|
||||
}
|
||||
while (x != st);
|
||||
}
|
||||
}
|
||||
}
|
||||
else cblas_xerbla(1, "cblas_ctpmv", "Illegal layout setting, %d\n", layout);
|
||||
CBLAS_CallFromC = 0;
|
||||
RowMajorStrg = 0;
|
||||
return;
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue