Merge pull request #696 from ashwinyes/develop_20151120_lapack_test_fixes

Cortex A57 fixes and Lapack 3.6.0
This commit is contained in:
Zhang Xianyi 2015-11-23 11:04:42 -06:00
commit b4380acf77
3192 changed files with 136729 additions and 29373 deletions

View File

@ -249,10 +249,14 @@ ifndef NOFORTRAN
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(FC), gfortran)
ifeq ($(F_COMPILER), GFORTRAN)
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
ifdef SMP
ifeq ($(OSNAME), WINNT)
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
else
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
@ -288,7 +292,17 @@ endif
lapack-test :
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
ifneq ($(CROSS), 1)
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
endif
lapack-runtest:
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
blas-test:
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out)

View File

@ -971,16 +971,29 @@ ifeq ($(DEBUG), 1)
COMMON_OPT += -g
endif
ifeq ($(DEBUG), 1)
FCOMMON_OPT += -g
endif
ifndef COMMON_OPT
COMMON_OPT = -O2
endif
ifndef FCOMMON_OPT
ifeq ($(OSNAME), WINNT)
FCOMMON_OPT = -O0
else
FCOMMON_OPT = -O2 -frecursive
endif
endif
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR)
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF)
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT)
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF)
override FFLAGS += $(FCOMMON_OPT)
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
#MAKEOVERRIDES =
#For LAPACK Fortran codes.

View File

@ -43,28 +43,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef ASSEMBLER
static void __inline blas_lock(volatile BLASULONG *address){
long register ret;
BLASULONG ret;
do {
while (*address) {YIELDING;};
__asm__ __volatile__(
"ldaxr %0, [%1] \n\t"
"stlxr w2, %2, [%1] \n\t"
"orr %0, %0, x2 \n\t"
: "=r"(ret)
: "r"(address), "r"(1l)
: "memory", "x2"
"mov x4, #1 \n\t"
"1: \n\t"
"ldaxr x2, [%1] \n\t"
"cbnz x2, 1b \n\t"
"2: \n\t"
"stxr w3, x4, [%1] \n\t"
"cbnz w3, 1b \n\t"
"mov %0, #0 \n\t"
: "=r"(ret), "=r"(address)
: "1"(address)
: "memory", "x2" , "x3", "x4"
);
} while (ret);
MB;
}
#define BLAS_LOCK_DEFINED
static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
}
@ -110,7 +121,7 @@ REALNAME:
#define HUGE_PAGESIZE ( 4 << 20)
#if defined(CORTEXA57)
#define BUFFER_SIZE (40 << 20)
#define BUFFER_SIZE (20 << 20)
#else
#define BUFFER_SIZE (16 << 20)
#endif

View File

@ -104,6 +104,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <errno.h>
#include <linux/unistd.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/resource.h>
#endif
#if defined(OS_FREEBSD) || defined(OS_DARWIN)
@ -1361,6 +1363,18 @@ void CONSTRUCTOR gotoblas_init(void) {
gotoblas_memory_init();
#endif
#if defined(OS_LINUX)
struct rlimit curlimit;
if ( getrlimit(RLIMIT_STACK, &curlimit ) == 0 )
{
if ( curlimit.rlim_cur != curlimit.rlim_max )
{
curlimit.rlim_cur = curlimit.rlim_max;
setrlimit(RLIMIT_STACK, &curlimit);
}
}
#endif
#ifdef SMP
if (blas_cpu_number == 0) blas_get_cpu_number();
#ifdef SMP_SERVER

View File

@ -173,18 +173,18 @@
sgbbrd, sgbcon, sgbequ, sgbrfs, sgbsv,
sgbsvx, sgbtf2, sgbtrf, sgbtrs, sgebak, sgebal, sgebd2,
sgebrd, sgecon, sgeequ, sgees, sgeesx, sgeev, sgeevx,
sgegs, sgegv, sgehd2, sgehrd, sgelq2, sgelqf,
sgels, sgelsd, sgelss, sgelsx, sgelsy, sgeql2, sgeqlf,
sgeqp3, sgeqpf, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs,
sgehd2, sgehrd, sgelq2, sgelqf,
sgels, sgelsd, sgelss, sgelsy, sgeql2, sgeqlf,
sgeqp3, sgeqr2, sgeqr2p, sgeqrf, sgeqrfp, sgerfs,
sgerq2, sgerqf, sgesc2, sgesdd, sgesvd, sgesvx,
sgetc2, sgetri,
sggbak, sggbal, sgges, sggesx, sggev, sggevx,
sggglm, sgghrd, sgglse, sggqrf,
sggrqf, sggsvd, sggsvp, sgtcon, sgtrfs, sgtsv,
sggrqf, sgtcon, sgtrfs, sgtsv,
sgtsvx, sgttrf, sgttrs, sgtts2, shgeqz,
shsein, shseqr, slabrd, slacon, slacn2,
slaein, slaexc, slag2, slags2, slagtm, slagv2, slahqr,
slahrd, slahr2, slaic1, slaln2, slals0, slalsa, slalsd,
slahr2, slaic1, slaln2, slals0, slalsa, slalsd,
slangb, slange, slangt, slanhs, slansb, slansp,
slansy, slantb, slantp, slantr, slanv2,
slapll, slapmt,
@ -194,7 +194,7 @@
slarf, slarfb, slarfg, slarfgp, slarft, slarfx, slargv,
slarrv, slartv,
slarz, slarzb, slarzt, slasy2, slasyf,
slatbs, slatdf, slatps, slatrd, slatrs, slatrz, slatzm,
slatbs, slatdf, slatps, slatrd, slatrs, slatrz,
sopgtr, sopmtr, sorg2l, sorg2r,
sorgbr, sorghr, sorgl2, sorglq, sorgql, sorgqr, sorgr2,
sorgrq, sorgtr, sorm2l, sorm2r,
@ -220,7 +220,7 @@
stgsja, stgsna, stgsy2, stgsyl, stpcon, stprfs, stptri,
stptrs,
strcon, strevc, strexc, strrfs, strsen, strsna, strsyl,
strtrs, stzrqf, stzrzf, sstemr,
strtrs, stzrzf, sstemr,
slansf, spftrf, spftri, spftrs, ssfrk, stfsm, stftri, stfttp,
stfttr, stpttf, stpttr, strttf, strttp,
sgejsv, sgesvj, sgsvj0, sgsvj1,
@ -245,14 +245,13 @@
cbdsqr, cgbbrd, cgbcon, cgbequ, cgbrfs, cgbsv, cgbsvx,
cgbtf2, cgbtrf, cgbtrs, cgebak, cgebal, cgebd2, cgebrd,
cgecon, cgeequ, cgees, cgeesx, cgeev, cgeevx,
cgegs, cgegv, cgehd2, cgehrd, cgelq2, cgelqf,
cgels, cgelsd, cgelss, cgelsx, cgelsy, cgeql2, cgeqlf, cgeqp3,
cgeqpf, cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs,
cgehd2, cgehrd, cgelq2, cgelqf,
cgels, cgelsd, cgelss, cgelsy, cgeql2, cgeqlf, cgeqp3,
cgeqr2, cgeqr2p, cgeqrf, cgeqrfp, cgerfs,
cgerq2, cgerqf, cgesc2, cgesdd, cgesvd,
cgesvx, cgetc2, cgetri,
cggbak, cggbal, cgges, cggesx, cggev, cggevx, cggglm,
cgghrd, cgglse, cggqrf, cggrqf,
cggsvd, cggsvp,
cgtcon, cgtrfs, cgtsv, cgtsvx, cgttrf, cgttrs, cgtts2, chbev,
chbevd, chbevx, chbgst, chbgv, chbgvd, chbgvx, chbtrd,
checon, cheev, cheevd, cheevr, cheevx, chegs2, chegst,
@ -267,7 +266,7 @@
claed0, claed7, claed8,
claein, claesy, claev2, clags2, clagtm,
clahef, clahqr,
clahrd, clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt,
clahr2, claic1, clals0, clalsa, clalsd, clangb, clange, clangt,
clanhb, clanhe,
clanhp, clanhs, clanht, clansb, clansp, clansy, clantb,
clantp, clantr, clapll, clapmt, clarcm, claqgb, claqge,
@ -278,7 +277,7 @@
clarfx, clargv, clarnv, clarrv, clartg, clartv,
clarz, clarzb, clarzt, clascl, claset, clasr, classq,
clasyf, clatbs, clatdf, clatps, clatrd, clatrs, clatrz,
clatzm, cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv,
cpbcon, cpbequ, cpbrfs, cpbstf, cpbsv,
cpbsvx, cpbtf2, cpbtrf, cpbtrs, cpocon, cpoequ, cporfs,
cposv, cposvx, cpstrf, cpstf2,
cppcon, cppequ, cpprfs, cppsv, cppsvx, cpptrf, cpptri, cpptrs,
@ -293,7 +292,7 @@
ctgexc, ctgsen, ctgsja, ctgsna, ctgsy2, ctgsyl, ctpcon,
ctprfs, ctptri,
ctptrs, ctrcon, ctrevc, ctrexc, ctrrfs, ctrsen, ctrsna,
ctrsyl, ctrtrs, ctzrqf, ctzrzf, cung2l, cung2r,
ctrsyl, ctrtrs, ctzrzf, cung2l, cung2r,
cungbr, cunghr, cungl2, cunglq, cungql, cungqr, cungr2,
cungrq, cungtr, cunm2l, cunm2r, cunmbr, cunmhr, cunml2,
cunmlq, cunmql, cunmqr, cunmr2, cunmr3, cunmrq, cunmrz,
@ -321,18 +320,18 @@
dgbbrd, dgbcon, dgbequ, dgbrfs, dgbsv,
dgbsvx, dgbtf2, dgbtrf, dgbtrs, dgebak, dgebal, dgebd2,
dgebrd, dgecon, dgeequ, dgees, dgeesx, dgeev, dgeevx,
dgegs, dgegv, dgehd2, dgehrd, dgelq2, dgelqf,
dgels, dgelsd, dgelss, dgelsx, dgelsy, dgeql2, dgeqlf,
dgeqp3, dgeqpf, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs,
dgehd2, dgehrd, dgelq2, dgelqf,
dgels, dgelsd, dgelss, dgelsy, dgeql2, dgeqlf,
dgeqp3, dgeqr2, dgeqr2p, dgeqrf, dgeqrfp, dgerfs,
dgerq2, dgerqf, dgesc2, dgesdd, dgesvd, dgesvx,
dgetc2, dgetri,
dggbak, dggbal, dgges, dggesx, dggev, dggevx,
dggglm, dgghrd, dgglse, dggqrf,
dggrqf, dggsvd, dggsvp, dgtcon, dgtrfs, dgtsv,
dggrqf, dgtcon, dgtrfs, dgtsv,
dgtsvx, dgttrf, dgttrs, dgtts2, dhgeqz,
dhsein, dhseqr, dlabrd, dlacon, dlacn2,
dlaein, dlaexc, dlag2, dlags2, dlagtm, dlagv2, dlahqr,
dlahrd, dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd,
dlahr2, dlaic1, dlaln2, dlals0, dlalsa, dlalsd,
dlangb, dlange, dlangt, dlanhs, dlansb, dlansp,
dlansy, dlantb, dlantp, dlantr, dlanv2,
dlapll, dlapmt,
@ -342,7 +341,7 @@
dlarf, dlarfb, dlarfg, dlarfgp, dlarft, dlarfx,
dlargv, dlarrv, dlartv,
dlarz, dlarzb, dlarzt, dlasy2, dlasyf,
dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz, dlatzm,
dlatbs, dlatdf, dlatps, dlatrd, dlatrs, dlatrz,
dopgtr, dopmtr, dorg2l, dorg2r,
dorgbr, dorghr, dorgl2, dorglq, dorgql, dorgqr, dorgr2,
dorgrq, dorgtr, dorm2l, dorm2r,
@ -368,7 +367,7 @@
dtgsja, dtgsna, dtgsy2, dtgsyl, dtpcon, dtprfs, dtptri,
dtptrs,
dtrcon, dtrevc, dtrexc, dtrrfs, dtrsen, dtrsna, dtrsyl,
dtrtrs, dtzrqf, dtzrzf, dstemr,
dtrtrs, dtzrzf, dstemr,
dsgesv, dsposv, dlag2s, slag2d, dlat2s,
dlansf, dpftrf, dpftri, dpftrs, dsfrk, dtfsm, dtftri, dtfttp,
dtfttr, dtpttf, dtpttr, dtrttf, dtrttp,
@ -387,14 +386,13 @@
zbdsqr, zgbbrd, zgbcon, zgbequ, zgbrfs, zgbsv, zgbsvx,
zgbtf2, zgbtrf, zgbtrs, zgebak, zgebal, zgebd2, zgebrd,
zgecon, zgeequ, zgees, zgeesx, zgeev, zgeevx,
zgegs, zgegv, zgehd2, zgehrd, zgelq2, zgelqf,
zgels, zgelsd, zgelss, zgelsx, zgelsy, zgeql2, zgeqlf, zgeqp3,
zgeqpf, zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf,
zgehd2, zgehrd, zgelq2, zgelqf,
zgels, zgelsd, zgelss, zgelsy, zgeql2, zgeqlf, zgeqp3,
zgeqr2, zgeqr2p, zgeqrf, zgeqrfp, zgerfs, zgerq2, zgerqf,
zgesc2, zgesdd, zgesvd, zgesvx, zgetc2,
zgetri,
zggbak, zggbal, zgges, zggesx, zggev, zggevx, zggglm,
zgghrd, zgglse, zggqrf, zggrqf,
zggsvd, zggsvp,
zgtcon, zgtrfs, zgtsv, zgtsvx, zgttrf, zgttrs, zgtts2, zhbev,
zhbevd, zhbevx, zhbgst, zhbgv, zhbgvd, zhbgvx, zhbtrd,
zhecon, zheev, zheevd, zheevr, zheevx, zhegs2, zhegst,
@ -409,7 +407,7 @@
zlaed0, zlaed7, zlaed8,
zlaein, zlaesy, zlaev2, zlags2, zlagtm,
zlahef, zlahqr,
zlahrd, zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange,
zlahr2, zlaic1, zlals0, zlalsa, zlalsd, zlangb, zlange,
zlangt, zlanhb,
zlanhe,
zlanhp, zlanhs, zlanht, zlansb, zlansp, zlansy, zlantb,
@ -422,7 +420,7 @@
zlarfx, zlargv, zlarnv, zlarrv, zlartg, zlartv,
zlarz, zlarzb, zlarzt, zlascl, zlaset, zlasr,
zlassq, zlasyf,
zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz, zlatzm,
zlatbs, zlatdf, zlatps, zlatrd, zlatrs, zlatrz,
zpbcon, zpbequ, zpbrfs, zpbstf, zpbsv,
zpbsvx, zpbtf2, zpbtrf, zpbtrs, zpocon, zpoequ, zporfs,
zposv, zposvx, zpotrs, zpstrf, zpstf2,
@ -438,7 +436,7 @@
ztgexc, ztgsen, ztgsja, ztgsna, ztgsy2, ztgsyl, ztpcon,
ztprfs, ztptri,
ztptrs, ztrcon, ztrevc, ztrexc, ztrrfs, ztrsen, ztrsna,
ztrsyl, ztrtrs, ztzrqf, ztzrzf, zung2l,
ztrsyl, ztrtrs, ztzrzf, zung2l,
zung2r, zungbr, zunghr, zungl2, zunglq, zungql, zungqr, zungr2,
zungrq, zungtr, zunm2l, zunm2r, zunmbr, zunmhr, zunml2,
zunmlq, zunmql, zunmqr, zunmr2, zunmr3, zunmrq, zunmrz,
@ -452,6 +450,140 @@
zunbdb5, zunbdb6, zuncsd, zuncsd2by1,
zgeqrt, zgeqrt2, zgeqrt3, zgemqrt,
ztpqrt, ztpqrt2, ztpmqrt, ztprfb,
# functions added for lapack-3.6.0
cgejsv,
cgesvdx,
cgesvj,
cgetrf2,
cgges3,
cggev3,
cgghd3,
cggsvd3,
cggsvp3,
cgsvj0,
cgsvj1,
clagge,
claghe,
clagsy,
clahilb,
clakf2,
clarge,
clarnd,
claror,
clarot,
clatm1,
clatm2,
clatm3,
clatm5,
clatm6,
clatme,
clatmr,
clatms,
clatmt,
cpotrf2,
csbmv,
cspr2,
csyr2,
cunm22,
dbdsvdx,
dgesvdx,
dgetrf2,
dgges3,
dggev3,
dgghd3,
dggsvd3,
dggsvp3,
dladiv2,
dlagge,
dlagsy,
dlahilb,
dlakf2,
dlaran,
dlarge,
dlarnd,
dlaror,
dlarot,
dlatm1,
dlatm2,
dlatm3,
dlatm5,
dlatm6,
dlatm7,
dlatme,
dlatmr,
dlatms,
dlatmt,
dorm22,
dpotrf2,
dsecnd,
sbdsvdx,
second,
sgesvdx,
sgetrf2,
sgges3,
sggev3,
sgghd3,
sggsvd3,
sggsvp3,
sladiv2,
slagge,
slagsy,
slahilb,
slakf2,
slaran,
slarge,
slarnd,
slaror,
slarot,
slatm1,
slatm2,
slatm3,
slatm5,
slatm6,
slatm7,
slatme,
slatmr,
slatms,
slatmt,
sorm22,
spotrf2,
xerbla,
zgejsv,
zgesvdx,
zgesvj,
zgetrf2,
zgges3,
zggev3,
zgghd3,
zggsvd3,
zggsvp3,
zgsvj0,
zgsvj1,
zlagge,
zlaghe,
zlagsy,
zlahilb,
zlakf2,
zlarge,
zlarnd,
zlaror,
zlarot,
zlatm1,
zlatm2,
zlatm3,
zlatm5,
zlatm6,
zlatme,
zlatmr,
zlatms,
zlatmt,
zpotrf2,
zsbmv,
zspr2,
zsyr2,
zunm22
);
@lapack_extendedprecision_objs = (
@ -682,8 +814,6 @@
LAPACKE_cgeqlf_work,
LAPACKE_cgeqp3,
LAPACKE_cgeqp3_work,
LAPACKE_cgeqpf,
LAPACKE_cgeqpf_work,
LAPACKE_cgeqr2,
LAPACKE_cgeqr2_work,
LAPACKE_cgeqrf,
@ -738,10 +868,6 @@
LAPACKE_cggqrf_work,
LAPACKE_cggrqf,
LAPACKE_cggrqf_work,
LAPACKE_cggsvd,
LAPACKE_cggsvd_work,
LAPACKE_cggsvp,
LAPACKE_cggsvp_work,
LAPACKE_cgtcon,
LAPACKE_cgtcon_work,
LAPACKE_cgtrfs,
@ -1186,8 +1312,6 @@
LAPACKE_dgeqlf_work,
LAPACKE_dgeqp3,
LAPACKE_dgeqp3_work,
LAPACKE_dgeqpf,
LAPACKE_dgeqpf_work,
LAPACKE_dgeqr2,
LAPACKE_dgeqr2_work,
LAPACKE_dgeqrf,
@ -1244,10 +1368,6 @@
LAPACKE_dggqrf_work,
LAPACKE_dggrqf,
LAPACKE_dggrqf_work,
LAPACKE_dggsvd,
LAPACKE_dggsvd_work,
LAPACKE_dggsvp,
LAPACKE_dggsvp_work,
LAPACKE_dgtcon,
LAPACKE_dgtcon_work,
LAPACKE_dgtrfs,
@ -1676,8 +1796,6 @@
LAPACKE_sgeqlf_work,
LAPACKE_sgeqp3,
LAPACKE_sgeqp3_work,
LAPACKE_sgeqpf,
LAPACKE_sgeqpf_work,
LAPACKE_sgeqr2,
LAPACKE_sgeqr2_work,
LAPACKE_sgeqrf,
@ -1734,10 +1852,6 @@
LAPACKE_sggqrf_work,
LAPACKE_sggrqf,
LAPACKE_sggrqf_work,
LAPACKE_sggsvd,
LAPACKE_sggsvd_work,
LAPACKE_sggsvp,
LAPACKE_sggsvp_work,
LAPACKE_sgtcon,
LAPACKE_sgtcon_work,
LAPACKE_sgtrfs,
@ -2158,8 +2272,6 @@
LAPACKE_zgeqlf_work,
LAPACKE_zgeqp3,
LAPACKE_zgeqp3_work,
LAPACKE_zgeqpf,
LAPACKE_zgeqpf_work,
LAPACKE_zgeqr2,
LAPACKE_zgeqr2_work,
LAPACKE_zgeqrf,
@ -2214,10 +2326,6 @@
LAPACKE_zggqrf_work,
LAPACKE_zggrqf,
LAPACKE_zggrqf_work,
LAPACKE_zggsvd,
LAPACKE_zggsvd_work,
LAPACKE_zggsvp,
LAPACKE_zggsvp_work,
LAPACKE_zgtcon,
LAPACKE_zgtcon_work,
LAPACKE_zgtrfs,
@ -2707,6 +2815,134 @@
LAPACKE_slagsy_work,
LAPACKE_zlagsy,
LAPACKE_zlagsy_work,
## new function from lapack-3.6.0
LAPACKE_cgejsv,
LAPACKE_cgejsv_work,
LAPACKE_cgesvdx,
LAPACKE_cgesvdx_work,
LAPACKE_cgesvj,
LAPACKE_cgesvj_work,
LAPACKE_cgetrf2,
LAPACKE_cgetrf2_work,
LAPACKE_cgges3,
LAPACKE_cgges3_work,
LAPACKE_cggev3,
LAPACKE_cggev3_work,
LAPACKE_cgghd3,
LAPACKE_cgghd3_work,
LAPACKE_cggsvd3,
LAPACKE_cggsvd3_work,
LAPACKE_cggsvp3,
LAPACKE_cggsvp3_work,
LAPACKE_chetrf_rook,
LAPACKE_chetrf_rook_work,
LAPACKE_chetrs_rook,
LAPACKE_chetrs_rook_work,
LAPACKE_clapmt,
LAPACKE_clapmt_work,
LAPACKE_clascl,
LAPACKE_clascl_work,
LAPACKE_cpotrf2,
LAPACKE_cpotrf2_work,
LAPACKE_csytrf_rook,
LAPACKE_csytrf_rook_work,
LAPACKE_csytrs_rook,
LAPACKE_csytrs_rook_work,
LAPACKE_cuncsd2by1,
LAPACKE_cuncsd2by1_work,
LAPACKE_dbdsvdx,
LAPACKE_dbdsvdx_work,
LAPACKE_dgesvdx,
LAPACKE_dgesvdx_work,
LAPACKE_dgetrf2,
LAPACKE_dgetrf2_work,
LAPACKE_dgges3,
LAPACKE_dgges3_work,
LAPACKE_dggev3,
LAPACKE_dggev3_work,
LAPACKE_dgghd3,
LAPACKE_dgghd3_work,
LAPACKE_dggsvd3,
LAPACKE_dggsvd3_work,
LAPACKE_dggsvp3,
LAPACKE_dggsvp3_work,
LAPACKE_dlapmt,
LAPACKE_dlapmt_work,
LAPACKE_dlascl,
LAPACKE_dlascl_work,
LAPACKE_dorcsd2by1,
LAPACKE_dorcsd2by1_work,
LAPACKE_dpotrf2,
LAPACKE_dpotrf2_work,
LAPACKE_dsytrf_rook,
LAPACKE_dsytrf_rook_work,
LAPACKE_dsytrs_rook,
LAPACKE_dsytrs_rook_work,
LAPACKE_sbdsvdx,
LAPACKE_sbdsvdx_work,
LAPACKE_sgesvdx,
LAPACKE_sgesvdx_work,
LAPACKE_sgetrf2,
LAPACKE_sgetrf2_work,
LAPACKE_sgges3,
LAPACKE_sgges3_work,
LAPACKE_sggev3,
LAPACKE_sggev3_work,
LAPACKE_sgghd3,
LAPACKE_sgghd3_work,
LAPACKE_sggsvd3,
LAPACKE_sggsvd3_work,
LAPACKE_sggsvp3,
LAPACKE_sggsvp3_work,
LAPACKE_slapmt,
LAPACKE_slapmt_work,
LAPACKE_slascl,
LAPACKE_slascl_work,
LAPACKE_sorcsd2by1,
LAPACKE_sorcsd2by1_work,
LAPACKE_spotrf2,
LAPACKE_spotrf2_work,
LAPACKE_ssytrf_rook,
LAPACKE_ssytrf_rook_work,
LAPACKE_ssytrs_rook,
LAPACKE_ssytrs_rook_work,
LAPACKE_stpqrt,
LAPACKE_stpqrt_work,
LAPACKE_zgejsv,
LAPACKE_zgejsv_work,
LAPACKE_zgesvdx,
LAPACKE_zgesvdx_work,
LAPACKE_zgesvj,
LAPACKE_zgesvj_work,
LAPACKE_zgetrf2,
LAPACKE_zgetrf2_work,
LAPACKE_zgges3,
LAPACKE_zgges3_work,
LAPACKE_zggev3,
LAPACKE_zggev3_work,
LAPACKE_zgghd3,
LAPACKE_zgghd3_work,
LAPACKE_zggsvd3,
LAPACKE_zggsvd3_work,
LAPACKE_zggsvp3,
LAPACKE_zggsvp3_work,
LAPACKE_zhetrf_rook,
LAPACKE_zhetrf_rook_work,
LAPACKE_zhetrs_rook,
LAPACKE_zhetrs_rook_work,
LAPACKE_zlapmt,
LAPACKE_zlapmt_work,
LAPACKE_zlascl,
LAPACKE_zlascl_work,
LAPACKE_zpotrf2,
LAPACKE_zpotrf2_work,
LAPACKE_zsytrf_rook,
LAPACKE_zsytrf_rook_work,
LAPACKE_zsytrs_rook,
LAPACKE_zsytrs_rook_work,
LAPACKE_zuncsd2by1,
LAPACKE_zuncsd2by1_work
);
#These function may need 2 underscores.

View File

@ -5,8 +5,8 @@ DAMAXKERNEL = amax.S
CAMAXKERNEL = zamax.S
ZAMAXKERNEL = zamax.S
ISAMAXKERNEL = isamax.S
IDAMAXKERNEL = idamax.S
ISAMAXKERNEL = iamax.S
IDAMAXKERNEL = iamax.S
ICAMAXKERNEL = izamax.S
IZAMAXKERNEL = izamax.S
@ -25,13 +25,13 @@ DCOPYKERNEL = copy.S
CCOPYKERNEL = copy.S
ZCOPYKERNEL = copy.S
DOTKERNEL = dot.S
SDOTKERNEL = dot.S
DDOTKERNEL = dot.S
CDOTKERNEL = zdot.S
ZDOTKERNEL = zdot.S
SNRM2KERNEL = snrm2.S
DNRM2KERNEL = dnrm2.S
SNRM2KERNEL = nrm2.S
DNRM2KERNEL = nrm2.S
CNRM2KERNEL = znrm2.S
ZNRM2KERNEL = znrm2.S
@ -40,7 +40,7 @@ DROTKERNEL = rot.S
CROTKERNEL = zrot.S
ZROTKERNEL = zrot.S
SCALKERNEL = scal.S
SSCALKERNEL = scal.S
DSCALKERNEL = scal.S
CSCALKERNEL = zscal.S
ZSCALKERNEL = zscal.S

View File

@ -181,73 +181,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmul v16.4s, v0.4s, v8.4s[0]
OP_ii v16.4s, v1.4s, v9.4s[0]
fmul v17.4s, v0.4s, v9.4s[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v17.4s, v17.4s
eor v17.16b, v17.16b, v17.16b
fmls v17.4s, v0.4s, v9.4s[0]
#else
fmul v17.4s, v0.4s, v9.4s[0]
#endif
OP_ir v17.4s, v1.4s, v8.4s[0]
fmul v20.4s, v0.4s, v8.4s[1]
OP_ii v20.4s, v1.4s, v9.4s[1]
fmul v21.4s, v0.4s, v9.4s[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v21.4s, v21.4s
eor v21.16b, v21.16b, v21.16b
fmls v21.4s, v0.4s, v9.4s[1]
#else
fmul v21.4s, v0.4s, v9.4s[1]
#endif
OP_ir v21.4s, v1.4s, v8.4s[1]
fmul v24.4s, v0.4s, v8.4s[2]
OP_ii v24.4s, v1.4s, v9.4s[2]
fmul v25.4s, v0.4s, v9.4s[2]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v25.4s, v25.4s
eor v25.16b, v25.16b, v25.16b
fmls v25.4s, v0.4s, v9.4s[2]
#else
fmul v25.4s, v0.4s, v9.4s[2]
#endif
OP_ir v25.4s, v1.4s, v8.4s[2]
fmul v28.4s, v0.4s, v8.4s[3]
OP_ii v28.4s, v1.4s, v9.4s[3]
fmul v29.4s, v0.4s, v9.4s[3]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v29.4s, v29.4s
eor v29.16b, v29.16b, v29.16b
fmls v29.4s, v0.4s, v9.4s[3]
#else
fmul v29.4s, v0.4s, v9.4s[3]
#endif
OP_ir v29.4s, v1.4s, v8.4s[3]
fmul v18.4s, v2.4s, v8.4s[0]
OP_ii v18.4s, v3.4s, v9.4s[0]
fmul v19.4s, v2.4s, v9.4s[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v19.4s, v19.4s
eor v19.16b, v19.16b, v19.16b
fmls v19.4s, v2.4s, v9.4s[0]
#else
fmul v19.4s, v2.4s, v9.4s[0]
#endif
OP_ir v19.4s, v3.4s, v8.4s[0]
fmul v22.4s, v2.4s, v8.4s[1]
OP_ii v22.4s, v3.4s, v9.4s[1]
fmul v23.4s, v2.4s, v9.4s[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v23.4s, v23.4s
eor v23.16b, v23.16b, v23.16b
fmls v23.4s, v2.4s, v9.4s[1]
#else
fmul v23.4s, v2.4s, v9.4s[1]
#endif
OP_ir v23.4s, v3.4s, v8.4s[1]
fmul v26.4s, v2.4s, v8.4s[2]
OP_ii v26.4s, v3.4s, v9.4s[2]
fmul v27.4s, v2.4s, v9.4s[2]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v27.4s, v27.4s
eor v27.16b, v27.16b, v27.16b
fmls v27.4s, v2.4s, v9.4s[2]
#else
fmul v27.4s, v2.4s, v9.4s[2]
#endif
OP_ir v27.4s, v3.4s, v8.4s[2]
fmul v30.4s, v2.4s, v8.4s[3]
OP_ii v30.4s, v3.4s, v9.4s[3]
fmul v31.4s, v2.4s, v9.4s[3]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v31.4s, v31.4s
eor v31.16b, v31.16b, v31.16b
fmls v31.4s, v2.4s, v9.4s[3]
#else
fmul v31.4s, v2.4s, v9.4s[3]
#endif
OP_ir v31.4s, v3.4s, v8.4s[3]

View File

@ -172,37 +172,45 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmul v16.4s, v0.4s, v8.4s[0]
OP_ii v16.4s, v1.4s, v9.4s[0]
fmul v17.4s, v0.4s, v9.4s[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v17.4s, v17.4s
eor v17.16b, v17.16b, v17.16b
fmls v17.4s, v0.4s, v9.4s[0]
#else
fmul v17.4s, v0.4s, v9.4s[0]
#endif
OP_ir v17.4s, v1.4s, v8.4s[0]
fmul v20.4s, v0.4s, v8.4s[1]
OP_ii v20.4s, v1.4s, v9.4s[1]
fmul v21.4s, v0.4s, v9.4s[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v21.4s, v21.4s
eor v21.16b, v21.16b, v21.16b
fmls v21.4s, v0.4s, v9.4s[1]
#else
fmul v21.4s, v0.4s, v9.4s[1]
#endif
OP_ir v21.4s, v1.4s, v8.4s[1]
fmul v24.4s, v0.4s, v8.4s[2]
OP_ii v24.4s, v1.4s, v9.4s[2]
fmul v25.4s, v0.4s, v9.4s[2]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v25.4s, v25.4s
eor v25.16b, v25.16b, v25.16b
fmls v25.4s, v0.4s, v9.4s[2]
#else
fmul v25.4s, v0.4s, v9.4s[2]
#endif
OP_ir v25.4s, v1.4s, v8.4s[2]
fmul v28.4s, v0.4s, v8.4s[3]
OP_ii v28.4s, v1.4s, v9.4s[3]
fmul v29.4s, v0.4s, v9.4s[3]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v29.4s, v29.4s
eor v29.16b, v29.16b, v29.16b
fmls v29.4s, v0.4s, v9.4s[3]
#else
fmul v29.4s, v0.4s, v9.4s[3]
#endif
OP_ir v29.4s, v1.4s, v8.4s[3]

View File

@ -45,16 +45,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define COND ge
#endif
#if !defined(DOUBLE)
#define MAXF s0
#define TMPF s1
#define TMPVF {v1.s}[0]
#define SZ 4
#else
#define MAXF d0
#define TMPF d1
#define TMPVF {v1.d}[0]
#define SZ 8
#endif
/******************************************************************************/
.macro INIT_S
#if !defined(DOUBLE)
lsl INC_X, INC_X, #2
ld1 {v0.s}[0], [X], INC_X
#else
lsl INC_X, INC_X, #3
ld1 {v0.d}[0], [X], INC_X
#endif
mov Z, #1
mov INDEX, Z
fabs MAXF, MAXF
@ -107,7 +119,6 @@ iamax_kernel_S1:
iamax_kernel_S10:
KERNEL_S1
subs I, I, #1
bne iamax_kernel_S10

View File

@ -1,213 +0,0 @@
/*******************************************************************************
Copyright (c) 2015, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#define ASSEMBLER
#include "common.h"
#define N x0 /* vector length */
#define X x1 /* X vector address */
#define INC_X x2 /* X stride */
#define INDEX x3 /* index of max/min value */
#define Z x4 /* vector index */
#define I x5 /* loop variable */
#define X_COPY x6 /* copy of X address */
#define MAXF_Z x7
/*******************************************************************************
* Macro definitions
*******************************************************************************/
#define MAXF s5
#define TMPF s6
#define TMPVF {v6.s}[0]
#define SZ 4
/******************************************************************************/
.macro INIT_F1
ldr MAXF, [X], #SZ
mov Z, #1
mov INDEX, Z
fabs MAXF, MAXF
.endm
.macro KERNEL_F1
ldr TMPF, [X], #SZ
add Z, Z, #1
fabs TMPF, TMPF
fcmp TMPF, MAXF
fcsel MAXF, MAXF, TMPF, le
csel INDEX, INDEX, Z, le
.endm
.macro INIT_F4
ld1 {v0.4s}, [X], #16
fabs v0.4s, v0.4s
fmaxv MAXF, v0.4s
mov Z, #5
mov MAXF_Z, #1
.endm
.macro KERNEL_F4
ld1 {v0.4s}, [X], #16
fabs v0.4s, v0.4s
fmaxv TMPF, v0.4s
PRFM PLDL1KEEP, [X, #512]
fcmp TMPF, MAXF
fcsel MAXF, MAXF, TMPF, le
csel MAXF_Z, MAXF_Z, Z, le
add Z, Z, #4
.endm
.macro KERNEL_F4_FINALIZE
mov INDEX, MAXF_Z
sub MAXF_Z, MAXF_Z, #1
lsl MAXF_Z, MAXF_Z, #2
add X_COPY, X_COPY, MAXF_Z
ldr TMPF, [X_COPY], #SZ
fabs TMPF, TMPF
fcmp TMPF, MAXF
beq KERNEL_F4_FINALIZE_DONE
add INDEX, INDEX, #1
ldr TMPF, [X_COPY], #SZ
fabs TMPF, TMPF
fcmp TMPF, MAXF
beq KERNEL_F4_FINALIZE_DONE
add INDEX, INDEX, #1
ldr TMPF, [X_COPY], #SZ
fabs TMPF, TMPF
fcmp TMPF, MAXF
beq KERNEL_F4_FINALIZE_DONE
add INDEX, INDEX, #1
KERNEL_F4_FINALIZE_DONE:
.endm
.macro INIT_S
lsl INC_X, INC_X, #2
ld1 TMPVF, [X], INC_X
mov Z, #1
mov INDEX, Z
fabs MAXF, TMPF
.endm
.macro KERNEL_S1
ld1 TMPVF, [X], INC_X
add Z, Z, #1
fabs TMPF, TMPF
fcmp TMPF, MAXF
fcsel MAXF, MAXF, TMPF, le
csel INDEX, INDEX, Z, le
.endm
/*******************************************************************************
* End of macro definitions
*******************************************************************************/
PROLOGUE
cmp N, xzr
ble iamax_kernel_zero
cmp INC_X, xzr
ble iamax_kernel_zero
PRFM PLDL1KEEP, [X]
mov X_COPY, X
cmp INC_X, #1
bne iamax_kernel_S_BEGIN
iamax_kernel_F_BEGIN:
asr I, N, #2
cmp I, xzr
beq iamax_kernel_F1_INIT
INIT_F4
subs I, I, #1
beq iamax_kernel_F4_FINALIZE
iamax_kernel_F4:
KERNEL_F4
subs I, I, #1
bne iamax_kernel_F4
iamax_kernel_F4_FINALIZE:
KERNEL_F4_FINALIZE
iamax_kernel_F1:
ands I, N, #3
ble iamax_kernel_L999
iamax_kernel_F10:
KERNEL_F1
subs I, I, #1
bne iamax_kernel_F10
b iamax_kernel_L999
iamax_kernel_F1_INIT:
INIT_F1
subs N, N, #1
b iamax_kernel_F1
iamax_kernel_S_BEGIN:
INIT_S
subs N, N, #1
ble iamax_kernel_L999
asr I, N, #2
cmp I, xzr
ble iamax_kernel_S1
iamax_kernel_S4:
KERNEL_S1
KERNEL_S1
KERNEL_S1
KERNEL_S1
subs I, I, #1
bne iamax_kernel_S4
iamax_kernel_S1:
ands I, N, #3
ble iamax_kernel_L999
iamax_kernel_S10:
KERNEL_S1
subs I, I, #1
bne iamax_kernel_S10
iamax_kernel_L999:
mov x0, INDEX
ret
iamax_kernel_zero:
mov x0, xzr
ret
EPILOGUE

225
kernel/arm64/nrm2.S Normal file
View File

@ -0,0 +1,225 @@
/*******************************************************************************
Copyright (c) 2015, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*******************************************************************************/
#define ASSEMBLER
#include "common.h"
#define N x0
#define X x1
#define INC_X x2
#define I x3
#if !defined(DOUBLE)
#define SSQ s0
#define SCALE s1
#define REGZERO s5
#define REGONE s6
#else
#define SSQ d0
#define SCALE d1
#define REGZERO d5
#define REGONE d6
#endif
/*******************************************************************************
* Macro definitions
*******************************************************************************/
.macro KERNEL_F1
#if !defined(DOUBLE)
ldr s4, [X], #4
fcmp s4, REGZERO
beq KERNEL_F1_NEXT_\@
fabs s4, s4
fcmp SCALE, s4
bge KERNEL_F1_SCALE_GE_X_\@
fdiv s2, SCALE, s4
fmul s2, s2, s2
fmul s3, SSQ, s2
fadd SSQ, REGONE, s3
fmov SCALE, s4
b KERNEL_F1_NEXT_\@
KERNEL_F1_SCALE_GE_X_\@:
fdiv s2, s4, SCALE
fmla SSQ, s2, v2.s[0]
#else
ldr d4, [X], #8
fcmp d4, REGZERO
beq KERNEL_F1_NEXT_\@
fabs d4, d4
fcmp SCALE, d4
bge KERNEL_F1_SCALE_GE_X_\@
fdiv d2, SCALE, d4
fmul d2, d2, d2
fmul d3, SSQ, d2
fadd SSQ, REGONE, d3
fmov SCALE, d4
b KERNEL_F1_NEXT_\@
KERNEL_F1_SCALE_GE_X_\@:
fdiv d2, d4, SCALE
fmla SSQ, d2, v2.d[0]
#endif
KERNEL_F1_NEXT_\@:
.endm
.macro KERNEL_S1
#if !defined(DOUBLE)
ldr s4, [X]
fcmp s4, REGZERO
beq KERNEL_S1_NEXT
fabs s4, s4
fcmp SCALE, s4
bge KERNEL_S1_SCALE_GE_X
fdiv s2, SCALE, s4
fmul s2, s2, s2
fmul s3, SSQ, s2
fadd SSQ, REGONE, s3
fmov SCALE, s4
b KERNEL_S1_NEXT
KERNEL_S1_SCALE_GE_X:
fdiv s2, s4, SCALE
fmla SSQ, s2, v2.s[0]
#else
ldr d4, [X]
fcmp d4, REGZERO
beq KERNEL_S1_NEXT
fabs d4, d4
fcmp SCALE, d4
bge KERNEL_S1_SCALE_GE_X
fdiv d2, SCALE, d4
fmul d2, d2, d2
fmul d3, SSQ, d2
fadd SSQ, REGONE, d3
fmov SCALE, d4
b KERNEL_S1_NEXT
KERNEL_S1_SCALE_GE_X:
fdiv d2, d4, SCALE
fmla SSQ, d2, v2.d[0]
#endif
KERNEL_S1_NEXT:
add X, X, INC_X
.endm
.macro KERNEL_F8
KERNEL_F1
KERNEL_F1
KERNEL_F1
KERNEL_F1
KERNEL_F1
KERNEL_F1
KERNEL_F1
KERNEL_F1
.endm
.macro INIT_S
#if !defined(DOUBLE)
lsl INC_X, INC_X, #2 // INC_X * SIZE
#else
lsl INC_X, INC_X, #3 // INC_X * SIZE
#endif
.endm
.macro INIT
eor v1.16b, v1.16b, v1.16b // scale=0.0
fmov SSQ, #1.0
fmov REGONE, SSQ
fmov REGZERO, SCALE
.endm
/*******************************************************************************
* End of macro definitions
*******************************************************************************/
PROLOGUE
.align 5
INIT
cmp N, #0
ble nrm2_kernel_L999
cmp INC_X, #0
beq nrm2_kernel_L999
cmp INC_X, #1
bne nrm2_kernel_S_BEGIN
nrm2_kernel_F_BEGIN:
asr I, N, #3 // I = N / 8
cmp I, xzr
ble nrm2_kernel_F1
nrm2_kernel_F8:
KERNEL_F8
subs I, I, #1
bne nrm2_kernel_F8
nrm2_kernel_F1:
ands I, N, #7
ble nrm2_kernel_L999
nrm2_kernel_F10:
KERNEL_F1
subs I, I, #1
bne nrm2_kernel_F10
b nrm2_kernel_L999
nrm2_kernel_S_BEGIN:
INIT_S
mov I, N
.align 5
nrm2_kernel_S10:
KERNEL_S1
subs I, I, #1
bne nrm2_kernel_S10
nrm2_kernel_L999:
fsqrt SSQ, SSQ
fmul SSQ, SCALE, SSQ
ret
EPILOGUE

View File

@ -59,10 +59,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro INIT_F1
#if !defined(DOUBLE)
fneg s2, S
eor v2.16b, v2.16b, v2.16b
fsub s2, s2, S
ins v1.s[1], v2.s[0] // [-S, S]
#else
fneg d2, S
eor v2.16b, v2.16b, v2.16b
fsub d2, d2, S
ins v1.d[1], v2.d[0] // [-S, S]
#endif
.endm

View File

@ -43,14 +43,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(DOUBLE)
#define DA_R s0 /* scale input value */
#define DA_I s1 /* scale input value */
#define TMPX v2.2s
#define TMPY v3.2s
#define SZ 4
#else
#define DA_R d0 /* scale input value */
#define DA_I d1 /* scale input value */
#define TMPX v2.2d
#define TMPY v3.2d
#define SZ 8
#endif
@ -61,22 +57,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(CONJ)
#if !defined(DOUBLE)
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
fneg s2, DA_I
eor v2.16b, v2.16b, v2.16b
fsub s2, s2, DA_I
ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I
ext v1.8b, v1.8b, v1.8b, #4 // v1 = DA_I, -DA_I
#else
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
fneg d2, DA_I
eor v2.16b, v2.16b, v2.16b
fsub d2, d2, DA_I
ins v1.d[1], v2.d[0] // v1 = -DA_I, DA_I
ext v1.16b, v1.16b, v1.16b, #8 // v1 = DA_I, -DA_I
#endif
#else
#if !defined(DOUBLE)
fneg s2, DA_R
eor v2.16b, v2.16b, v2.16b
fsub s2, s2, DA_R
ins v0.s[1], v2.s[0] // v0 = -DA_R, DA_R
ins v1.s[1], v1.s[0] // v1 = DA_I, DA_I
#else
fneg d2, DA_R
eor v2.16b, v2.16b, v2.16b
fsub d2, d2, DA_R
ins v0.d[1], v2.d[0] // v0 = -DA_R, DA_R
ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I
#endif
@ -111,9 +111,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_INIT_F4
#if !defined(DOUBLE)
// Replicate the lower 2 floats into the upper 2 slots
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R, DA_R, DA_R
ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I, DA_I, DA_I
ins v16.s[0], v0.s[0]
ins v16.s[1], v16.s[0]
ins v16.d[1], v16.d[0]
#if !defined(CONJ)
ins v17.s[0], v1.s[1]
#else
ins v17.s[0], v1.s[0]
#endif
ins v17.s[1], v17.s[0]
ins v17.d[1], v17.d[0]
#else //DOUBLE
ins v16.d[0], v0.d[0]
ins v16.d[1], v16.d[0]
#if !defined(CONJ)
ins v17.d[0], v1.d[1]
#else
ins v17.d[0], v1.d[0]
#endif
ins v17.d[1], v17.d[0]
#endif
.endm
@ -121,55 +137,60 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4
#if !defined(DOUBLE)
ld1 {v2.4s,v3.4s}, [X], #32 // V2 = X[3], X[2], X[1], X[0]
// V3 = X[7], X[6], X[5], X[4]
ext v6.8b, v2.8b, v2.8b, #4 // V6 = - , - , X[0], X[1]
ins v6.s[2], v2.s[3] // V6 = - , X[3], X[0], X[1]
ins v6.s[3], v2.s[2] // V6 = X[2], X[3], X[0], X[1]
ld2 {v2.4s, v3.4s}, [X], #32
ld2 {v4.4s, v5.4s}, [Y_COPY], #32
ld1 {v4.4s,v5.4s}, [Y] // V4 = Y[3], Y[2], Y[1], Y[0]
// V5 = Y[7], Y[6], Y[5], Y[4]
fmla v4.4s, v2.4s, v16.4s
#if !defined(CONJ)
fmls v4.4s, v3.4s, v17.4s
#else
fmla v4.4s, v3.4s, v17.4s
#endif
ext v7.8b, v3.8b, v3.8b, #4 // V7 = - , - , X[4], X[5]
ins v7.s[2], v3.s[3] // V7 = - , X[7], X[4], X[5]
ins v7.s[3], v3.s[2] // V7 = X[6], X[7], X[4], X[5]
#if !defined(CONJ)
fmla v5.4s, v2.4s, v17.4s
#else
fmls v5.4s, v2.4s, v17.4s
#endif
fmla v5.4s, v3.4s, v16.4s
fmla v4.4s, v0.4s, v2.4s // Y[iy] += DA_R * X[ix]
// Y[iy+1] += +-DA_R * X[ix+1]
fmla v4.4s, v1.4s, v6.4s // Y[iy] += +-DA_I * X[ix+1]
// Y[iy+1] += DA_I * X[ix]
st1 {v4.4s}, [Y], #16
fmla v5.4s, v0.4s, v3.4s // Y[iy] += DA_R * X[ix]
fmla v5.4s, v1.4s, v7.4s // Y[iy] += +-DA_I * X[ix+1]
// Y[iy+1] += +-DA_R * X[ix+1]
// Y[iy+1] += DA_I * X[ix]
st1 {v5.4s}, [Y], #16
st2 {v4.4s, v5.4s}, [Y], #32
#else // DOUBLE
ld1 {v2.2d,v3.2d}, [X], #32 // CX0, CX1, CX2, CX3
ext v20.16b, v2.16b, v2.16b, #8 // X[ix], X[ix+1]
ext v21.16b, v3.16b, v3.16b, #8 // X[ix], X[ix+1]
ld2 {v2.2d, v3.2d}, [X], #32
ld2 {v4.2d, v5.2d}, [Y_COPY], #32
ld1 {v4.2d,v5.2d}, [X], #32 // CX0, CX1, CX2, CX3
ext v22.16b, v4.16b, v4.16b, #8 // X[ix], X[ix+1]
ext v23.16b, v5.16b, v5.16b, #8 // X[ix], X[ix+1]
fmla v4.2d, v2.2d, v16.2d
#if !defined(CONJ)
fmls v4.2d, v3.2d, v17.2d
#else
fmla v4.2d, v3.2d, v17.2d
#endif
#if !defined(CONJ)
fmla v5.2d, v2.2d, v17.2d
#else
fmls v5.2d, v2.2d, v17.2d
#endif
fmla v5.2d, v3.2d, v16.2d
ld1 {v16.2d,v17.2d}, [Y_COPY], #32 // CY0, CY1, CY2, CY3
st2 {v4.2d, v5.2d}, [Y], #32
fmla v16.2d, v0.2d, v2.2d
fmla v17.2d, v0.2d, v3.2d
ld2 {v18.2d, v19.2d}, [X], #32
ld2 {v20.2d, v21.2d}, [Y_COPY], #32
ld1 {v18.2d,v19.2d}, [Y_COPY], #32 // CY0, CY1, CY2, CY3
fmla v20.2d, v18.2d, v16.2d
#if !defined(CONJ)
fmls v20.2d, v19.2d, v17.2d
#else
fmla v20.2d, v19.2d, v17.2d
#endif
#if !defined(CONJ)
fmla v21.2d, v18.2d, v17.2d
#else
fmls v21.2d, v18.2d, v17.2d
#endif
fmla v21.2d, v19.2d, v16.2d
fmla v16.2d, v1.2d, v20.2d
fmla v17.2d, v1.2d, v21.2d
st1 {v16.2d,v17.2d}, [Y], #32
fmla v18.2d, v0.2d, v4.2d
fmla v19.2d, v0.2d, v5.2d
fmla v18.2d, v1.2d, v22.2d
fmla v19.2d, v1.2d, v23.2d
st1 {v18.2d,v19.2d}, [Y], #32
st2 {v20.2d, v21.2d}, [Y], #32
#endif
PRFM PLDL1KEEP, [X, #512]
PRFM PLDL1KEEP, [Y, #512]

View File

@ -184,73 +184,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmul v16.2d, v0.2d, v8.2d[0]
OP_ii v16.2d, v1.2d, v9.2d[0]
fmul v17.2d, v0.2d, v9.2d[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v17.2d, v17.2d
eor v17.16b, v17.16b, v17.16b
fmls v17.2d, v0.2d, v9.2d[0]
#else
fmul v17.2d, v0.2d, v9.2d[0]
#endif
OP_ir v17.2d, v1.2d, v8.2d[0]
fmul v18.2d, v2.2d, v8.2d[0]
OP_ii v18.2d, v3.2d, v9.2d[0]
fmul v19.2d, v2.2d, v9.2d[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v19.2d, v19.2d
eor v19.16b, v19.16b, v19.16b
fmls v19.2d, v2.2d, v9.2d[0]
#else
fmul v19.2d, v2.2d, v9.2d[0]
#endif
OP_ir v19.2d, v3.2d, v8.2d[0]
fmul v20.2d, v0.2d, v8.2d[1]
OP_ii v20.2d, v1.2d, v9.2d[1]
fmul v21.2d, v0.2d, v9.2d[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v21.2d, v21.2d
eor v21.16b, v21.16b, v21.16b
fmls v21.2d, v0.2d, v9.2d[1]
#else
fmul v21.2d, v0.2d, v9.2d[1]
#endif
OP_ir v21.2d, v1.2d, v8.2d[1]
fmul v22.2d, v2.2d, v8.2d[1]
OP_ii v22.2d, v3.2d, v9.2d[1]
fmul v23.2d, v2.2d, v9.2d[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v23.2d, v23.2d
eor v23.16b, v23.16b, v23.16b
fmls v23.2d, v2.2d, v9.2d[1]
#else
fmul v23.2d, v2.2d, v9.2d[1]
#endif
OP_ir v23.2d, v3.2d, v8.2d[1]
fmul v24.2d, v0.2d, v10.2d[0]
OP_ii v24.2d, v1.2d, v11.2d[0]
fmul v25.2d, v0.2d, v11.2d[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v25.2d, v25.2d
eor v25.16b, v25.16b, v25.16b
fmls v25.2d, v0.2d, v11.2d[0]
#else
fmul v25.2d, v0.2d, v11.2d[0]
#endif
OP_ir v25.2d, v1.2d, v10.2d[0]
fmul v26.2d, v2.2d, v10.2d[0]
OP_ii v26.2d, v3.2d, v11.2d[0]
fmul v27.2d, v2.2d, v11.2d[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v27.2d, v27.2d
eor v27.16b, v27.16b, v27.16b
fmls v27.2d, v2.2d, v11.2d[0]
#else
fmul v27.2d, v2.2d, v11.2d[0]
#endif
OP_ir v27.2d, v3.2d, v10.2d[0]
fmul v28.2d, v0.2d, v10.2d[1]
OP_ii v28.2d, v1.2d, v11.2d[1]
fmul v29.2d, v0.2d, v11.2d[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v29.2d, v29.2d
eor v29.16b, v29.16b, v29.16b
fmls v29.2d, v0.2d, v11.2d[1]
#else
fmul v29.2d, v0.2d, v11.2d[1]
#endif
OP_ir v29.2d, v1.2d, v10.2d[1]
fmul v30.2d, v2.2d, v10.2d[1]
OP_ii v30.2d, v3.2d, v11.2d[1]
fmul v31.2d, v2.2d, v11.2d[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v31.2d, v31.2d
eor v31.16b, v31.16b, v31.16b
fmls v31.2d, v2.2d, v11.2d[1]
#else
fmul v31.2d, v2.2d, v11.2d[1]
#endif
OP_ir v31.2d, v3.2d, v10.2d[1]

View File

@ -111,14 +111,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/******* INIT FOR F1 AND S1 LOOP ******/
#if !defined(DOUBLE)
ins v0.s[1], v0.s[0] // R(ALPHA), R(ALPHA)
fneg s2, ALPHA_I
eor v2.16b, v2.16b, v2.16b
fsub s2, s2, ALPHA_I
ins v1.s[1], v2.s[0] // -I(ALPHA), I(ALPHA)
#if !defined(XCONJ)
ext v1.8b, v1.8b, v1.8b, #4 // I(ALPHA), -I(ALPHA)
#endif
#else
ins v0.d[1], v0.d[0] // R(ALPHA), R(ALPHA)
fneg d2, ALPHA_I
eor v2.16b, v2.16b, v2.16b
fsub d2, d2, ALPHA_I
ins v1.d[1], v2.d[0] // -I(ALPHA), I(ALPHA)
#if !defined(XCONJ)
ext v1.16b, v1.16b, v1.16b, #8 // I(ALPHA), -I(ALPHA)
@ -156,8 +158,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
fmul v11.4s, v9.4s, v7.4s // [+ R(X) * R(ALPHA)]
fmls v11.4s, v10.4s, v8.4s // [- I(X) * I(ALPHA)]
fmul v12.4s, v9.4s, v8.4s // [R(X) * I(ALPHA)]
fneg v12.4s, v12.4s // [- R(X) * I(ALPHA)]
eor v12.16b, v12.16b, v12.16b
fmls v12.4s, v9.4s, v8.4s // [- R(X) * I(ALPHA)]
fmla v12.4s, v10.4s, v7.4s // [- I(X) * R(ALPHA)]
#endif
#endif // CONJ
@ -170,24 +172,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ins v3.s[0], v2.s[1]
#if !defined(CONJ)
#if !defined(XCONJ)
fneg s4, s3
eor v4.16b, v4.16b, v4.16b
fsub s4, s4, s3
ins v3.s[1], v4.s[0]
ext v3.8b, v3.8b, v3.8b, #4 // [I(TEMP), -I(TEMP)]
ins v2.s[1], v2.s[0] // [R(TEMP), R(TEMP)]
#else
fneg s4, s3
eor v4.16b, v4.16b, v4.16b
fsub s4, s4, s3
ins v3.s[1], v4.s[0] // [-I(TEMP), I(TEMP)]
ins v2.s[1], v2.s[0] // [R(TEMP), R(TEMP)]
#endif
#else // CONJ
#if !defined(XCONJ)
ins v3.s[1], v3.s[0] // [I(TEMP), I(TEMP)]
fneg s4, s2
eor v4.16b, v4.16b, v4.16b
fsub s4, s4, s2
ins v2.s[1], v4.s[0] // [-R(TEMP), R(TEMP)]
#else
fneg s3, s3
eor v4.16b, v4.16b, v4.16b
fsub s3, s4, s3
ins v3.s[1], v3.s[0] // [-I(TEMP), -I(TEMP)]
fneg s4, s2
eor v4.16b, v4.16b, v4.16b
fsub s4, s4, s2
ins v2.s[1], v4.s[0] // [-R(TEMP), R(TEMP)]
#endif
#endif // CONJ
@ -220,8 +227,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
fmul v11.2d, v9.2d, v7.2d // [+ R(X) * R(ALPHA)]
fmls v11.2d, v10.2d, v8.2d // [- I(X) * I(ALPHA)]
fmul v12.2d, v9.2d, v8.2d // [R(X) * I(ALPHA)]
fneg v12.2d, v12.2d // [- R(X) * I(ALPHA)]
eor v12.16b, v12.16b, v12.16b
fmls v12.2d, v9.2d, v8.2d // [- R(X) * I(ALPHA)]
fmla v12.2d, v10.2d, v7.2d // [- I(X) * R(ALPHA)]
#endif
#endif // CONJ
@ -234,24 +241,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ins v3.d[0], v2.d[1] // I(TEMP)
#if !defined(CONJ)
#if !defined(XCONJ)
fneg d4, d3 // -I(TEMP)
eor v4.16b, v4.16b, v4.16b
fsub d4, d4, d3
ins v3.d[1], v4.d[0]
ext v3.16b, v3.16b, v3.16b, #8 // [I(TEMP), -I(TEMP)]
ins v2.d[1], v2.d[0] // [R(TEMP), R(TEMP)]
#else
fneg d4, d3 // -I(TEMP)
eor v4.16b, v4.16b, v4.16b
fsub d4, d4, d3
ins v3.d[1], v4.d[0] // [-I(TEMP), I(TEMP)]
ins v2.d[1], v2.d[0] // [R(TEMP), R(TEMP)]
#endif
#else // CONJ
#if !defined(XCONJ)
ins v3.d[1], v3.d[0] // [I(TEMP), I(TEMP)]
fneg d4, d2 // -R(TEMP)
eor v4.16b, v4.16b, v4.16b
fsub d4, d4, d2
ins v2.d[1], v4.d[0] // [-R(TEMP), R(TEMP)]
#else
fneg d3, d3 // -I(TEMP)
eor v4.16b, v4.16b, v4.16b
fsub d3, d4, d3
ins v3.d[1], v3.d[0] // [-I(TEMP), -I(TEMP)]
fneg d4, d2 // -R(TEMP)
eor v4.16b, v4.16b, v4.16b
fsub d4, d4, d2
ins v2.d[1], v4.d[0] // [-R(TEMP), R(TEMP)]
#endif
#endif // CONJ

View File

@ -96,22 +96,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(XCONJ)
#if !defined(DOUBLE)
ins v0.s[1], v0.s[0] // v0 = ALPHA_R, ALPHA_R
fneg s2, ALPHA_I
eor v2.16b, v2.16b, v2.16b
fsub s2, s2, ALPHA_I
ins v1.s[1], v2.s[0]
ext v1.8b, v1.8b, v1.8b, #4 // v1 = ALPHA_I, -ALPHA_I
#else
ins v0.d[1], v0.d[0] // v0 = ALPHA_R, ALPHA_R
fneg d2, ALPHA_I
eor v2.16b, v2.16b, v2.16b
fsub d2, d2, ALPHA_I
ins v1.d[1], v2.d[0]
ext v1.16b, v1.16b, v1.16b, #8 // v1 = ALPHA_I, -ALPHA_I
#endif
#else // XCONJ
#if !defined(DOUBLE)
fneg s2, ALPHA_R
eor v2.16b, v2.16b, v2.16b
fsub s2, s2, ALPHA_R
ins v0.s[1], v2.s[0] // v0 = -ALPHA_R, ALPHA_R
ins v1.s[1], v1.s[0] // v1 = ALPHA_I, ALPHA_I
#else
fneg d2, ALPHA_R
eor v2.16b, v2.16b, v2.16b
fsub d2, d2, ALPHA_R
ins v0.d[1], v2.d[0] // v0 = -ALPHA_R, ALPHA_R
ins v1.d[1], v1.d[0] // v1 = ALPHA_I, ALPHA_I
#endif
@ -136,89 +140,51 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ld2 {v11.4s, v12.4s}, [X_PTR], #32
ld2 {v13.4s, v14.4s}, [A_PTR], #32
#if !defined(CONJ)
#if !defined(XCONJ)
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
fmls v9.4s, v12.4s, v14.4s // [- I(X) * A_I]
fmla v10.4s, v11.4s, v14.4s // [+ R(X) * A_I]
fmla v10.4s, v12.4s, v13.4s // [+ I(X) * A_R]
#else
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
fmla v9.4s, v12.4s, v14.4s // [+ I(X) * A_I]
fmla v10.4s, v11.4s, v14.4s // [+ R(X) * A_I]
fmls v10.4s, v12.4s, v13.4s // [- I(X) * A_R]
#endif
#else // CONJ
#if !defined(XCONJ)
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
fmla v9.4s, v12.4s, v14.4s // [+ I(X) * A_I]
fmls v10.4s, v11.4s, v14.4s // [- R(X) * A_I]
fmla v10.4s, v12.4s, v13.4s // [+ I(X) * A_R]
#else
fmla v9.4s, v11.4s, v13.4s // [+ R(X) * A_R]
fmls v9.4s, v12.4s, v14.4s // [- I(X) * A_I]
fmls v10.4s, v11.4s, v14.4s // [- R(X) * A_I]
fmls v10.4s, v12.4s, v13.4s // [- I(X) * A_R]
#endif
#endif // CONJ
#else // DOUBLE
ld2 {v11.2d, v12.2d}, [X_PTR], #32
ld2 {v13.2d, v14.2d}, [A_PTR], #32
prfm PLDL1STRM, [X_PTR, #512]
#if !defined(CONJ)
#if !defined(XCONJ)
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
fmls v9.2d, v12.2d, v14.2d // [- I(X) * A_I]
fmla v10.2d, v11.2d, v14.2d // [+ R(X) * A_I]
fmla v10.2d, v12.2d, v13.2d // [+ I(X) * A_R]
#else
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
fmla v9.2d, v12.2d, v14.2d // [+ I(X) * A_I]
fmla v10.2d, v11.2d, v14.2d // [+ R(X) * A_I]
fmls v10.2d, v12.2d, v13.2d // [- I(X) * A_R]
#endif
#else // CONJ
#if !defined(XCONJ)
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
fmla v9.2d, v12.2d, v14.2d // [+ I(X) * A_I]
fmls v10.2d, v11.2d, v14.2d // [- R(X) * A_I]
fmla v10.2d, v12.2d, v13.2d // [+ I(X) * A_R]
#else
fmla v9.2d, v11.2d, v13.2d // [+ R(X) * A_R]
fmls v9.2d, v12.2d, v14.2d // [- I(X) * A_I]
fmls v10.2d, v11.2d, v14.2d // [- R(X) * A_I]
fmls v10.2d, v12.2d, v13.2d // [- I(X) * A_R]
#endif
#endif // CONJ
ld2 {v17.2d, v18.2d}, [X_PTR], #32
ld2 {v19.2d, v20.2d}, [A_PTR], #32
prfm PLDL1STRM, [A_PTR, #512]
#if !defined(CONJ)
#if !defined(XCONJ)
#if (!defined(CONJ) && !defined(XCONJ)) || (defined(CONJ) && defined(XCONJ))
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
fmls v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
fmla v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
fmla v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
#else
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
fmla v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
fmla v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
fmls v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
#endif
#else // CONJ
#if !defined(XCONJ)
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
fmla v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
fmls v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
fmla v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
#else
fmla v15.2d, v17.2d, v19.2d // [+ R(X) * A_R]
fmls v15.2d, v18.2d, v20.2d // [- I(X) * A_I]
fmls v16.2d, v17.2d, v20.2d // [+ R(X) * A_I]
fmls v16.2d, v18.2d, v19.2d // [+ I(X) * A_R]
#endif
#endif // CONJ
#endif //DOUBLE
.endm
@ -252,7 +218,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ld1r {v4.2s}, [A_PTR], #4 // [A0, A0]
ld1 {v5.s}[0], [A_PTR], #4 // A1
ld1 {v6.2s}, [X_PTR], #8 // [X1, X0]
fneg s16, s5
eor v16.16b, v16.16b, v16.16b
fsub s16, s16, s5
ins v5.s[1], v16.s[0] // [-A1, A1]
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
ext v5.8b, v5.8b, v5.8b, #4 // [A1, -A1]
@ -264,7 +231,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ld1r {v4.2d}, [A_PTR], #8 // [A0, A0]
ld1 {v5.d}[0], [A_PTR], #8 // A1
ld1 {v6.2d}, [X_PTR], #16 // [X1, X0]
fneg d16, d5
eor v16.16b, v16.16b, v16.16b
fsub d16, d16, d5
ins v5.d[1], v16.d[0] // [-A1, A1]
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
ext v5.16b, v5.16b, v5.16b, #8 // [A1, -A1]
@ -284,7 +252,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ld1r {v4.2s}, [A_PTR], #4 // [A0, A0]
ld1 {v5.s}[0], [A_PTR], #4 // A1
ld1 {v6.2s}, [X_PTR], INC_X // [X1, X0]
fneg s16, s5
eor v16.16b, v16.16b, v16.16b
fsub s16, s16, s5
ins v5.s[1], v16.s[0] // [-A1, A1]
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
ext v5.8b, v5.8b, v5.8b, #4 // [A1, -A1]
@ -296,7 +265,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ld1r {v4.2d}, [A_PTR], #8 // [A0, A0]
ld1 {v5.d}[0], [A_PTR], #8 // A1
ld1 {v6.2d}, [X_PTR], INC_X // [X1, X0]
fneg d16, d5
eor v16.16b, v16.16b, v16.16b
fsub d16, d16, d5
ins v5.d[1], v16.d[0] // [-A1, A1]
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
ext v5.16b, v5.16b, v5.16b, #8 // [A1, -A1]

View File

@ -28,135 +28,217 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ASSEMBLER
#include "common.h"
#define N x0 /* vector length */
#define X x1 /* X vector address */
#define INC_X x2 /* X stride */
#define I x5 /* loop variable */
#define N x0
#define X x1
#define INC_X x2
/*******************************************************************************
* Macro definitions
*******************************************************************************/
#define I x3
#if !defined(DOUBLE)
#define TMPF s6
#define SSQ s0
#define TMPVF {v6.s}[0]
#define SZ 4
#define SCALE s1
#define REGZERO s6
#define REGONE s7
#else
#define TMPF d6
#define SSQ d0
#define TMPVF {v6.d}[0]
#define SZ 8
#define SCALE d1
#define REGZERO d6
#define REGONE d7
#endif
/******************************************************************************/
/**************************************************************************************
* Macro definitions
**************************************************************************************/
.macro KERNEL_F1
#if !defined(DOUBLE)
ld1 {v1.2s}, [X], #8
fmul v1.2s, v1.2s, v1.2s
faddp TMPF, v1.2s
fadd SSQ, SSQ, TMPF
ldr s4, [X], #4
fcmp s4, REGZERO
beq KERNEL_F1_NEXT_\@
fabs s4, s4
fcmp SCALE, s4
bge KERNEL_F1_SCALE_GE_XR_\@
fdiv s2, SCALE, s4
fmul s2, s2, s2
fmul s3, SSQ, s2
fadd SSQ, REGONE, s3
fmov SCALE, s4
b KERNEL_F1_NEXT_\@
KERNEL_F1_SCALE_GE_XR_\@:
fdiv s2, s4, SCALE
fmla SSQ, s2, v2.s[0]
KERNEL_F1_NEXT_\@:
ldr s5, [X], #4
fcmp s5, REGZERO
beq KERNEL_F1_END_\@
fabs s5, s5
fcmp SCALE, s5
bge KERNEL_F1_SCALE_GE_XI_\@
fdiv s2, SCALE, s5
fmul s2, s2, s2
fmul s3, SSQ, s2
fadd SSQ, REGONE, s3
fmov SCALE, s5
b KERNEL_F1_END_\@
KERNEL_F1_SCALE_GE_XI_\@:
fdiv s2, s5, SCALE
fmla SSQ, s2, v2.s[0]
#else
ld1 {v1.2d}, [X], #16
fmul v1.2d, v1.2d, v1.2d
faddp TMPF, v1.2d
fadd SSQ, SSQ, TMPF
#endif
.endm
.macro KERNEL_F8
#if !defined(DOUBLE)
ld1 {v1.4s, v2.4s}, [X], #32
fmla v0.4s, v1.4s, v1.4s
fmla v5.4s, v2.4s, v2.4s
ld1 {v3.4s,v4.4s}, [X], #32
fmla v0.4s, v3.4s, v3.4s
fmla v5.4s, v4.4s, v4.4s
PRFM PLDL1KEEP, [X, #1024]
#else // DOUBLE
ld1 {v1.2d, v2.2d}, [X], #32
fmla v0.2d, v1.2d, v1.2d
fmla v5.2d, v2.2d, v2.2d
ld1 {v3.2d, v4.2d}, [X], #32
fmla v0.2d, v3.2d, v3.2d
fmla v5.2d, v4.2d, v4.2d
ld1 {v16.2d, v17.2d}, [X], #32
fmla v0.2d, v16.2d, v16.2d
fmla v5.2d, v17.2d, v17.2d
ld1 {v18.2d, v19.2d}, [X], #32
fmla v0.2d, v18.2d, v18.2d
fmla v5.2d, v19.2d, v19.2d
#endif
.endm
.macro nrm2_kernel_F8_FINALIZE
#if !defined(DOUBLE)
fadd v0.4s, v0.4s, v5.4s
ext v1.16b, v0.16b, v0.16b, #8
fadd v0.2s, v0.2s, v1.2s
faddp SSQ, v0.2s
#else
fadd v0.2d, v0.2d, v5.2d
faddp SSQ, v0.2d
#endif
.endm
.macro INIT_S
#if !defined(DOUBLE)
lsl INC_X, INC_X, #3
ld1 {v1.2s}, [X], INC_X
fmul v1.2s, v1.2s, v1.2s
faddp SSQ, v1.2s
#else
lsl INC_X, INC_X, #4
ld1 {v1.2d}, [X], INC_X
fmul v1.2d, v1.2d, v1.2d
faddp SSQ, v1.2d
ldr d4, [X], #8
fcmp d4, REGZERO
beq KERNEL_F1_NEXT_\@
fabs d4, d4
fcmp SCALE, d4
bge KERNEL_F1_SCALE_GE_XR_\@
fdiv d2, SCALE, d4
fmul d2, d2, d2
fmul d3, SSQ, d2
fadd SSQ, REGONE, d3
fmov SCALE, d4
b KERNEL_F1_NEXT_\@
KERNEL_F1_SCALE_GE_XR_\@:
fdiv d2, d4, SCALE
fmla SSQ, d2, v2.d[0]
KERNEL_F1_NEXT_\@:
ldr d5, [X], #8
fcmp d5, REGZERO
beq KERNEL_F1_END_\@
fabs d5, d5
fcmp SCALE, d5
bge KERNEL_F1_SCALE_GE_XI_\@
fdiv d2, SCALE, d5
fmul d2, d2, d2
fmul d3, SSQ, d2
fadd SSQ, REGONE, d3
fmov SCALE, d5
b KERNEL_F1_END_\@
KERNEL_F1_SCALE_GE_XI_\@:
fdiv d2, d5, SCALE
fmla SSQ, d2, v2.d[0]
#endif
KERNEL_F1_END_\@:
.endm
.macro KERNEL_S1
#if !defined(DOUBLE)
ld1 {v1.2s}, [X], INC_X
fmul v1.2s, v1.2s, v1.2s
faddp TMPF, v1.2s
fadd SSQ, SSQ, TMPF
ldr s4, [X]
fcmp s4, REGZERO
beq KERNEL_S1_NEXT_\@
fabs s4, s4
fcmp SCALE, s4
bge KERNEL_S1_SCALE_GE_XR_\@
fdiv s2, SCALE, s4
fmul s2, s2, s2
fmul s3, SSQ, s2
fadd SSQ, REGONE, s3
fmov SCALE, s4
b KERNEL_S1_NEXT_\@
KERNEL_S1_SCALE_GE_XR_\@:
fdiv s2, s4, SCALE
fmla SSQ, s2, v2.s[0]
KERNEL_S1_NEXT_\@:
ldr s5, [X, #4]
fcmp s5, REGZERO
beq KERNEL_S1_END_\@
fabs s5, s5
fcmp SCALE, s5
bge KERNEL_S1_SCALE_GE_XI_\@
fdiv s2, SCALE, s5
fmul s2, s2, s2
fmul s3, SSQ, s2
fadd SSQ, REGONE, s3
fmov SCALE, s5
b KERNEL_S1_END_\@
KERNEL_S1_SCALE_GE_XI_\@:
fdiv s2, s5, SCALE
fmla SSQ, s2, v2.s[0]
#else
ld1 {v1.2d}, [X], INC_X
fmul v1.2d, v1.2d, v1.2d
faddp TMPF, v1.2d
fadd SSQ, SSQ, TMPF
ldr d4, [X]
fcmp d4, REGZERO
beq KERNEL_S1_NEXT_\@
fabs d4, d4
fcmp SCALE, d4
bge KERNEL_S1_SCALE_GE_XR_\@
fdiv d2, SCALE, d4
fmul d2, d2, d2
fmul d3, SSQ, d2
fadd SSQ, REGONE, d3
fmov SCALE, d4
b KERNEL_S1_NEXT_\@
KERNEL_S1_SCALE_GE_XR_\@:
fdiv d2, d4, SCALE
fmla SSQ, d2, v2.d[0]
KERNEL_S1_NEXT_\@:
ldr d5, [X, #8]
fcmp d5, REGZERO
beq KERNEL_S1_END_\@
fabs d5, d5
fcmp SCALE, d5
bge KERNEL_S1_SCALE_GE_XI_\@
fdiv d2, SCALE, d5
fmul d2, d2, d2
fmul d3, SSQ, d2
fadd SSQ, REGONE, d3
fmov SCALE, d5
b KERNEL_S1_END_\@
KERNEL_S1_SCALE_GE_XI_\@:
fdiv d2, d5, SCALE
fmla SSQ, d2, v2.d[0]
#endif
KERNEL_S1_END_\@:
add X, X, INC_X
.endm
.macro KERNEL_F8
KERNEL_F1
KERNEL_F1
KERNEL_F1
KERNEL_F1
KERNEL_F1
KERNEL_F1
KERNEL_F1
KERNEL_F1
.endm
.macro INIT_S
#if !defined(DOUBLE)
lsl INC_X, INC_X, #3 // INC_X * SIZE
#else
lsl INC_X, INC_X, #4 // INC_X * SIZE
#endif
.endm
/*******************************************************************************
.macro INIT
eor v1.16b, v1.16b, v1.16b // scale=0.0
fmov SSQ, #1.0
fmov REGONE, SSQ
fmov REGZERO, SCALE
.endm
/**************************************************************************************
* End of macro definitions
*******************************************************************************/
**************************************************************************************/
PROLOGUE
#if !defined(DOUBLE)
fmov SSQ, wzr
fmov s5, SSQ
#else
fmov SSQ, xzr
fmov d5, SSQ
#endif
.align 5
INIT
cmp N, #0
ble nrm2_kernel_L999
cmp INC_X, #0
beq nrm2_kernel_L999
cmp N, xzr
ble nrm2_kernel_zero
cmp INC_X, xzr
ble nrm2_kernel_zero
cmp INC_X, #1
bne nrm2_kernel_S_BEGIN
nrm2_kernel_F_BEGIN:
asr I, N, #3
asr I, N, #3 // I = N / 8
cmp I, xzr
beq nrm2_kernel_F1_INIT
ble nrm2_kernel_F1
nrm2_kernel_F8:
@ -165,13 +247,12 @@ nrm2_kernel_F8:
subs I, I, #1
bne nrm2_kernel_F8
nrm2_kernel_F8_FINALIZE
nrm2_kernel_F1:
ands I, N, #7
ble nrm2_kernel_L999
nrm2_kernel_F10:
KERNEL_F1
@ -181,35 +262,13 @@ nrm2_kernel_F10:
b nrm2_kernel_L999
nrm2_kernel_F1_INIT:
b nrm2_kernel_F1
nrm2_kernel_S_BEGIN:
INIT_S
subs N, N, #1
ble nrm2_kernel_L999
mov I, N
asr I, N, #2
cmp I, xzr
ble nrm2_kernel_S1
nrm2_kernel_S4:
KERNEL_S1
KERNEL_S1
KERNEL_S1
KERNEL_S1
subs I, I, #1
bne nrm2_kernel_S4
nrm2_kernel_S1:
ands I, N, #3
ble nrm2_kernel_L999
.align 5
nrm2_kernel_S10:
@ -218,11 +277,12 @@ nrm2_kernel_S10:
subs I, I, #1
bne nrm2_kernel_S10
nrm2_kernel_L999:
fsqrt SSQ, SSQ
ret
fmul SSQ, SCALE, SSQ
nrm2_kernel_zero:
ret
EPILOGUE

View File

@ -32,6 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define X x3 /* X vector address */
#define INC_X x4 /* X stride */
#define I x5 /* loop variable */
#define X_COPY x6 /* Copy of X */
/*******************************************************************************
* Macro definitions
@ -51,42 +52,54 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if !defined(DOUBLE)
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
fneg s2, DA_I
ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I
ext v1.8b, v1.8b, v1.8b, #4 // v1 = DA_I, -DA_I
#else
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
fneg d2, DA_I
ins v1.d[1], v2.d[0] // v1 = DA_I, DA_I
ext v1.16b, v1.16b, v1.16b, #8 // v1 = DA_I, -DA_I
#endif
.endm
.macro KERNEL_F1
#if !defined(DOUBLE)
ld1 {v2.2s}, [X] // X1, X0
ext v3.8b, v2.8b, v2.8b, #4 // X0, X1
fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0
fmla v2.2s, v3.2s, v1.2s // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
st1 {v2.2s}, [X], #8
fmul s3, DA_R, v2.s[0] // DA_R*X0
fmul s5, DA_I, v2.s[1] // DA_I*X1
fsub s3, s3, s5 // DA_R*X0-DA_I*X1
fmul s4, DA_I, v2.s[0] // DA_I*X0
fmul s5, DA_R, v2.s[1] // DA_R*X1
fadd s4, s4, s5 // DA_I*X0+DA_R*X1
ins v3.s[1], v4.s[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
st1 {v3.2s}, [X], #8
#else
ld1 {v2.2d}, [X] // X1, X0
ext v3.16b, v2.16b, v2.16b, #8 // X0, X1
fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0
fmla v2.2d, v3.2d, v1.2d // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
st1 {v2.2d}, [X], #16
#endif
fmul d3, DA_R, v2.d[0] // DA_R*X0
fmul d5, DA_I, v2.d[1] // DA_I*X1
fsub d3, d3, d5 // DA_R*X0-DA_I*X1
fmul d4, DA_I, v2.d[0] // DA_I*X0
fmul d5, DA_R, v2.d[1] // DA_R*X1
fadd d4, d4, d5 // DA_I*X0+DA_R*X1
ins v3.d[1], v4.d[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
st1 {v3.2d}, [X], #16
#endif
.endm
.macro KERNEL_INIT_F4
#if !defined(DOUBLE)
// Replicate the lower 2 floats into the upper 2 slots
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R, DA_R, DA_R
ins v1.d[1], v1.d[0] // v1 = DA_I, DA_I, DA_I, DA_I
ins v16.s[0], v0.s[0]
ins v16.s[1], v16.s[0]
ins v16.d[1], v16.d[0]
ins v17.s[0], v1.s[0]
ins v17.s[1], v17.s[0]
ins v17.d[1], v17.d[0]
#else //DOUBLE
ins v16.d[0], v0.d[0]
ins v16.d[1], v16.d[0]
ins v17.d[0], v1.d[0]
ins v17.d[1], v17.d[0]
#endif
.endm
@ -94,46 +107,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4
#if !defined(DOUBLE)
ld1 {v2.4s,v3.4s}, [X] // V2 = X[3], X[2], X[1], X[0]
// V3 = X[7], X[6], X[5], X[4]
ld2 {v2.4s, v3.4s}, [X], #32
ext v6.8b, v2.8b, v2.8b, #4 // V6 = - , - , X[0], X[1]
ins v6.s[2], v2.s[3] // V6 = - , X[3], X[0], X[1]
ins v6.s[3], v2.s[2] // V6 = X[2], X[3], X[0], X[1]
fmul v2.4s, v0.4s, v2.4s // X'[ix] += DA_R * X[ix]
// X'[ix+1] += DA_R * X[ix+1]
fmla v2.4s, v1.4s, v6.4s // X'[ix] += -DA_I * X[ix+1]
// X'[ix+1] += DA_I * X[ix]
fmul v4.4s, v2.4s, v16.4s
fmul v6.4s, v3.4s, v17.4s
fsub v4.4s, v4.4s, v6.4s
ext v7.8b, v3.8b, v3.8b, #4 // V7 = - , - , X[4], X[5]
ins v7.s[2], v3.s[3] // V7 = - , X[7], X[4], X[5]
ins v7.s[3], v3.s[2] // V7 = X[6], X[7], X[4], X[5]
fmul v3.4s, v0.4s, v3.4s // X'[ix] += DA_R * X[ix]
// X'[ix+1] += DA_R * X[ix+1]
fmla v3.4s, v1.4s, v7.4s // X'[ix] += -DA_I * X[ix+1]
// X'[ix+1] += DA_I * X[ix]
fmul v5.4s, v2.4s, v17.4s
fmul v6.4s, v3.4s, v16.4s
fadd v5.4s, v5.4s, v6.4s
st1 {v2.4s,v3.4s}, [X], #32
st2 {v4.4s, v5.4s}, [X_COPY], #32
#else // DOUBLE
ld1 {v2.2d,v3.2d,v4.2d,v5.2d}, [X] // CX0, CX1, CX2, CX3
ext v20.16b, v2.16b, v2.16b, #8 // X[ix], X[ix+1]
ext v21.16b, v3.16b, v3.16b, #8 // X[ix], X[ix+1]
ext v22.16b, v4.16b, v4.16b, #8 // X[ix], X[ix+1]
ext v23.16b, v5.16b, v5.16b, #8 // X[ix], X[ix+1]
ld2 {v2.2d, v3.2d}, [X], #32
fmul v2.2d, v0.2d, v2.2d
fmla v2.2d, v1.2d, v20.2d
fmul v4.2d, v2.2d, v16.2d
fmul v6.2d, v3.2d, v17.2d
fsub v4.2d, v4.2d, v6.2d
fmul v5.2d, v2.2d, v17.2d
fmul v6.2d, v3.2d, v16.2d
fadd v5.2d, v5.2d, v6.2d
fmul v3.2d, v0.2d, v3.2d
fmla v3.2d, v1.2d, v21.2d
st1 {v2.2d,v3.2d}, [X], #32
st2 {v4.2d, v5.2d}, [X_COPY], #32
fmul v4.2d, v0.2d, v4.2d
fmla v4.2d, v1.2d, v22.2d
ld2 {v18.2d, v19.2d}, [X], #32
fmul v5.2d, v0.2d, v5.2d
fmla v5.2d, v1.2d, v23.2d
st1 {v4.2d,v5.2d}, [X], #32
fmul v20.2d, v18.2d, v16.2d
fmul v6.2d, v19.2d, v17.2d
fsub v20.2d, v20.2d, v6.2d
fmul v21.2d, v18.2d, v17.2d
fmul v6.2d, v19.2d, v16.2d
fadd v21.2d, v21.2d, v6.2d
st2 {v20.2d, v21.2d}, [X_COPY], #32
#endif
PRFM PLDL1KEEP, [X, #1024]
.endm
@ -149,21 +155,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.endm
.macro KERNEL_S1
#if !defined(DOUBLE)
ld1 {v2.2s}, [X] // X1, X0
ext v3.8b, v2.8b, v2.8b, #4 // X0, X1
fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0
fmla v2.2s, v3.2s, v1.2s // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
st1 {v2.2s}, [X], INC_X
fmul s3, DA_R, v2.s[0] // DA_R*X0
fmul s5, DA_I, v2.s[1] // DA_I*X1
fsub s3, s3, s5 // DA_R*X0-DA_I*X1
fmul s4, DA_I, v2.s[0] // DA_I*X0
fmul s5, DA_R, v2.s[1] // DA_R*X1
fadd s4, s4, s5 // DA_I*X0+DA_R*X1
ins v3.s[1], v4.s[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
st1 {v3.2s}, [X], INC_X
#else
ld1 {v2.2d}, [X] // X1, X0
ext v3.16b, v2.16b, v2.16b, #8 // X0, X1
fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0
fmla v2.2d, v3.2d, v1.2d // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
st1 {v2.2d}, [X], INC_X
#endif
fmul d3, DA_R, v2.d[0] // DA_R*X0
fmul d5, DA_I, v2.d[1] // DA_I*X1
fsub d3, d3, d5 // DA_R*X0-DA_I*X1
fmul d4, DA_I, v2.d[0] // DA_I*X0
fmul d5, DA_R, v2.d[1] // DA_R*X1
fadd d4, d4, d5 // DA_I*X0+DA_R*X1
ins v3.d[1], v4.d[0] // DA_R*X1+DA_I*X0, DA_R*X0-DA_I*X1
st1 {v3.2d}, [X], INC_X
#endif
.endm
/*******************************************************************************
@ -172,20 +188,53 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PROLOGUE
b zscal_begin
data_ar:
.word 0x3e44fae6
data_ai:
.word 0x3d320fa2
data_xr:
.word 0x3f4baff1
data_xi:
.word 0xbe8ef0bd
zscal_begin:
ldr s20, data_ar
ldr s21, data_ai
ldr s22, data_xr
ldr s23, data_xi
fmul s24, s22, s21
fmla s24, s23, v20.s[0]
fmul s25, s22, s21
fmul s26, s23, s20
fadd s25, s25, s26
mov X_COPY, X
cmp N, xzr
ble zscal_kernel_L999
fcmp DA_R, #0.0
bne zscal_kernel_1
bne zscal_kernel_R_non_zero
fcmp DA_I, #0.0
beq zscal_kernel_zero
beq zscal_kernel_RI_zero
// TODO: special case DA_R == 0 && DA_I != 0
b zscal_kernel_R_zero
zscal_kernel_1:
zscal_kernel_R_non_zero:
// TODO: special case DA_R != 0 && DA_I == 0
fcmp DA_I, #0.0
beq zscal_kernel_I_zero
/*******************************************************************************
* A_R != 0 && A_I != 0
*******************************************************************************/
zscal_kernel_RI_non_zero:
INIT
@ -257,16 +306,85 @@ zscal_kernel_L999:
mov w0, wzr
ret
zscal_kernel_zero:
/*******************************************************************************
* A_R == 0 && A_I != 0
*******************************************************************************/
zscal_kernel_R_zero:
INIT_S
#if !defined(DOUBLE)
eor v2.16b, v2.16b, v2.16b
fsub s2, s2, DA_I
ins v1.s[1], v2.s[0] // v1 = -DA_I, DA_I
#else
eor v2.16b, v2.16b, v2.16b
fsub d2, d2, DA_I
ins v1.d[1], v2.d[0] // v1 = -DA_I, DA_I
#endif
zscal_kernel_R_zero_1:
#if !defined(DOUBLE)
ld1 {v2.2s}, [X] // X1, X0
fmul v2.2s, v2.2s, v1.2s // -DA_I*X1, DA_I*X0
ext v2.8b, v2.8b, v2.8b, #4 // DA_I*X0, -DA_I*X1
st1 {v2.2s}, [X]
#else
ld1 {v2.2d}, [X] // X1, X0
fmul v2.2d, v2.2d, v1.2d // -DA_I*X1, DA_I*X0
ext v2.16b, v2.16b, v2.16b, #8 // DA_I*X0, -DA_I*X1
st1 {v2.2d}, [X]
#endif
add X, X, INC_X
subs N, N, #1
bne zscal_kernel_R_zero_1
mov w0, wzr
ret
/*******************************************************************************
* A_R != 0 && A_I == 0
*******************************************************************************/
zscal_kernel_I_zero:
INIT_S
#if !defined(DOUBLE)
ins v0.s[1], v0.s[0] // v0 = DA_R, DA_R
#else
ins v0.d[1], v0.d[0] // v0 = DA_R, DA_R
#endif
zscal_kernel_I_zero_1:
#if !defined(DOUBLE)
ld1 {v2.2s}, [X] // X1, X0
fmul v2.2s, v2.2s, v0.2s // DA_R*X1, DA_R*X0
st1 {v2.2s}, [X]
#else
ld1 {v2.2d}, [X] // X1, X0
fmul v2.2d, v2.2d, v0.2d // DA_R*X1, DA_R*X0
st1 {v2.2d}, [X]
#endif
add X, X, INC_X
subs N, N, #1
bne zscal_kernel_I_zero_1
mov w0, wzr
ret
/*******************************************************************************
* A_R == 0 && A_I == 0
*******************************************************************************/
zscal_kernel_RI_zero:
INIT_S
zscal_kernel_Z1:
zscal_kernel_RI_zero_1:
stp DA_R, DA_I, [X]
add X, X, INC_X
subs N, N, #1
bne zscal_kernel_Z1
bne zscal_kernel_RI_zero_1
mov w0, wzr
ret

View File

@ -187,73 +187,89 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmul v16.2d, v0.2d, v8.2d[0]
OP_ii v16.2d, v1.2d, v9.2d[0]
fmul v17.2d, v0.2d, v9.2d[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v17.2d, v17.2d
eor v17.16b, v17.16b, v17.16b
fmls v17.2d, v0.2d, v9.2d[0]
#else
fmul v17.2d, v0.2d, v9.2d[0]
#endif
OP_ir v17.2d, v1.2d, v8.2d[0]
fmul v18.2d, v2.2d, v8.2d[0]
OP_ii v18.2d, v3.2d, v9.2d[0]
fmul v19.2d, v2.2d, v9.2d[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v19.2d, v19.2d
eor v19.16b, v19.16b, v19.16b
fmls v19.2d, v2.2d, v9.2d[0]
#else
fmul v19.2d, v2.2d, v9.2d[0]
#endif
OP_ir v19.2d, v3.2d, v8.2d[0]
fmul v20.2d, v0.2d, v8.2d[1]
OP_ii v20.2d, v1.2d, v9.2d[1]
fmul v21.2d, v0.2d, v9.2d[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v21.2d, v21.2d
eor v21.16b, v21.16b, v21.16b
fmls v21.2d, v0.2d, v9.2d[1]
#else
fmul v21.2d, v0.2d, v9.2d[1]
#endif
OP_ir v21.2d, v1.2d, v8.2d[1]
fmul v22.2d, v2.2d, v8.2d[1]
OP_ii v22.2d, v3.2d, v9.2d[1]
fmul v23.2d, v2.2d, v9.2d[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v23.2d, v23.2d
eor v23.16b, v23.16b, v23.16b
fmls v23.2d, v2.2d, v9.2d[1]
#else
fmul v23.2d, v2.2d, v9.2d[1]
#endif
OP_ir v23.2d, v3.2d, v8.2d[1]
fmul v24.2d, v0.2d, v10.2d[0]
OP_ii v24.2d, v1.2d, v11.2d[0]
fmul v25.2d, v0.2d, v11.2d[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v25.2d, v25.2d
eor v25.16b, v25.16b, v25.16b
fmls v25.2d, v0.2d, v11.2d[0]
#else
fmul v25.2d, v0.2d, v11.2d[0]
#endif
OP_ir v25.2d, v1.2d, v10.2d[0]
fmul v26.2d, v2.2d, v10.2d[0]
OP_ii v26.2d, v3.2d, v11.2d[0]
fmul v27.2d, v2.2d, v11.2d[0]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v27.2d, v27.2d
eor v27.16b, v27.16b, v27.16b
fmls v27.2d, v2.2d, v11.2d[0]
#else
fmul v27.2d, v2.2d, v11.2d[0]
#endif
OP_ir v27.2d, v3.2d, v10.2d[0]
fmul v28.2d, v0.2d, v10.2d[1]
OP_ii v28.2d, v1.2d, v11.2d[1]
fmul v29.2d, v0.2d, v11.2d[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v29.2d, v29.2d
eor v29.16b, v29.16b, v29.16b
fmls v29.2d, v0.2d, v11.2d[1]
#else
fmul v29.2d, v0.2d, v11.2d[1]
#endif
OP_ir v29.2d, v1.2d, v10.2d[1]
fmul v30.2d, v2.2d, v10.2d[1]
OP_ii v30.2d, v3.2d, v11.2d[1]
fmul v31.2d, v2.2d, v11.2d[1]
#if defined(NR) || defined(NC) || defined(TR) || defined(TC) || \
defined(RR) || defined(RC) || defined(CR) || defined(CC)
fneg v31.2d, v31.2d
eor v31.16b, v31.16b, v31.16b
fmls v31.2d, v2.2d, v11.2d[1]
#else
fmul v31.2d, v2.2d, v11.2d[1]
#endif
OP_ir v31.2d, v3.2d, v10.2d[1]

View File

@ -137,8 +137,13 @@ endif()
add_library(blas ${ALLOBJ})
if(UNIX)
target_link_libraries(blas m)
endif()
#if(UNIX)
# target_link_libraries(blas m)
#endif()
set_target_properties(
blas PROPERTIES
VERSION ${LAPACK_VERSION}
SOVERSION ${LAPACK_MAJOR_VERSION}
)
target_link_libraries(blas)
lapack_install_library(blas)

View File

@ -23,8 +23,9 @@
*>
*> \verbatim
*>
*> forms the dot product of two vectors, conjugating the first
*> vector.
*> CDOTC forms the dot product of two complex vectors
*> CDOTC = X^H * Y
*>
*> \endverbatim
*
* Authors:
@ -35,7 +36,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup complex_blas_level1
*
@ -51,10 +52,10 @@
* =====================================================================
COMPLEX FUNCTION CDOTC(N,CX,INCX,CY,INCY)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
INTEGER INCX,INCY,N

View File

@ -23,7 +23,9 @@
*>
*> \verbatim
*>
*> CDOTU forms the dot product of two vectors.
*> CDOTU forms the dot product of two complex vectors
*> CDOTU = X^T * Y
*>
*> \endverbatim
*
* Authors:
@ -34,7 +36,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup complex_blas_level1
*
@ -50,10 +52,10 @@
* =====================================================================
COMPLEX FUNCTION CDOTU(N,CX,INCX,CY,INCY)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
INTEGER INCX,INCY,N

View File

@ -165,7 +165,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup complex_blas_level2
*
@ -187,10 +187,10 @@
* =====================================================================
SUBROUTINE CGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* -- Reference BLAS level2 routine (version 3.4.0) --
* -- Reference BLAS level2 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
COMPLEX ALPHA,BETA
@ -319,18 +319,15 @@
JX = KX
IF (INCY.EQ.1) THEN
DO 60 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
K = KUP1 - J
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
Y(I) = Y(I) + TEMP*A(K+I,J)
50 CONTINUE
END IF
JX = JX + INCX
60 CONTINUE
ELSE
DO 80 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
IY = KY
K = KUP1 - J
@ -338,7 +335,6 @@
Y(IY) = Y(IY) + TEMP*A(K+I,J)
IY = IY + INCY
70 CONTINUE
END IF
JX = JX + INCX
IF (J.GT.KU) KY = KY + INCY
80 CONTINUE

View File

@ -166,7 +166,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup complex_blas_level3
*
@ -187,10 +187,10 @@
* =====================================================================
SUBROUTINE CGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* -- Reference BLAS level3 routine (version 3.4.0) --
* -- Reference BLAS level3 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
COMPLEX ALPHA,BETA
@ -317,12 +317,10 @@
60 CONTINUE
END IF
DO 80 L = 1,K
IF (B(L,J).NE.ZERO) THEN
TEMP = ALPHA*B(L,J)
DO 70 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
70 CONTINUE
END IF
80 CONTINUE
90 CONTINUE
ELSE IF (CONJA) THEN
@ -376,12 +374,10 @@
170 CONTINUE
END IF
DO 190 L = 1,K
IF (B(J,L).NE.ZERO) THEN
TEMP = ALPHA*CONJG(B(J,L))
DO 180 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
180 CONTINUE
END IF
190 CONTINUE
200 CONTINUE
ELSE
@ -399,12 +395,10 @@
220 CONTINUE
END IF
DO 240 L = 1,K
IF (B(J,L).NE.ZERO) THEN
TEMP = ALPHA*B(J,L)
DO 230 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
230 CONTINUE
END IF
240 CONTINUE
250 CONTINUE
END IF

View File

@ -136,7 +136,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup complex_blas_level2
*
@ -158,10 +158,10 @@
* =====================================================================
SUBROUTINE CGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* -- Reference BLAS level2 routine (version 3.4.0) --
* -- Reference BLAS level2 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
COMPLEX ALPHA,BETA
@ -285,24 +285,20 @@
JX = KX
IF (INCY.EQ.1) THEN
DO 60 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
DO 50 I = 1,M
Y(I) = Y(I) + TEMP*A(I,J)
50 CONTINUE
END IF
JX = JX + INCX
60 CONTINUE
ELSE
DO 80 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
IY = KY
DO 70 I = 1,M
Y(IY) = Y(IY) + TEMP*A(I,J)
IY = IY + INCY
70 CONTINUE
END IF
JX = JX + INCX
80 CONTINUE
END IF

View File

@ -21,7 +21,7 @@
*>
*> \verbatim
*>
*> DCABS1 computes absolute value of a double complex number
*> DCABS1 computes |Re(.)| + |Im(.)| of a double complex number
*> \endverbatim
*
* Authors:
@ -32,17 +32,17 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup double_blas_level1
*
* =====================================================================
DOUBLE PRECISION FUNCTION DCABS1(Z)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
COMPLEX*16 Z

View File

@ -163,7 +163,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup double_blas_level2
*
@ -185,10 +185,10 @@
* =====================================================================
SUBROUTINE DGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* -- Reference BLAS level2 routine (version 3.4.0) --
* -- Reference BLAS level2 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
DOUBLE PRECISION ALPHA,BETA
@ -312,18 +312,15 @@
JX = KX
IF (INCY.EQ.1) THEN
DO 60 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
K = KUP1 - J
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
Y(I) = Y(I) + TEMP*A(K+I,J)
50 CONTINUE
END IF
JX = JX + INCX
60 CONTINUE
ELSE
DO 80 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
IY = KY
K = KUP1 - J
@ -331,7 +328,6 @@
Y(IY) = Y(IY) + TEMP*A(K+I,J)
IY = IY + INCY
70 CONTINUE
END IF
JX = JX + INCX
IF (J.GT.KU) KY = KY + INCY
80 CONTINUE

View File

@ -166,7 +166,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup double_blas_level3
*
@ -187,10 +187,10 @@
* =====================================================================
SUBROUTINE DGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* -- Reference BLAS level3 routine (version 3.4.0) --
* -- Reference BLAS level3 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
DOUBLE PRECISION ALPHA,BETA
@ -311,12 +311,10 @@
60 CONTINUE
END IF
DO 80 L = 1,K
IF (B(L,J).NE.ZERO) THEN
TEMP = ALPHA*B(L,J)
DO 70 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
70 CONTINUE
END IF
80 CONTINUE
90 CONTINUE
ELSE
@ -353,12 +351,10 @@
140 CONTINUE
END IF
DO 160 L = 1,K
IF (B(J,L).NE.ZERO) THEN
TEMP = ALPHA*B(J,L)
DO 150 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
150 CONTINUE
END IF
160 CONTINUE
170 CONTINUE
ELSE

View File

@ -134,7 +134,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup double_blas_level2
*
@ -156,10 +156,10 @@
* =====================================================================
SUBROUTINE DGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* -- Reference BLAS level2 routine (version 3.4.0) --
* -- Reference BLAS level2 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
DOUBLE PRECISION ALPHA,BETA
@ -278,24 +278,20 @@
JX = KX
IF (INCY.EQ.1) THEN
DO 60 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
DO 50 I = 1,M
Y(I) = Y(I) + TEMP*A(I,J)
50 CONTINUE
END IF
JX = JX + INCX
60 CONTINUE
ELSE
DO 80 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
IY = KY
DO 70 I = 1,M
Y(IY) = Y(IY) + TEMP*A(I,J)
IY = IY + INCY
70 CONTINUE
END IF
JX = JX + INCX
80 CONTINUE
END IF

View File

@ -23,7 +23,8 @@
*>
*> \verbatim
*>
*> DZASUM takes the sum of the absolute values.
*> DZASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
*> returns a single precision result.
*> \endverbatim
*
* Authors:
@ -34,7 +35,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup double_blas_level1
*
@ -51,10 +52,10 @@
* =====================================================================
DOUBLE PRECISION FUNCTION DZASUM(N,ZX,INCX)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
INTEGER INCX,N

View File

@ -23,7 +23,7 @@
*>
*> \verbatim
*>
*> ICAMAX finds the index of element having max. absolute value.
*> ICAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)|
*> \endverbatim
*
* Authors:
@ -34,7 +34,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup aux_blas
*
@ -51,10 +51,10 @@
* =====================================================================
INTEGER FUNCTION ICAMAX(N,CX,INCX)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
INTEGER INCX,N

View File

@ -23,7 +23,7 @@
*>
*> \verbatim
*>
*> IDAMAX finds the index of element having max. absolute value.
*> IDAMAX finds the index of the first element having maximum absolute value.
*> \endverbatim
*
* Authors:
@ -34,7 +34,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup aux_blas
*
@ -51,10 +51,10 @@
* =====================================================================
INTEGER FUNCTION IDAMAX(N,DX,INCX)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
INTEGER INCX,N

View File

@ -23,7 +23,7 @@
*>
*> \verbatim
*>
*> ISAMAX finds the index of element having max. absolute value.
*> ISAMAX finds the index of the first element having maximum absolute value.
*> \endverbatim
*
* Authors:
@ -34,7 +34,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup aux_blas
*
@ -51,10 +51,10 @@
* =====================================================================
INTEGER FUNCTION ISAMAX(N,SX,INCX)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
INTEGER INCX,N

View File

@ -23,7 +23,7 @@
*>
*> \verbatim
*>
*> IZAMAX finds the index of element having max. absolute value.
*> IZAMAX finds the index of the first element having maximum |Re(.)| + |Im(.)|
*> \endverbatim
*
* Authors:
@ -34,7 +34,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup aux_blas
*
@ -51,10 +51,10 @@
* =====================================================================
INTEGER FUNCTION IZAMAX(N,ZX,INCX)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
INTEGER INCX,N

View File

@ -20,7 +20,7 @@
*>
*> \verbatim
*>
*> SCABS1 computes absolute value of a complex number
*> SCABS1 computes |Re(.)| + |Im(.)| of a complex number
*> \endverbatim
*
* Authors:
@ -31,17 +31,17 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup single_blas_level1
*
* =====================================================================
REAL FUNCTION SCABS1(Z)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
COMPLEX Z

View File

@ -23,7 +23,7 @@
*>
*> \verbatim
*>
*> SCASUM takes the sum of the absolute values of a complex vector and
*> SCASUM takes the sum of the (|Re(.)| + |Im(.)|)'s of a complex vector and
*> returns a single precision result.
*> \endverbatim
*
@ -35,7 +35,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup single_blas_level1
*
@ -52,10 +52,10 @@
* =====================================================================
REAL FUNCTION SCASUM(N,CX,INCX)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
INTEGER INCX,N

View File

@ -163,7 +163,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup single_blas_level2
*
@ -185,10 +185,10 @@
* =====================================================================
SUBROUTINE SGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* -- Reference BLAS level2 routine (version 3.4.0) --
* -- Reference BLAS level2 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
REAL ALPHA,BETA
@ -312,18 +312,15 @@
JX = KX
IF (INCY.EQ.1) THEN
DO 60 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
K = KUP1 - J
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
Y(I) = Y(I) + TEMP*A(K+I,J)
50 CONTINUE
END IF
JX = JX + INCX
60 CONTINUE
ELSE
DO 80 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
IY = KY
K = KUP1 - J
@ -331,7 +328,6 @@
Y(IY) = Y(IY) + TEMP*A(K+I,J)
IY = IY + INCY
70 CONTINUE
END IF
JX = JX + INCX
IF (J.GT.KU) KY = KY + INCY
80 CONTINUE

View File

@ -166,7 +166,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup single_blas_level3
*
@ -187,10 +187,10 @@
* =====================================================================
SUBROUTINE SGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* -- Reference BLAS level3 routine (version 3.4.0) --
* -- Reference BLAS level3 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
REAL ALPHA,BETA
@ -311,12 +311,10 @@
60 CONTINUE
END IF
DO 80 L = 1,K
IF (B(L,J).NE.ZERO) THEN
TEMP = ALPHA*B(L,J)
DO 70 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
70 CONTINUE
END IF
80 CONTINUE
90 CONTINUE
ELSE
@ -353,12 +351,10 @@
140 CONTINUE
END IF
DO 160 L = 1,K
IF (B(J,L).NE.ZERO) THEN
TEMP = ALPHA*B(J,L)
DO 150 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
150 CONTINUE
END IF
160 CONTINUE
170 CONTINUE
ELSE

View File

@ -134,7 +134,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup single_blas_level2
*
@ -156,10 +156,10 @@
* =====================================================================
SUBROUTINE SGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* -- Reference BLAS level2 routine (version 3.4.0) --
* -- Reference BLAS level2 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
REAL ALPHA,BETA
@ -278,24 +278,20 @@
JX = KX
IF (INCY.EQ.1) THEN
DO 60 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
DO 50 I = 1,M
Y(I) = Y(I) + TEMP*A(I,J)
50 CONTINUE
END IF
JX = JX + INCX
60 CONTINUE
ELSE
DO 80 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
IY = KY
DO 70 I = 1,M
Y(IY) = Y(IY) + TEMP*A(I,J)
IY = IY + INCY
70 CONTINUE
END IF
JX = JX + INCX
80 CONTINUE
END IF

View File

@ -23,7 +23,9 @@
*>
*> \verbatim
*>
*> ZDOTC forms the dot product of a vector.
*> ZDOTC forms the dot product of two complex vectors
*> ZDOTC = X^H * Y
*>
*> \endverbatim
*
* Authors:
@ -34,7 +36,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup complex16_blas_level1
*
@ -50,10 +52,10 @@
* =====================================================================
COMPLEX*16 FUNCTION ZDOTC(N,ZX,INCX,ZY,INCY)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
INTEGER INCX,INCY,N

View File

@ -23,7 +23,9 @@
*>
*> \verbatim
*>
*> ZDOTU forms the dot product of two vectors.
*> ZDOTU forms the dot product of two complex vectors
*> ZDOTU = X^T * Y
*>
*> \endverbatim
*
* Authors:
@ -34,7 +36,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup complex16_blas_level1
*
@ -50,10 +52,10 @@
* =====================================================================
COMPLEX*16 FUNCTION ZDOTU(N,ZX,INCX,ZY,INCY)
*
* -- Reference BLAS level1 routine (version 3.4.0) --
* -- Reference BLAS level1 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
INTEGER INCX,INCY,N

View File

@ -165,7 +165,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup complex16_blas_level2
*
@ -187,10 +187,10 @@
* =====================================================================
SUBROUTINE ZGBMV(TRANS,M,N,KL,KU,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* -- Reference BLAS level2 routine (version 3.4.0) --
* -- Reference BLAS level2 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
COMPLEX*16 ALPHA,BETA
@ -319,18 +319,15 @@
JX = KX
IF (INCY.EQ.1) THEN
DO 60 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
K = KUP1 - J
DO 50 I = MAX(1,J-KU),MIN(M,J+KL)
Y(I) = Y(I) + TEMP*A(K+I,J)
50 CONTINUE
END IF
JX = JX + INCX
60 CONTINUE
ELSE
DO 80 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
IY = KY
K = KUP1 - J
@ -338,7 +335,6 @@
Y(IY) = Y(IY) + TEMP*A(K+I,J)
IY = IY + INCY
70 CONTINUE
END IF
JX = JX + INCX
IF (J.GT.KU) KY = KY + INCY
80 CONTINUE

View File

@ -166,7 +166,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup complex16_blas_level3
*
@ -187,10 +187,10 @@
* =====================================================================
SUBROUTINE ZGEMM(TRANSA,TRANSB,M,N,K,ALPHA,A,LDA,B,LDB,BETA,C,LDC)
*
* -- Reference BLAS level3 routine (version 3.4.0) --
* -- Reference BLAS level3 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
COMPLEX*16 ALPHA,BETA
@ -317,12 +317,10 @@
60 CONTINUE
END IF
DO 80 L = 1,K
IF (B(L,J).NE.ZERO) THEN
TEMP = ALPHA*B(L,J)
DO 70 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
70 CONTINUE
END IF
80 CONTINUE
90 CONTINUE
ELSE IF (CONJA) THEN
@ -376,12 +374,10 @@
170 CONTINUE
END IF
DO 190 L = 1,K
IF (B(J,L).NE.ZERO) THEN
TEMP = ALPHA*DCONJG(B(J,L))
DO 180 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
180 CONTINUE
END IF
190 CONTINUE
200 CONTINUE
ELSE
@ -399,12 +395,10 @@
220 CONTINUE
END IF
DO 240 L = 1,K
IF (B(J,L).NE.ZERO) THEN
TEMP = ALPHA*B(J,L)
DO 230 I = 1,M
C(I,J) = C(I,J) + TEMP*A(I,L)
230 CONTINUE
END IF
240 CONTINUE
250 CONTINUE
END IF

View File

@ -136,7 +136,7 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2011
*> \date November 2015
*
*> \ingroup complex16_blas_level2
*
@ -158,10 +158,10 @@
* =====================================================================
SUBROUTINE ZGEMV(TRANS,M,N,ALPHA,A,LDA,X,INCX,BETA,Y,INCY)
*
* -- Reference BLAS level2 routine (version 3.4.0) --
* -- Reference BLAS level2 routine (version 3.6.0) --
* -- Reference BLAS is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* November 2011
* November 2015
*
* .. Scalar Arguments ..
COMPLEX*16 ALPHA,BETA
@ -285,24 +285,20 @@
JX = KX
IF (INCY.EQ.1) THEN
DO 60 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
DO 50 I = 1,M
Y(I) = Y(I) + TEMP*A(I,J)
50 CONTINUE
END IF
JX = JX + INCX
60 CONTINUE
ELSE
DO 80 J = 1,N
IF (X(JX).NE.ZERO) THEN
TEMP = ALPHA*X(JX)
IY = KY
DO 70 I = 1,M
Y(IY) = Y(IY) + TEMP*A(I,J)
IY = IY + INCY
70 CONTINUE
END IF
JX = JX + INCX
80 CONTINUE
END IF

View File

@ -30,17 +30,16 @@ macro(add_blas_test name src)
get_filename_component(baseNAME ${src} NAME_WE)
set(TEST_INPUT "${LAPACK_SOURCE_DIR}/BLAS/${baseNAME}.in")
add_executable(${name} ${src})
get_target_property(TEST_LOC ${name} LOCATION)
target_link_libraries(${name} blas)
if(EXISTS "${TEST_INPUT}")
add_test(BLAS-${name} "${CMAKE_COMMAND}"
-DTEST=${TEST_LOC}
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}"
-DTEST=$<TARGET_FILE:${name}>
-DINPUT=${TEST_INPUT}
-DINTDIR=${CMAKE_CFG_INTDIR}
-P "${LAPACK_SOURCE_DIR}/TESTING/runtest.cmake")
else()
add_test(BLAS-${name} "${CMAKE_COMMAND}"
-DTEST=${TEST_LOC}
add_test(NAME BLAS-${name} COMMAND "${CMAKE_COMMAND}"
-DTEST=$<TARGET_FILE:${name}>
-DINTDIR=${CMAKE_CFG_INTDIR}
-P "${LAPACK_SOURCE_DIR}/TESTING/runtest.cmake")
endif()

View File

@ -120,7 +120,7 @@
REAL RZERO
PARAMETER ( RZERO = 0.0 )
INTEGER NMAX, INCMAX
PARAMETER ( NMAX = 128, INCMAX = 2 )
PARAMETER ( NMAX = 65, INCMAX = 2 )
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
$ NALMAX = 7, NBEMAX = 7 )

View File

@ -102,7 +102,7 @@
REAL RZERO
PARAMETER ( RZERO = 0.0 )
INTEGER NMAX
PARAMETER ( NMAX = 128 )
PARAMETER ( NMAX = 65 )
INTEGER NIDMAX, NALMAX, NBEMAX
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
* .. Local Scalars ..

View File

@ -117,7 +117,7 @@
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
INTEGER NMAX, INCMAX
PARAMETER ( NMAX = 128, INCMAX = 2 )
PARAMETER ( NMAX = 65, INCMAX = 2 )
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
$ NALMAX = 7, NBEMAX = 7 )

View File

@ -97,7 +97,7 @@
DOUBLE PRECISION ZERO, ONE
PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
INTEGER NMAX
PARAMETER ( NMAX = 128 )
PARAMETER ( NMAX = 65 )
INTEGER NIDMAX, NALMAX, NBEMAX
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
* .. Local Scalars ..

View File

@ -117,7 +117,7 @@
REAL ZERO, ONE
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
INTEGER NMAX, INCMAX
PARAMETER ( NMAX = 128, INCMAX = 2 )
PARAMETER ( NMAX = 65, INCMAX = 2 )
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
$ NALMAX = 7, NBEMAX = 7 )

View File

@ -97,7 +97,7 @@
REAL ZERO, ONE
PARAMETER ( ZERO = 0.0, ONE = 1.0 )
INTEGER NMAX
PARAMETER ( NMAX = 128 )
PARAMETER ( NMAX = 65 )
INTEGER NIDMAX, NALMAX, NBEMAX
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
* .. Local Scalars ..

View File

@ -121,7 +121,7 @@
DOUBLE PRECISION RZERO
PARAMETER ( RZERO = 0.0D0 )
INTEGER NMAX, INCMAX
PARAMETER ( NMAX = 128, INCMAX = 2 )
PARAMETER ( NMAX = 65, INCMAX = 2 )
INTEGER NINMAX, NIDMAX, NKBMAX, NALMAX, NBEMAX
PARAMETER ( NINMAX = 7, NIDMAX = 9, NKBMAX = 7,
$ NALMAX = 7, NBEMAX = 7 )

View File

@ -104,7 +104,7 @@
DOUBLE PRECISION RZERO
PARAMETER ( RZERO = 0.0D0 )
INTEGER NMAX
PARAMETER ( NMAX = 128 )
PARAMETER ( NMAX = 65 )
INTEGER NIDMAX, NALMAX, NBEMAX
PARAMETER ( NIDMAX = 9, NALMAX = 7, NBEMAX = 7 )
* .. Local Scalars ..

View File

@ -0,0 +1,90 @@
message(STATUS "CBLAS enable")
enable_language(C)
set(LAPACK_INSTALL_EXPORT_NAME cblas-targets)
# Create a header file cblas.h for the routines called in my C programs
include(FortranCInterface)
FortranCInterface_HEADER( ${CMAKE_CURRENT_SOURCE_DIR}/include/cblas_mangling.h
MACRO_NAMESPACE "F77_"
SYMBOL_NAMESPACE "F77_" )
# Old way to detect mangling
#include(FortranMangling)
#FORTRAN_MANGLING(CDEFS)
#set(CDEFS ${CDEFS} CACHE STRING "Fortran Mangling" FORCE)
#MESSAGE(STATUS "=========")
# --------------------------------------------------
# Compiler Flags
#ADD_DEFINITIONS( "-D${CDEFS}")
include_directories( include )
add_subdirectory(include)
add_subdirectory(src)
macro(append_subdir_files variable dirname)
get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable})
foreach(depfile ${holder})
list(APPEND ${variable} "${dirname}/${depfile}")
endforeach()
endmacro()
append_subdir_files(CBLAS_INCLUDE "include")
INSTALL( FILES ${CBLAS_INCLUDE} DESTINATION include )
# --------------------------------------------------
if(BUILD_TESTING)
add_subdirectory(testing)
add_subdirectory(examples)
endif(BUILD_TESTING)
if(NOT BLAS_FOUND)
set(ALL_TARGETS ${ALL_TARGETS} blas)
endif(NOT BLAS_FOUND)
# Export cblas targets from the
# install tree, if any.
set(_cblas_config_install_guard_target "")
if(ALL_TARGETS)
install(EXPORT cblas-targets
DESTINATION lib/cmake/cblas-${LAPACK_VERSION})
# Choose one of the cblas targets to use as a guard for
# cblas-config.cmake to load targets from the install tree.
list(GET ALL_TARGETS 0 _cblas_config_install_guard_target)
endif()
# Export cblas targets from the build tree, if any.
set(_cblas_config_build_guard_target "")
if(ALL_TARGETS)
export(TARGETS ${ALL_TARGETS} FILE cblas-targets.cmake)
# Choose one of the cblas targets to use as a guard
# for cblas-config.cmake to load targets from the build tree.
list(GET ALL_TARGETS 0 _cblas_config_build_guard_target)
endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CMAKE/cblas-config-version.cmake.in
${LAPACK_BINARY_DIR}/cblas-config-version.cmake @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CMAKE/cblas-config-build.cmake.in
${LAPACK_BINARY_DIR}/cblas-config.cmake @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cblas.pc.in ${CMAKE_CURRENT_BINARY_DIR}/cblas.pc)
install(FILES
${CMAKE_CURRENT_BINARY_DIR}/cblas.pc
DESTINATION ${PKG_CONFIG_DIR}
)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/cblas-config-install.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake @ONLY)
install(FILES
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/cblas-config.cmake
${LAPACK_BINARY_DIR}/cblas-config-version.cmake
DESTINATION lib/cmake/cblas-${LAPACK_VERSION}
)
#install(EXPORT cblas-targets
# DESTINATION lib/cmake/cblas-${LAPACK_VERSION})

View File

@ -0,0 +1,27 @@
include ../make.inc
all:
cd include && cp cblas_mangling_with_flags.h cblas_mangling.h
cd src && $(MAKE) all
clean: cleanlib
cleanlib:
cd src && $(MAKE) clean
cleanexe:
cd testing && $(MAKE) cleanexe
cleanall: clean cleanexe
rm -f $(CBLASLIB)
cd examples && rm -f *.o cblas_ex1 cblas_ex2
cblas_testing:
cd testing && $(MAKE) all
runtst:
cd testing && $(MAKE) run
example: all
cd examples && make all

View File

@ -0,0 +1,49 @@
#
# Makefile.LINUX
#
#
# If you compile, change the name to Makefile.in.
#
#
#-----------------------------------------------------------------------------
# Shell
#-----------------------------------------------------------------------------
SHELL = /bin/sh
#-----------------------------------------------------------------------------
# Platform
#-----------------------------------------------------------------------------
PLAT = LINUX
#-----------------------------------------------------------------------------
# Libraries and includes
#-----------------------------------------------------------------------------
BLLIB = $(home)/lib/librefblas.a
CBLIB = ../lib/libcblas.a
#-----------------------------------------------------------------------------
# Compilers
#-----------------------------------------------------------------------------
CC = gcc
FC = gfortran
LOADER = $(FC)
#-----------------------------------------------------------------------------
# Flags for Compilers
#-----------------------------------------------------------------------------
CFLAGS = -O3 -DADD_
FFLAGS = -O3
#-----------------------------------------------------------------------------
# Archive programs and flags
#-----------------------------------------------------------------------------
ARCH = ar
ARCHFLAGS = cr
RANLIB = ranlib

View File

@ -0,0 +1,59 @@
INSTALLATION
Make sure to set these variables appropriately in your Make.inc in the LAPACK folder:
CBLASLIB is your CBLAS library
BLASLIB is your Legacy BLAS library (by default the Reference BLAS shipped within LAPACK)
Then type:
prompt> make
which will create the CBLAS library.
CREATING THE TESTERS
type:
prompt> make cblas_testing
This will create the BLAS library if necessary, then compile the CBLAS testings.
EXECUTING THE TESTERS
type:
prompt> make runtst
_______________________________________________________________________________
This package contains C interface to Legacy BLAS.
Written by Keita Teranishi (5/20/98)
_______________________________________________________________________________
This release updates an inconsistency between the BLAST document and
the interface. According to the document, the enumerated types for
the C interface to the BLAS are not typedef'ed.
It also updates the Level 2 and 3 testers which check for correct
exiting of routines when called with bad arguments. This is done by
overriding the Legacy BLAS library's implementation of xerbla(). If
this cannot be done ( for instance one cannot override some calls
to xerbla() in Sun's Performance library), then correct error
exiting cannot be checked.
Updated by Jeff Horner (3/15/99)
_______________________________________________________________________________
Updated by R. Clint Whaley (2/23/03):
Fixed the i?amax error that I reported three years ago: standard dictates
IAMAX return vals in range 0 <= iamax < N, but reference was mistakenly
returning like F77: 0 < iamax <= N.
_______________________________________________________________________________
Updated by Julie Langou (08/22/2014):
Integrate CBLAS package into LAPACK
Improve headers for mangling

View File

@ -0,0 +1,9 @@
prefix=@prefix@
libdir=@libdir@
Name: lapacke
Description: C Standard Interface to BLAS Linear Algebra PACKage
Version: @LAPACK_VERSION@
URL: http://www.netlib.org/lapack/
Libs: -L${libdir} -lcblas
Requires: blas

View File

@ -0,0 +1,14 @@
# Load the LAPACK package with which we were built.
set(LAPACK_DIR "@LAPACK_BINARY_DIR@")
find_package(LAPACK NO_MODULE)
# Load lapack targets from the build tree, including lapacke targets.
if(NOT TARGET lapacke)
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
endif()
# Report lapacke header search locations.
set(CBLAS_INCLUDE_DIRS "@LAPACK_SOURCE_DIR@/cblas/include")
# Report lapacke libraries.
set(CBLAS_LIBRARIES cblas)

View File

@ -0,0 +1,23 @@
# Compute locations from <prefix>/lib/cmake/lapacke-<v>/<self>.cmake
get_filename_component(_CBLAS_SELF_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
get_filename_component(_CBLAS_PREFIX "${_CBLAS_SELF_DIR}" PATH)
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
get_filename_component(_CBLAS_PREFIX "${_CBLAS_PREFIX}" PATH)
# Load the LAPACK package with which we were built.
set(LAPACK_DIR "${_CBLAS_PREFIX}/lib/cmake/lapack-@LAPACK_VERSION@")
find_package(LAPACK NO_MODULE)
# Load lapacke targets from the install tree.
if(NOT TARGET cblas)
include(${_CBLAS_SELF_DIR}/cblas-targets.cmake)
endif()
# Report lapacke header search locations.
set(CBLAS_INCLUDE_DIRS ${_CBLAS_PREFIX}/include)
# Report lapacke libraries.
set(CBLAS_LIBRARIES cblas)
unset(_CBLAS_PREFIX)
unset(_CBLAS_SELF_DIR)

View File

@ -0,0 +1,8 @@
add_executable(xexample1_CBLAS cblas_example1.c )
add_executable(xexample2_CBLAS cblas_example2.c )
target_link_libraries(xexample1_CBLAS cblas ${BLAS_LIBRARIES})
target_link_libraries(xexample2_CBLAS cblas ${BLAS_LIBRARIES})
add_test(example1_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample1_CBLAS)
add_test(example2_CBLAS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/xexample2_CBLAS)

View File

@ -0,0 +1,14 @@
include ../../make.inc
all: example1 example2
example1:
$(CC) -c $(CFLAGS) -I../include cblas_example1.c
$(LOADER) -o cblas_ex1 cblas_example1.o $(CBLASLIB) $(BLASLIB)
example2:
$(CC) -c $(CFLAGS) -I../include cblas_example2.c
$(LOADER) -o cblas_ex2 cblas_example2.o $(CBLASLIB) $(BLASLIB)
cleanall:
rm -f *.o cblas_ex1 cblas_ex2

View File

@ -0,0 +1,69 @@
/* cblas_example.c */
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
int main ( )
{
CBLAS_LAYOUT Layout;
CBLAS_TRANSPOSE transa;
double *a, *x, *y;
double alpha, beta;
int m, n, lda, incx, incy, i;
Layout = CblasColMajor;
transa = CblasNoTrans;
m = 4; /* Size of Column ( the number of rows ) */
n = 4; /* Size of Row ( the number of columns ) */
lda = 4; /* Leading dimension of 5 * 4 matrix is 5 */
incx = 1;
incy = 1;
alpha = 1;
beta = 0;
a = (double *)malloc(sizeof(double)*m*n);
x = (double *)malloc(sizeof(double)*n);
y = (double *)malloc(sizeof(double)*n);
/* The elements of the first column */
a[0] = 1;
a[1] = 2;
a[2] = 3;
a[3] = 4;
/* The elements of the second column */
a[m] = 1;
a[m+1] = 1;
a[m+2] = 1;
a[m+3] = 1;
/* The elements of the third column */
a[m*2] = 3;
a[m*2+1] = 4;
a[m*2+2] = 5;
a[m*2+3] = 6;
/* The elements of the fourth column */
a[m*3] = 5;
a[m*3+1] = 6;
a[m*3+2] = 7;
a[m*3+3] = 8;
/* The elemetns of x and y */
x[0] = 1;
x[1] = 2;
x[2] = 1;
x[3] = 1;
y[0] = 0;
y[1] = 0;
y[2] = 0;
y[3] = 0;
cblas_dgemv( Layout, transa, m, n, alpha, a, lda, x, incx, beta,
y, incy );
/* Print y */
for( i = 0; i < n; i++ )
printf(" y%d = %f\n", i, y[i]);
free(a);
free(x);
free(y);
return 0;
}

View File

@ -0,0 +1,72 @@
/* cblas_example2.c */
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
#include "cblas_f77.h"
#define INVALID -1
int main (int argc, char **argv )
{
int rout=-1,info=0,m,n,k,lda,ldb,ldc;
double A[2] = {0.0,0.0},
B[2] = {0.0,0.0},
C[2] = {0.0,0.0},
ALPHA=0.0, BETA=0.0;
if (argc > 2){
rout = atoi(argv[1]);
info = atoi(argv[2]);
}
if (rout == 1) {
if (info==0) {
printf("Checking if cblas_dgemm fails on parameter 4\n");
cblas_dgemm( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
}
if (info==1) {
printf("Checking if cblas_dgemm fails on parameter 5\n");
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0,
ALPHA, A, 1, B, 1, BETA, C, 1 );
}
if (info==2) {
printf("Checking if cblas_dgemm fails on parameter 9\n");
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2,
ALPHA, A, 1, B, 1, BETA, C, 2 );
}
if (info==3) {
printf("Checking if cblas_dgemm fails on parameter 11\n");
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 2,
ALPHA, A, 1, B, 1, BETA, C, 1 );
}
} else {
if (info==0) {
printf("Checking if F77_dgemm fails on parameter 3\n");
m=INVALID; n=0; k=0; lda=1; ldb=1; ldc=1;
F77_dgemm( "T", "N", &m, &n, &k,
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
}
if (info==1) {
m=0; n=INVALID; k=0; lda=1; ldb=1; ldc=1;
printf("Checking if F77_dgemm fails on parameter 4\n");
F77_dgemm( "N", "T", &m, &n, &k,
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
}
if (info==2) {
printf("Checking if F77_dgemm fails on parameter 8\n");
m=2; n=0; k=0; lda=1; ldb=1; ldc=2;
F77_dgemm( "N", "N" , &m, &n, &k,
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
}
if (info==3) {
printf("Checking if F77_dgemm fails on parameter 10\n");
m=0; n=0; k=2; lda=1; ldb=1; ldc=1;
F77_dgemm( "N", "N" , &m, &n, &k,
&ALPHA, A, &lda, B, &ldb, &BETA, C, &ldc );
}
}
return 0;
}

View File

@ -0,0 +1,3 @@
SET (CBLAS_INCLUDE cblas.h cblas_f77.h cblas_test.h cblas_mangling.h)
file(COPY ${CBLAS_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include)

View File

@ -0,0 +1,588 @@
#ifndef CBLAS_H
#define CBLAS_H
#include <stddef.h>
#ifdef __cplusplus
extern "C" { /* Assume C declarations for C++ */
#endif /* __cplusplus */
/*
* Enumerated and derived types
*/
#ifdef WeirdNEC
#define CBLAS_INDEX long
#else
#define CBLAS_INDEX int
#endif
typedef enum {CblasRowMajor=101, CblasColMajor=102} CBLAS_LAYOUT;
typedef enum {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113} CBLAS_TRANSPOSE;
typedef enum {CblasUpper=121, CblasLower=122} CBLAS_UPLO;
typedef enum {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG;
typedef enum {CblasLeft=141, CblasRight=142} CBLAS_SIDE;
typedef CBLAS_LAYOUT CBLAS_ORDER; /* this for backward compatibility with CBLAS_ORDER */
#include "cblas_mangling.h"
/*
* ===========================================================================
* Prototypes for level 1 BLAS functions (complex are recast as routines)
* ===========================================================================
*/
double cblas_dcabs1(const void *z);
float cblas_scabs1(const void *c);
float cblas_sdsdot(const int N, const float alpha, const float *X,
const int incX, const float *Y, const int incY);
double cblas_dsdot(const int N, const float *X, const int incX, const float *Y,
const int incY);
float cblas_sdot(const int N, const float *X, const int incX,
const float *Y, const int incY);
double cblas_ddot(const int N, const double *X, const int incX,
const double *Y, const int incY);
/*
* Functions having prefixes Z and C only
*/
void cblas_cdotu_sub(const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotu);
void cblas_cdotc_sub(const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotc);
void cblas_zdotu_sub(const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotu);
void cblas_zdotc_sub(const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotc);
/*
* Functions having prefixes S D SC DZ
*/
float cblas_snrm2(const int N, const float *X, const int incX);
float cblas_sasum(const int N, const float *X, const int incX);
double cblas_dnrm2(const int N, const double *X, const int incX);
double cblas_dasum(const int N, const double *X, const int incX);
float cblas_scnrm2(const int N, const void *X, const int incX);
float cblas_scasum(const int N, const void *X, const int incX);
double cblas_dznrm2(const int N, const void *X, const int incX);
double cblas_dzasum(const int N, const void *X, const int incX);
/*
* Functions having standard 4 prefixes (S D C Z)
*/
CBLAS_INDEX cblas_isamax(const int N, const float *X, const int incX);
CBLAS_INDEX cblas_idamax(const int N, const double *X, const int incX);
CBLAS_INDEX cblas_icamax(const int N, const void *X, const int incX);
CBLAS_INDEX cblas_izamax(const int N, const void *X, const int incX);
/*
* ===========================================================================
* Prototypes for level 1 BLAS routines
* ===========================================================================
*/
/*
* Routines with standard 4 prefixes (s, d, c, z)
*/
void cblas_sswap(const int N, float *X, const int incX,
float *Y, const int incY);
void cblas_scopy(const int N, const float *X, const int incX,
float *Y, const int incY);
void cblas_saxpy(const int N, const float alpha, const float *X,
const int incX, float *Y, const int incY);
void cblas_dswap(const int N, double *X, const int incX,
double *Y, const int incY);
void cblas_dcopy(const int N, const double *X, const int incX,
double *Y, const int incY);
void cblas_daxpy(const int N, const double alpha, const double *X,
const int incX, double *Y, const int incY);
void cblas_cswap(const int N, void *X, const int incX,
void *Y, const int incY);
void cblas_ccopy(const int N, const void *X, const int incX,
void *Y, const int incY);
void cblas_caxpy(const int N, const void *alpha, const void *X,
const int incX, void *Y, const int incY);
void cblas_zswap(const int N, void *X, const int incX,
void *Y, const int incY);
void cblas_zcopy(const int N, const void *X, const int incX,
void *Y, const int incY);
void cblas_zaxpy(const int N, const void *alpha, const void *X,
const int incX, void *Y, const int incY);
/*
* Routines with S and D prefix only
*/
void cblas_srotg(float *a, float *b, float *c, float *s);
void cblas_srotmg(float *d1, float *d2, float *b1, const float b2, float *P);
void cblas_srot(const int N, float *X, const int incX,
float *Y, const int incY, const float c, const float s);
void cblas_srotm(const int N, float *X, const int incX,
float *Y, const int incY, const float *P);
void cblas_drotg(double *a, double *b, double *c, double *s);
void cblas_drotmg(double *d1, double *d2, double *b1, const double b2, double *P);
void cblas_drot(const int N, double *X, const int incX,
double *Y, const int incY, const double c, const double s);
void cblas_drotm(const int N, double *X, const int incX,
double *Y, const int incY, const double *P);
/*
* Routines with S D C Z CS and ZD prefixes
*/
void cblas_sscal(const int N, const float alpha, float *X, const int incX);
void cblas_dscal(const int N, const double alpha, double *X, const int incX);
void cblas_cscal(const int N, const void *alpha, void *X, const int incX);
void cblas_zscal(const int N, const void *alpha, void *X, const int incX);
void cblas_csscal(const int N, const float alpha, void *X, const int incX);
void cblas_zdscal(const int N, const double alpha, void *X, const int incX);
/*
* ===========================================================================
* Prototypes for level 2 BLAS
* ===========================================================================
*/
/*
* Routines with standard 4 prefixes (S, D, C, Z)
*/
void cblas_sgemv(const CBLAS_LAYOUT layout,
const CBLAS_TRANSPOSE TransA, const int M, const int N,
const float alpha, const float *A, const int lda,
const float *X, const int incX, const float beta,
float *Y, const int incY);
void cblas_sgbmv(CBLAS_LAYOUT layout,
CBLAS_TRANSPOSE TransA, const int M, const int N,
const int KL, const int KU, const float alpha,
const float *A, const int lda, const float *X,
const int incX, const float beta, float *Y, const int incY);
void cblas_strmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const float *A, const int lda,
float *X, const int incX);
void cblas_stbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const int K, const float *A, const int lda,
float *X, const int incX);
void cblas_stpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const float *Ap, float *X, const int incX);
void cblas_strsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const float *A, const int lda, float *X,
const int incX);
void cblas_stbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const int K, const float *A, const int lda,
float *X, const int incX);
void cblas_stpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const float *Ap, float *X, const int incX);
void cblas_dgemv(CBLAS_LAYOUT layout,
CBLAS_TRANSPOSE TransA, const int M, const int N,
const double alpha, const double *A, const int lda,
const double *X, const int incX, const double beta,
double *Y, const int incY);
void cblas_dgbmv(CBLAS_LAYOUT layout,
CBLAS_TRANSPOSE TransA, const int M, const int N,
const int KL, const int KU, const double alpha,
const double *A, const int lda, const double *X,
const int incX, const double beta, double *Y, const int incY);
void cblas_dtrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const double *A, const int lda,
double *X, const int incX);
void cblas_dtbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const int K, const double *A, const int lda,
double *X, const int incX);
void cblas_dtpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const double *Ap, double *X, const int incX);
void cblas_dtrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const double *A, const int lda, double *X,
const int incX);
void cblas_dtbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const int K, const double *A, const int lda,
double *X, const int incX);
void cblas_dtpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const double *Ap, double *X, const int incX);
void cblas_cgemv(CBLAS_LAYOUT layout,
CBLAS_TRANSPOSE TransA, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *X, const int incX, const void *beta,
void *Y, const int incY);
void cblas_cgbmv(CBLAS_LAYOUT layout,
CBLAS_TRANSPOSE TransA, const int M, const int N,
const int KL, const int KU, const void *alpha,
const void *A, const int lda, const void *X,
const int incX, const void *beta, void *Y, const int incY);
void cblas_ctrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const void *A, const int lda,
void *X, const int incX);
void cblas_ctbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const int K, const void *A, const int lda,
void *X, const int incX);
void cblas_ctpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const void *Ap, void *X, const int incX);
void cblas_ctrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const void *A, const int lda, void *X,
const int incX);
void cblas_ctbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const int K, const void *A, const int lda,
void *X, const int incX);
void cblas_ctpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const void *Ap, void *X, const int incX);
void cblas_zgemv(CBLAS_LAYOUT layout,
CBLAS_TRANSPOSE TransA, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *X, const int incX, const void *beta,
void *Y, const int incY);
void cblas_zgbmv(CBLAS_LAYOUT layout,
CBLAS_TRANSPOSE TransA, const int M, const int N,
const int KL, const int KU, const void *alpha,
const void *A, const int lda, const void *X,
const int incX, const void *beta, void *Y, const int incY);
void cblas_ztrmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const void *A, const int lda,
void *X, const int incX);
void cblas_ztbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const int K, const void *A, const int lda,
void *X, const int incX);
void cblas_ztpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const void *Ap, void *X, const int incX);
void cblas_ztrsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const void *A, const int lda, void *X,
const int incX);
void cblas_ztbsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const int K, const void *A, const int lda,
void *X, const int incX);
void cblas_ztpsv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE TransA, CBLAS_DIAG Diag,
const int N, const void *Ap, void *X, const int incX);
/*
* Routines with S and D prefixes only
*/
void cblas_ssymv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const float alpha, const float *A,
const int lda, const float *X, const int incX,
const float beta, float *Y, const int incY);
void cblas_ssbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const int K, const float alpha, const float *A,
const int lda, const float *X, const int incX,
const float beta, float *Y, const int incY);
void cblas_sspmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const float alpha, const float *Ap,
const float *X, const int incX,
const float beta, float *Y, const int incY);
void cblas_sger(CBLAS_LAYOUT layout, const int M, const int N,
const float alpha, const float *X, const int incX,
const float *Y, const int incY, float *A, const int lda);
void cblas_ssyr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const float alpha, const float *X,
const int incX, float *A, const int lda);
void cblas_sspr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const float alpha, const float *X,
const int incX, float *Ap);
void cblas_ssyr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const float alpha, const float *X,
const int incX, const float *Y, const int incY, float *A,
const int lda);
void cblas_sspr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const float alpha, const float *X,
const int incX, const float *Y, const int incY, float *A);
void cblas_dsymv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const double alpha, const double *A,
const int lda, const double *X, const int incX,
const double beta, double *Y, const int incY);
void cblas_dsbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const int K, const double alpha, const double *A,
const int lda, const double *X, const int incX,
const double beta, double *Y, const int incY);
void cblas_dspmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const double alpha, const double *Ap,
const double *X, const int incX,
const double beta, double *Y, const int incY);
void cblas_dger(CBLAS_LAYOUT layout, const int M, const int N,
const double alpha, const double *X, const int incX,
const double *Y, const int incY, double *A, const int lda);
void cblas_dsyr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, double *A, const int lda);
void cblas_dspr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, double *Ap);
void cblas_dsyr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, const double *Y, const int incY, double *A,
const int lda);
void cblas_dspr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const double alpha, const double *X,
const int incX, const double *Y, const int incY, double *A);
/*
* Routines with C and Z prefixes only
*/
void cblas_chemv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *A,
const int lda, const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_chbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const int K, const void *alpha, const void *A,
const int lda, const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_chpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *Ap,
const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_cgeru(CBLAS_LAYOUT layout, const int M, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_cgerc(CBLAS_LAYOUT layout, const int M, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_cher(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const float alpha, const void *X, const int incX,
void *A, const int lda);
void cblas_chpr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const float alpha, const void *X,
const int incX, void *A);
void cblas_cher2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_chpr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *Ap);
void cblas_zhemv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *A,
const int lda, const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_zhbmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const int K, const void *alpha, const void *A,
const int lda, const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_zhpmv(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *Ap,
const void *X, const int incX,
const void *beta, void *Y, const int incY);
void cblas_zgeru(CBLAS_LAYOUT layout, const int M, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_zgerc(CBLAS_LAYOUT layout, const int M, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_zher(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const double alpha, const void *X, const int incX,
void *A, const int lda);
void cblas_zhpr(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
const int N, const double alpha, const void *X,
const int incX, void *A);
void cblas_zher2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda);
void cblas_zhpr2(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *Ap);
/*
* ===========================================================================
* Prototypes for level 3 BLAS
* ===========================================================================
*/
/*
* Routines with standard 4 prefixes (S, D, C, Z)
*/
void cblas_sgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const float alpha, const float *A,
const int lda, const float *B, const int ldb,
const float beta, float *C, const int ldc);
void cblas_ssymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, const int M, const int N,
const float alpha, const float *A, const int lda,
const float *B, const int ldb, const float beta,
float *C, const int ldc);
void cblas_ssyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const float alpha, const float *A, const int lda,
const float beta, float *C, const int ldc);
void cblas_ssyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const float alpha, const float *A, const int lda,
const float *B, const int ldb, const float beta,
float *C, const int ldc);
void cblas_strmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
CBLAS_DIAG Diag, const int M, const int N,
const float alpha, const float *A, const int lda,
float *B, const int ldb);
void cblas_strsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
CBLAS_DIAG Diag, const int M, const int N,
const float alpha, const float *A, const int lda,
float *B, const int ldb);
void cblas_dgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const double alpha, const double *A,
const int lda, const double *B, const int ldb,
const double beta, double *C, const int ldc);
void cblas_dsymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, const int M, const int N,
const double alpha, const double *A, const int lda,
const double *B, const int ldb, const double beta,
double *C, const int ldc);
void cblas_dsyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const double alpha, const double *A, const int lda,
const double beta, double *C, const int ldc);
void cblas_dsyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const double alpha, const double *A, const int lda,
const double *B, const int ldb, const double beta,
double *C, const int ldc);
void cblas_dtrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
CBLAS_DIAG Diag, const int M, const int N,
const double alpha, const double *A, const int lda,
double *B, const int ldb);
void cblas_dtrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
CBLAS_DIAG Diag, const int M, const int N,
const double alpha, const double *A, const int lda,
double *B, const int ldb);
void cblas_cgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const void *alpha, const void *A,
const int lda, const void *B, const int ldb,
const void *beta, void *C, const int ldc);
void cblas_csymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_csyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *beta, void *C, const int ldc);
void cblas_csyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_ctrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
CBLAS_DIAG Diag, const int M, const int N,
const void *alpha, const void *A, const int lda,
void *B, const int ldb);
void cblas_ctrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
CBLAS_DIAG Diag, const int M, const int N,
const void *alpha, const void *A, const int lda,
void *B, const int ldb);
void cblas_zgemm(CBLAS_LAYOUT layout, CBLAS_TRANSPOSE TransA,
CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const void *alpha, const void *A,
const int lda, const void *B, const int ldb,
const void *beta, void *C, const int ldc);
void cblas_zsymm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_zsyrk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *beta, void *C, const int ldc);
void cblas_zsyr2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_ztrmm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
CBLAS_DIAG Diag, const int M, const int N,
const void *alpha, const void *A, const int lda,
void *B, const int ldb);
void cblas_ztrsm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, CBLAS_TRANSPOSE TransA,
CBLAS_DIAG Diag, const int M, const int N,
const void *alpha, const void *A, const int lda,
void *B, const int ldb);
/*
* Routines with prefixes C and Z only
*/
void cblas_chemm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_cherk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const float alpha, const void *A, const int lda,
const float beta, void *C, const int ldc);
void cblas_cher2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const float beta,
void *C, const int ldc);
void cblas_zhemm(CBLAS_LAYOUT layout, CBLAS_SIDE Side,
CBLAS_UPLO Uplo, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc);
void cblas_zherk(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const double alpha, const void *A, const int lda,
const double beta, void *C, const int ldc);
void cblas_zher2k(CBLAS_LAYOUT layout, CBLAS_UPLO Uplo,
CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const double beta,
void *C, const int ldc);
void cblas_xerbla(int p, const char *rout, const char *form, ...);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,394 @@
/*
* cblas_f77.h
* Written by Keita Teranishi
*
* Updated by Jeff Horner
* Merged cblas_f77.h and cblas_fortran_header.h
*/
#ifndef CBLAS_F77_H
#define CBLAS_F77_H
#ifdef CRAY
#include <fortran.h>
#define F77_CHAR _fcd
#define C2F_CHAR(a) ( _cptofcd( (a), 1 ) )
#define C2F_STR(a, i) ( _cptofcd( (a), (i) ) )
#define F77_STRLEN(a) (_fcdlen)
#endif
#ifdef WeirdNEC
#define F77_INT long
#endif
#ifdef F77_CHAR
#define FCHAR F77_CHAR
#else
#define FCHAR char *
#endif
#ifdef F77_INT
#define FINT const F77_INT *
#define FINT2 F77_INT *
#else
#define FINT const int *
#define FINT2 int *
#endif
/*
* Level 1 BLAS
*/
#define F77_xerbla F77_GLOBAL(xerbla,XERBLA)
#define F77_srotg F77_GLOBAL(srotg,SROTG)
#define F77_srotmg F77_GLOBAL(srotmg,SROTMG)
#define F77_srot F77_GLOBAL(srot,SROT)
#define F77_srotm F77_GLOBAL(srotm,SROTM)
#define F77_drotg F77_GLOBAL(drotg,DROTG)
#define F77_drotmg F77_GLOBAL(drotmg,DROTMG)
#define F77_drot F77_GLOBAL(drot,DROT)
#define F77_drotm F77_GLOBAL(drotm,DROTM)
#define F77_sswap F77_GLOBAL(sswap,SSWAP)
#define F77_scopy F77_GLOBAL(scopy,SCOPY)
#define F77_saxpy F77_GLOBAL(saxpy,SAXPY)
#define F77_isamax_sub F77_GLOBAL(isamaxsub,ISAMAXSUB)
#define F77_dswap F77_GLOBAL(dswap,DSWAP)
#define F77_dcopy F77_GLOBAL(dcopy,DCOPY)
#define F77_daxpy F77_GLOBAL(daxpy,DAXPY)
#define F77_idamax_sub F77_GLOBAL(idamaxsub,IDAMAXSUB)
#define F77_cswap F77_GLOBAL(cswap,CSWAP)
#define F77_ccopy F77_GLOBAL(ccopy,CCOPY)
#define F77_caxpy F77_GLOBAL(caxpy,CAXPY)
#define F77_icamax_sub F77_GLOBAL(icamaxsub,ICAMAXSUB)
#define F77_zswap F77_GLOBAL(zswap,ZSWAP)
#define F77_zcopy F77_GLOBAL(zcopy,ZCOPY)
#define F77_zaxpy F77_GLOBAL(zaxpy,ZAXPY)
#define F77_izamax_sub F77_GLOBAL(izamaxsub,IZAMAXSUB)
#define F77_sdot_sub F77_GLOBAL(sdotsub,SDOTSUB)
#define F77_ddot_sub F77_GLOBAL(ddotsub,DDOTSUB)
#define F77_dsdot_sub F77_GLOBAL(dsdotsub,DSDOTSUB)
#define F77_sscal F77_GLOBAL(sscal,SSCAL)
#define F77_dscal F77_GLOBAL(dscal,DSCAL)
#define F77_cscal F77_GLOBAL(cscal,CSCAL)
#define F77_zscal F77_GLOBAL(zscal,ZSCAL)
#define F77_csscal F77_GLOBAL(csscal,CSSCAL)
#define F77_zdscal F77_GLOBAL(zdscal,ZDSCAL)
#define F77_cdotu_sub F77_GLOBAL(cdotusub,CDOTUSUB)
#define F77_cdotc_sub F77_GLOBAL(cdotcsub,CDOTCSUB)
#define F77_zdotu_sub F77_GLOBAL(zdotusub,ZDOTUSUB)
#define F77_zdotc_sub F77_GLOBAL(zdotcsub,ZDOTCSUB)
#define F77_snrm2_sub F77_GLOBAL(snrm2sub,SNRM2SUB)
#define F77_sasum_sub F77_GLOBAL(sasumsub,SASUMSUB)
#define F77_dnrm2_sub F77_GLOBAL(dnrm2sub,DNRM2SUB)
#define F77_dasum_sub F77_GLOBAL(dasumsub,DASUMSUB)
#define F77_scnrm2_sub F77_GLOBAL(scnrm2sub,SCNRM2SUB)
#define F77_scasum_sub F77_GLOBAL(scasumsub,SCASUMSUB)
#define F77_dznrm2_sub F77_GLOBAL(dznrm2sub,DZNRM2SUB)
#define F77_dzasum_sub F77_GLOBAL(dzasumsub,DZASUMSUB)
#define F77_sdsdot_sub F77_GLOBAL(sdsdotsub,SDSDOTSUB)
/*
* Level 2 BLAS
*/
#define F77_ssymv F77_GLOBAL(ssymv,SSYMY)
#define F77_ssbmv F77_GLOBAL(ssbmv,SSMBV)
#define F77_sspmv F77_GLOBAL(sspmv,SSPMV)
#define F77_sger F77_GLOBAL(sger,SGER)
#define F77_ssyr F77_GLOBAL(ssyr,SSYR)
#define F77_sspr F77_GLOBAL(sspr,SSPR)
#define F77_ssyr2 F77_GLOBAL(ssyr2,SSYR2)
#define F77_sspr2 F77_GLOBAL(sspr2,SSPR2)
#define F77_dsymv F77_GLOBAL(dsymv,DSYMV)
#define F77_dsbmv F77_GLOBAL(dsbmv,DSBMV)
#define F77_dspmv F77_GLOBAL(dspmv,DSPMV)
#define F77_dger F77_GLOBAL(dger,DGER)
#define F77_dsyr F77_GLOBAL(dsyr,DSYR)
#define F77_dspr F77_GLOBAL(dspr,DSPR)
#define F77_dsyr2 F77_GLOBAL(dsyr2,DSYR2)
#define F77_dspr2 F77_GLOBAL(dspr2,DSPR2)
#define F77_chemv F77_GLOBAL(chemv,CHEMV)
#define F77_chbmv F77_GLOBAL(chbmv,CHBMV)
#define F77_chpmv F77_GLOBAL(chpmv,CHPMV)
#define F77_cgeru F77_GLOBAL(cgeru,CGERU)
#define F77_cgerc F77_GLOBAL(cgerc,CGERC)
#define F77_cher F77_GLOBAL(cher,CHER)
#define F77_chpr F77_GLOBAL(chpr,CHPR)
#define F77_cher2 F77_GLOBAL(cher2,CHER2)
#define F77_chpr2 F77_GLOBAL(chpr2,CHPR2)
#define F77_zhemv F77_GLOBAL(zhemv,ZHEMV)
#define F77_zhbmv F77_GLOBAL(zhbmv,ZHBMV)
#define F77_zhpmv F77_GLOBAL(zhpmv,ZHPMV)
#define F77_zgeru F77_GLOBAL(zgeru,ZGERU)
#define F77_zgerc F77_GLOBAL(zgerc,ZGERC)
#define F77_zher F77_GLOBAL(zher,ZHER)
#define F77_zhpr F77_GLOBAL(zhpr,ZHPR)
#define F77_zher2 F77_GLOBAL(zher2,ZHER2)
#define F77_zhpr2 F77_GLOBAL(zhpr2,ZHPR2)
#define F77_sgemv F77_GLOBAL(sgemv,SGEMV)
#define F77_sgbmv F77_GLOBAL(sgbmv,SGBMV)
#define F77_strmv F77_GLOBAL(strmv,STRMV)
#define F77_stbmv F77_GLOBAL(stbmv,STBMV)
#define F77_stpmv F77_GLOBAL(stpmv,STPMV)
#define F77_strsv F77_GLOBAL(strsv,STRSV)
#define F77_stbsv F77_GLOBAL(stbsv,STBSV)
#define F77_stpsv F77_GLOBAL(stpsv,STPSV)
#define F77_dgemv F77_GLOBAL(dgemv,DGEMV)
#define F77_dgbmv F77_GLOBAL(dgbmv,DGBMV)
#define F77_dtrmv F77_GLOBAL(dtrmv,DTRMV)
#define F77_dtbmv F77_GLOBAL(dtbmv,DTBMV)
#define F77_dtpmv F77_GLOBAL(dtpmv,DTRMV)
#define F77_dtrsv F77_GLOBAL(dtrsv,DTRSV)
#define F77_dtbsv F77_GLOBAL(dtbsv,DTBSV)
#define F77_dtpsv F77_GLOBAL(dtpsv,DTPSV)
#define F77_cgemv F77_GLOBAL(cgemv,CGEMV)
#define F77_cgbmv F77_GLOBAL(cgbmv,CGBMV)
#define F77_ctrmv F77_GLOBAL(ctrmv,CTRMV)
#define F77_ctbmv F77_GLOBAL(ctbmv,CTBMV)
#define F77_ctpmv F77_GLOBAL(ctpmv,CTPMV)
#define F77_ctrsv F77_GLOBAL(ctrsv,CTRSV)
#define F77_ctbsv F77_GLOBAL(ctbsv,CTBSV)
#define F77_ctpsv F77_GLOBAL(ctpsv,CTPSV)
#define F77_zgemv F77_GLOBAL(zgemv,ZGEMV)
#define F77_zgbmv F77_GLOBAL(zgbmv,ZGBMV)
#define F77_ztrmv F77_GLOBAL(ztrmv,ZTRMV)
#define F77_ztbmv F77_GLOBAL(ztbmv,ZTBMV)
#define F77_ztpmv F77_GLOBAL(ztpmv,ZTPMV)
#define F77_ztrsv F77_GLOBAL(ztrsv,ZTRSV)
#define F77_ztbsv F77_GLOBAL(ztbsv,ZTBSV)
#define F77_ztpsv F77_GLOBAL(ztpsv,ZTPSV)
/*
* Level 3 BLAS
*/
#define F77_chemm F77_GLOBAL(chemm,CHEMM)
#define F77_cherk F77_GLOBAL(cherk,CHERK)
#define F77_cher2k F77_GLOBAL(cher2k,CHER2K)
#define F77_zhemm F77_GLOBAL(zhemm,ZHEMM)
#define F77_zherk F77_GLOBAL(zherk,ZHERK)
#define F77_zher2k F77_GLOBAL(zher2k,ZHER2K)
#define F77_sgemm F77_GLOBAL(sgemm,SGEMM)
#define F77_ssymm F77_GLOBAL(ssymm,SSYMM)
#define F77_ssyrk F77_GLOBAL(ssyrk,SSYRK)
#define F77_ssyr2k F77_GLOBAL(ssyr2k,SSYR2K)
#define F77_strmm F77_GLOBAL(strmm,STRMM)
#define F77_strsm F77_GLOBAL(strsm,STRSM)
#define F77_dgemm F77_GLOBAL(dgemm,DGEMM)
#define F77_dsymm F77_GLOBAL(dsymm,DSYMM)
#define F77_dsyrk F77_GLOBAL(dsyrk,DSYRK)
#define F77_dsyr2k F77_GLOBAL(dsyr2k,DSYR2K)
#define F77_dtrmm F77_GLOBAL(dtrmm,DTRMM)
#define F77_dtrsm F77_GLOBAL(dtrsm,DTRSM)
#define F77_cgemm F77_GLOBAL(cgemm,CGEMM)
#define F77_csymm F77_GLOBAL(csymm,CSYMM)
#define F77_csyrk F77_GLOBAL(csyrk,CSYRK)
#define F77_csyr2k F77_GLOBAL(csyr2k,CSYR2K)
#define F77_ctrmm F77_GLOBAL(ctrmm,CTRMM)
#define F77_ctrsm F77_GLOBAL(ctrsm,CTRSM)
#define F77_zgemm F77_GLOBAL(zgemm,ZGEMM)
#define F77_zsymm F77_GLOBAL(zsymm,ZSYMM)
#define F77_zsyrk F77_GLOBAL(zsyrk,ZSYRK)
#define F77_zsyr2k F77_GLOBAL(zsyr2k,ZSYR2K)
#define F77_ztrmm F77_GLOBAL(ztrmm,ZTRMM)
#define F77_ztrsm F77_GLOBAL(ztrsm,ZTRSM)
#ifdef __cplusplus
extern "C" {
#endif
void F77_xerbla(FCHAR, void *);
/*
* Level 1 Fortran Prototypes
*/
/* Single Precision */
void F77_srot(FINT, float *, FINT, float *, FINT, const float *, const float *);
void F77_srotg(float *,float *,float *,float *);
void F77_srotm( FINT, float *, FINT, float *, FINT, const float *);
void F77_srotmg(float *,float *,float *,const float *, float *);
void F77_sswap( FINT, float *, FINT, float *, FINT);
void F77_scopy( FINT, const float *, FINT, float *, FINT);
void F77_saxpy( FINT, const float *, const float *, FINT, float *, FINT);
void F77_sdot_sub(FINT, const float *, FINT, const float *, FINT, float *);
void F77_sdsdot_sub( FINT, const float *, const float *, FINT, const float *, FINT, float *);
void F77_sscal( FINT, const float *, float *, FINT);
void F77_snrm2_sub( FINT, const float *, FINT, float *);
void F77_sasum_sub( FINT, const float *, FINT, float *);
void F77_isamax_sub( FINT, const float * , FINT, FINT2);
/* Double Precision */
void F77_drot(FINT, double *, FINT, double *, FINT, const double *, const double *);
void F77_drotg(double *,double *,double *,double *);
void F77_drotm( FINT, double *, FINT, double *, FINT, const double *);
void F77_drotmg(double *,double *,double *,const double *, double *);
void F77_dswap( FINT, double *, FINT, double *, FINT);
void F77_dcopy( FINT, const double *, FINT, double *, FINT);
void F77_daxpy( FINT, const double *, const double *, FINT, double *, FINT);
void F77_dswap( FINT, double *, FINT, double *, FINT);
void F77_dsdot_sub(FINT, const float *, FINT, const float *, FINT, double *);
void F77_ddot_sub( FINT, const double *, FINT, const double *, FINT, double *);
void F77_dscal( FINT, const double *, double *, FINT);
void F77_dnrm2_sub( FINT, const double *, FINT, double *);
void F77_dasum_sub( FINT, const double *, FINT, double *);
void F77_idamax_sub( FINT, const double * , FINT, FINT2);
/* Single Complex Precision */
void F77_cswap( FINT, void *, FINT, void *, FINT);
void F77_ccopy( FINT, const void *, FINT, void *, FINT);
void F77_caxpy( FINT, const void *, const void *, FINT, void *, FINT);
void F77_cswap( FINT, void *, FINT, void *, FINT);
void F77_cdotc_sub( FINT, const void *, FINT, const void *, FINT, void *);
void F77_cdotu_sub( FINT, const void *, FINT, const void *, FINT, void *);
void F77_cscal( FINT, const void *, void *, FINT);
void F77_icamax_sub( FINT, const void *, FINT, FINT2);
void F77_csscal( FINT, const float *, void *, FINT);
void F77_scnrm2_sub( FINT, const void *, FINT, float *);
void F77_scasum_sub( FINT, const void *, FINT, float *);
/* Double Complex Precision */
void F77_zswap( FINT, void *, FINT, void *, FINT);
void F77_zcopy( FINT, const void *, FINT, void *, FINT);
void F77_zaxpy( FINT, const void *, const void *, FINT, void *, FINT);
void F77_zswap( FINT, void *, FINT, void *, FINT);
void F77_zdotc_sub( FINT, const void *, FINT, const void *, FINT, void *);
void F77_zdotu_sub( FINT, const void *, FINT, const void *, FINT, void *);
void F77_zdscal( FINT, const double *, void *, FINT);
void F77_zscal( FINT, const void *, void *, FINT);
void F77_dznrm2_sub( FINT, const void *, FINT, double *);
void F77_dzasum_sub( FINT, const void *, FINT, double *);
void F77_izamax_sub( FINT, const void *, FINT, FINT2);
/*
* Level 2 Fortran Prototypes
*/
/* Single Precision */
void F77_sgemv(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_sgbmv(FCHAR, FINT, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_ssymv(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_ssbmv(FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_sspmv(FCHAR, FINT, const float *, const float *, const float *, FINT, const float *, float *, FINT);
void F77_strmv( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT);
void F77_stbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT);
void F77_strsv( FCHAR, FCHAR, FCHAR, FINT, const float *, FINT, float *, FINT);
void F77_stbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, FINT, float *, FINT);
void F77_stpmv( FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT);
void F77_stpsv( FCHAR, FCHAR, FCHAR, FINT, const float *, float *, FINT);
void F77_sger( FINT, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT);
void F77_ssyr(FCHAR, FINT, const float *, const float *, FINT, float *, FINT);
void F77_sspr(FCHAR, FINT, const float *, const float *, FINT, float *);
void F77_sspr2(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *);
void F77_ssyr2(FCHAR, FINT, const float *, const float *, FINT, const float *, FINT, float *, FINT);
/* Double Precision */
void F77_dgemv(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_dgbmv(FCHAR, FINT, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_dsymv(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_dsbmv(FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_dspmv(FCHAR, FINT, const double *, const double *, const double *, FINT, const double *, double *, FINT);
void F77_dtrmv( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT);
void F77_dtbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT);
void F77_dtrsv( FCHAR, FCHAR, FCHAR, FINT, const double *, FINT, double *, FINT);
void F77_dtbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, FINT, double *, FINT);
void F77_dtpmv( FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT);
void F77_dtpsv( FCHAR, FCHAR, FCHAR, FINT, const double *, double *, FINT);
void F77_dger( FINT, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT);
void F77_dsyr(FCHAR, FINT, const double *, const double *, FINT, double *, FINT);
void F77_dspr(FCHAR, FINT, const double *, const double *, FINT, double *);
void F77_dspr2(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *);
void F77_dsyr2(FCHAR, FINT, const double *, const double *, FINT, const double *, FINT, double *, FINT);
/* Single Complex Precision */
void F77_cgemv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
void F77_cgbmv(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
void F77_chemv(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
void F77_chbmv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
void F77_chpmv(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT);
void F77_ctrmv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
void F77_ctbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
void F77_ctpmv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT);
void F77_ctrsv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
void F77_ctbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
void F77_ctpsv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT);
void F77_cgerc( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
void F77_cgeru( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
void F77_cher(FCHAR, FINT, const float *, const void *, FINT, void *, FINT);
void F77_cher2(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
void F77_chpr(FCHAR, FINT, const float *, const void *, FINT, void *);
void F77_chpr2(FCHAR, FINT, const float *, const void *, FINT, const void *, FINT, void *);
/* Double Complex Precision */
void F77_zgemv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
void F77_zgbmv(FCHAR, FINT, FINT, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
void F77_zhemv(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
void F77_zhbmv(FCHAR, FINT, FINT, const void *, const void *, FINT, const void *, FINT, const void *, void *, FINT);
void F77_zhpmv(FCHAR, FINT, const void *, const void *, const void *, FINT, const void *, void *, FINT);
void F77_ztrmv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
void F77_ztbmv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
void F77_ztpmv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *, FINT);
void F77_ztrsv( FCHAR, FCHAR, FCHAR, FINT, const void *, FINT, void *, FINT);
void F77_ztbsv( FCHAR, FCHAR, FCHAR, FINT, FINT, const void *, FINT, void *, FINT);
void F77_ztpsv( FCHAR, FCHAR, FCHAR, FINT, const void *, void *,FINT);
void F77_zgerc( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
void F77_zgeru( FINT, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
void F77_zher(FCHAR, FINT, const double *, const void *, FINT, void *, FINT);
void F77_zher2(FCHAR, FINT, const void *, const void *, FINT, const void *, FINT, void *, FINT);
void F77_zhpr(FCHAR, FINT, const double *, const void *, FINT, void *);
void F77_zhpr2(FCHAR, FINT, const double *, const void *, FINT, const void *, FINT, void *);
/*
* Level 3 Fortran Prototypes
*/
/* Single Precision */
void F77_sgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_ssymm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_ssyrk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT);
void F77_ssyr2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_strmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
void F77_strsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
/* Double Precision */
void F77_dgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_dsymm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_dsyrk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT);
void F77_dsyr2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_dtrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
void F77_dtrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
/* Single Complex Precision */
void F77_cgemm(FCHAR, FCHAR, FINT, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_csymm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_chemm(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_csyrk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT);
void F77_cherk(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, float *, FINT);
void F77_csyr2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_cher2k(FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, const float *, FINT, const float *, float *, FINT);
void F77_ctrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
void F77_ctrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const float *, const float *, FINT, float *, FINT);
/* Double Complex Precision */
void F77_zgemm(FCHAR, FCHAR, FINT, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_zsymm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_zhemm(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_zsyrk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT);
void F77_zherk(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, double *, FINT);
void F77_zsyr2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_zher2k(FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, const double *, FINT, const double *, double *, FINT);
void F77_ztrmm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
void F77_ztrsm(FCHAR, FCHAR, FCHAR, FCHAR, FINT, FINT, const double *, const double *, FINT, double *, FINT);
#ifdef __cplusplus
}
#endif
#endif /* CBLAS_F77_H */

View File

@ -0,0 +1,17 @@
#ifndef F77_HEADER_INCLUDED
#define F77_HEADER_INCLUDED
#ifndef F77_GLOBAL
#if defined(F77_GLOBAL_PATTERN_LC) || defined(ADD_)
#define F77_GLOBAL(lcname,UCNAME) lcname##_
#elif defined(F77_GLOBAL_PATTERN_UC) || defined(UPPER)
#define F77_GLOBAL(lcname,UCNAME) UCNAME
#elif defined(F77_GLOBAL_PATTERN_MC) || defined(NOCHANGE)
#define F77_GLOBAL(lcname,UCNAME) lcname
#else
#define F77_GLOBAL(lcname,UCNAME) lcname##_
#endif
#endif
#endif

View File

@ -0,0 +1,190 @@
/*
* cblas_test.h
* Written by Keita Teranishi
*/
#ifndef CBLAS_TEST_H
#define CBLAS_TEST_H
#include "cblas.h"
#include "cblas_mangling.h"
#define TRUE 1
#define PASSED 1
#define TEST_ROW_MJR 1
#define FALSE 0
#define FAILED 0
#define TEST_COL_MJR 0
#define INVALID -1
#define UNDEFINED -1
typedef struct { float real; float imag; } CBLAS_TEST_COMPLEX;
typedef struct { double real; double imag; } CBLAS_TEST_ZOMPLEX;
#define F77_xerbla F77_GLOBAL(xerbla,XERBLA)
/*
* Level 1 BLAS
*/
#define F77_srotg F77_GLOBAL(srotgtest,SROTGTEST)
#define F77_srotmg F77_GLOBAL(srotmgtest,SROTMGTEST)
#define F77_srot F77_GLOBAL(srottest,SROTTEST)
#define F77_srotm F77_GLOBAL(srotmtest,SROTMTEST)
#define F77_drotg F77_GLOBAL(drotgtest,DROTGTEST)
#define F77_drotmg F77_GLOBAL(drotmgtest,DROTMGTEST)
#define F77_drot F77_GLOBAL(drottest,DROTTEST)
#define F77_drotm F77_GLOBAL(drotmtest,DROTMTEST)
#define F77_sswap F77_GLOBAL(sswaptest,SSWAPTEST)
#define F77_scopy F77_GLOBAL(scopytest,SCOPYTEST)
#define F77_saxpy F77_GLOBAL(saxpytest,SAXPYTEST)
#define F77_isamax F77_GLOBAL(isamaxtest,ISAMAXTEST)
#define F77_dswap F77_GLOBAL(dswaptest,DSWAPTEST)
#define F77_dcopy F77_GLOBAL(dcopytest,DCOPYTEST)
#define F77_daxpy F77_GLOBAL(daxpytest,DAXPYTEST)
#define F77_idamax F77_GLOBAL(idamaxtest,IDAMAXTEST)
#define F77_cswap F77_GLOBAL(cswaptest,CSWAPTEST)
#define F77_ccopy F77_GLOBAL(ccopytest,CCOPYTEST)
#define F77_caxpy F77_GLOBAL(caxpytest,CAXPYTEST)
#define F77_icamax F77_GLOBAL(icamaxtest,ICAMAXTEST)
#define F77_zswap F77_GLOBAL(zswaptest,ZSWAPTEST)
#define F77_zcopy F77_GLOBAL(zcopytest,ZCOPYTEST)
#define F77_zaxpy F77_GLOBAL(zaxpytest,ZAXPYTEST)
#define F77_izamax F77_GLOBAL(izamaxtest,IZAMAXTEST)
#define F77_sdot F77_GLOBAL(sdottest,SDOTTEST)
#define F77_ddot F77_GLOBAL(ddottest,DDOTTEST)
#define F77_dsdot F77_GLOBAL(dsdottest,DSDOTTEST)
#define F77_sscal F77_GLOBAL(sscaltest,SSCALTEST)
#define F77_dscal F77_GLOBAL(dscaltest,DSCALTEST)
#define F77_cscal F77_GLOBAL(cscaltest,CSCALTEST)
#define F77_zscal F77_GLOBAL(zscaltest,ZSCALTEST)
#define F77_csscal F77_GLOBAL(csscaltest,CSSCALTEST)
#define F77_zdscal F77_GLOBAL(zdscaltest,ZDSCALTEST)
#define F77_cdotu F77_GLOBAL(cdotutest,CDOTUTEST)
#define F77_cdotc F77_GLOBAL(cdotctest,CDOTCTEST)
#define F77_zdotu F77_GLOBAL(zdotutest,ZDOTUTEST)
#define F77_zdotc F77_GLOBAL(zdotctest,ZDOTCTEST)
#define F77_snrm2 F77_GLOBAL(snrm2test,SNRM2TEST)
#define F77_sasum F77_GLOBAL(sasumtest,SASUMTEST)
#define F77_dnrm2 F77_GLOBAL(dnrm2test,DNRM2TEST)
#define F77_dasum F77_GLOBAL(dasumtest,DASUMTEST)
#define F77_scnrm2 F77_GLOBAL(scnrm2test,SCNRM2TEST)
#define F77_scasum F77_GLOBAL(scasumtest,SCASUMTEST)
#define F77_dznrm2 F77_GLOBAL(dznrm2test,DZNRM2TEST)
#define F77_dzasum F77_GLOBAL(dzasumtest,DZASUMTEST)
#define F77_sdsdot F77_GLOBAL(sdsdottest, SDSDOTTEST)
/*
* Level 2 BLAS
*/
#define F77_s2chke F77_GLOBAL(cs2chke,CS2CHKE)
#define F77_d2chke F77_GLOBAL(cd2chke,CD2CHKE)
#define F77_c2chke F77_GLOBAL(cc2chke,CC2CHKE)
#define F77_z2chke F77_GLOBAL(cz2chke,CZ2CHKE)
#define F77_ssymv F77_GLOBAL(cssymv,CSSYMV)
#define F77_ssbmv F77_GLOBAL(cssbmv,CSSBMV)
#define F77_sspmv F77_GLOBAL(csspmv,CSSPMV)
#define F77_sger F77_GLOBAL(csger,CSGER)
#define F77_ssyr F77_GLOBAL(cssyr,CSSYR)
#define F77_sspr F77_GLOBAL(csspr,CSSPR)
#define F77_ssyr2 F77_GLOBAL(cssyr2,CSSYR2)
#define F77_sspr2 F77_GLOBAL(csspr2,CSSPR2)
#define F77_dsymv F77_GLOBAL(cdsymv,CDSYMV)
#define F77_dsbmv F77_GLOBAL(cdsbmv,CDSBMV)
#define F77_dspmv F77_GLOBAL(cdspmv,CDSPMV)
#define F77_dger F77_GLOBAL(cdger,CDGER)
#define F77_dsyr F77_GLOBAL(cdsyr,CDSYR)
#define F77_dspr F77_GLOBAL(cdspr,CDSPR)
#define F77_dsyr2 F77_GLOBAL(cdsyr2,CDSYR2)
#define F77_dspr2 F77_GLOBAL(cdspr2,CDSPR2)
#define F77_chemv F77_GLOBAL(cchemv,CCHEMV)
#define F77_chbmv F77_GLOBAL(cchbmv,CCHBMV)
#define F77_chpmv F77_GLOBAL(cchpmv,CCHPMV)
#define F77_cgeru F77_GLOBAL(ccgeru,CCGERU)
#define F77_cgerc F77_GLOBAL(ccgerc,CCGERC)
#define F77_cher F77_GLOBAL(ccher,CCHER)
#define F77_chpr F77_GLOBAL(cchpr,CCHPR)
#define F77_cher2 F77_GLOBAL(ccher2,CCHER2)
#define F77_chpr2 F77_GLOBAL(cchpr2,CCHPR2)
#define F77_zhemv F77_GLOBAL(czhemv,CZHEMV)
#define F77_zhbmv F77_GLOBAL(czhbmv,CZHBMV)
#define F77_zhpmv F77_GLOBAL(czhpmv,CZHPMV)
#define F77_zgeru F77_GLOBAL(czgeru,CZGERU)
#define F77_zgerc F77_GLOBAL(czgerc,CZGERC)
#define F77_zher F77_GLOBAL(czher,CZHER)
#define F77_zhpr F77_GLOBAL(czhpr,CZHPR)
#define F77_zher2 F77_GLOBAL(czher2,CZHER2)
#define F77_zhpr2 F77_GLOBAL(czhpr2,CZHPR2)
#define F77_sgemv F77_GLOBAL(csgemv,CSGEMV)
#define F77_sgbmv F77_GLOBAL(csgbmv,CSGBMV)
#define F77_strmv F77_GLOBAL(cstrmv,CSTRMV)
#define F77_stbmv F77_GLOBAL(cstbmv,CSTBMV)
#define F77_stpmv F77_GLOBAL(cstpmv,CSTPMV)
#define F77_strsv F77_GLOBAL(cstrsv,CSTRSV)
#define F77_stbsv F77_GLOBAL(cstbsv,CSTBSV)
#define F77_stpsv F77_GLOBAL(cstpsv,CSTPSV)
#define F77_dgemv F77_GLOBAL(cdgemv,CDGEMV)
#define F77_dgbmv F77_GLOBAL(cdgbmv,CDGBMV)
#define F77_dtrmv F77_GLOBAL(cdtrmv,CDTRMV)
#define F77_dtbmv F77_GLOBAL(cdtbmv,CDTBMV)
#define F77_dtpmv F77_GLOBAL(cdtpmv,CDTPMV)
#define F77_dtrsv F77_GLOBAL(cdtrsv,CDTRSV)
#define F77_dtbsv F77_GLOBAL(cdtbsv,CDTBSV)
#define F77_dtpsv F77_GLOBAL(cdtpsv,CDTPSV)
#define F77_cgemv F77_GLOBAL(ccgemv,CCGEMV)
#define F77_cgbmv F77_GLOBAL(ccgbmv,CCGBMV)
#define F77_ctrmv F77_GLOBAL(cctrmv,CCTRMV)
#define F77_ctbmv F77_GLOBAL(cctbmv,CCTPMV)
#define F77_ctpmv F77_GLOBAL(cctpmv,CCTPMV)
#define F77_ctrsv F77_GLOBAL(cctrsv,CCTRSV)
#define F77_ctbsv F77_GLOBAL(cctbsv,CCTBSV)
#define F77_ctpsv F77_GLOBAL(cctpsv,CCTPSV)
#define F77_zgemv F77_GLOBAL(czgemv,CZGEMV)
#define F77_zgbmv F77_GLOBAL(czgbmv,CZGBMV)
#define F77_ztrmv F77_GLOBAL(cztrmv,CZTRMV)
#define F77_ztbmv F77_GLOBAL(cztbmv,CZTBMV)
#define F77_ztpmv F77_GLOBAL(cztpmv,CZTPMV)
#define F77_ztrsv F77_GLOBAL(cztrsv,CZTRSV)
#define F77_ztbsv F77_GLOBAL(cztbsv,CZTBSV)
#define F77_ztpsv F77_GLOBAL(cztpsv,CZTPSV)
/*
* Level 3 BLAS
*/
#define F77_s3chke F77_GLOBAL(cs3chke,CS3CHKE)
#define F77_d3chke F77_GLOBAL(cd3chke,CD3CHKE)
#define F77_c3chke F77_GLOBAL(cc3chke,CC3CHKE)
#define F77_z3chke F77_GLOBAL(cz3chke,CZ3CHKE)
#define F77_chemm F77_GLOBAL(cchemm,CCHEMM)
#define F77_cherk F77_GLOBAL(ccherk,CCHERK)
#define F77_cher2k F77_GLOBAL(ccher2k,CCHER2K)
#define F77_zhemm F77_GLOBAL(czhemm,CZHEMM)
#define F77_zherk F77_GLOBAL(czherk,CZHERK)
#define F77_zher2k F77_GLOBAL(czher2k,CZHER2K)
#define F77_sgemm F77_GLOBAL(csgemm,CSGEMM)
#define F77_ssymm F77_GLOBAL(cssymm,CSSYMM)
#define F77_ssyrk F77_GLOBAL(cssyrk,CSSYRK)
#define F77_ssyr2k F77_GLOBAL(cssyr2k,CSSYR2K)
#define F77_strmm F77_GLOBAL(cstrmm,CSTRMM)
#define F77_strsm F77_GLOBAL(cstrsm,CSTRSM)
#define F77_dgemm F77_GLOBAL(cdgemm,CDGEMM)
#define F77_dsymm F77_GLOBAL(cdsymm,CDSYMM)
#define F77_dsyrk F77_GLOBAL(cdsyrk,CDSYRK)
#define F77_dsyr2k F77_GLOBAL(cdsyr2k,CDSYR2K)
#define F77_dtrmm F77_GLOBAL(cdtrmm,CDTRMM)
#define F77_dtrsm F77_GLOBAL(cdtrsm,CDTRSM)
#define F77_cgemm F77_GLOBAL(ccgemm,CCGEMM)
#define F77_csymm F77_GLOBAL(ccsymm,CCSYMM)
#define F77_csyrk F77_GLOBAL(ccsyrk,CCSYRK)
#define F77_csyr2k F77_GLOBAL(ccsyr2k,CCSYR2K)
#define F77_ctrmm F77_GLOBAL(cctrmm,CCTRMM)
#define F77_ctrsm F77_GLOBAL(cctrsm,CCTRSM)
#define F77_zgemm F77_GLOBAL(czgemm,CZGEMM)
#define F77_zsymm F77_GLOBAL(czsymm,CZSYMM)
#define F77_zsyrk F77_GLOBAL(czsyrk,CZSYRK)
#define F77_zsyr2k F77_GLOBAL(czsyr2k,CZSYR2K)
#define F77_ztrmm F77_GLOBAL(cztrmm,CZTRMM)
#define F77_ztrsm F77_GLOBAL(cztrsm, CZTRSM)
void get_transpose_type(char *type, CBLAS_TRANSPOSE *trans);
void get_uplo_type(char *type, CBLAS_UPLO *uplo);
void get_diag_type(char *type, CBLAS_DIAG *diag);
void get_side_type(char *type, CBLAS_SIDE *side);
#endif /* CBLAS_TEST_H */

View File

@ -0,0 +1,168 @@
# This Makefile compiles the CBLAS routines
#
# Error handling routines for level 2 & 3
set (ERRHAND cblas_globals.c cblas_xerbla.c xerbla.c)
#
#
# CBLAS routines
#
# Level 1
#
#
#
# All object files for single real precision
#
set (SLEV1 cblas_srotg.c cblas_srotmg.c cblas_srot.c cblas_srotm.c
cblas_sswap.c cblas_sscal.c cblas_scopy.c cblas_saxpy.c
cblas_sdot.c cblas_sdsdot.c cblas_snrm2.c cblas_sasum.c
cblas_isamax.c sdotsub.f sdsdotsub.f snrm2sub.f sasumsub.f
isamaxsub.f)
#
# All object files for double real precision
#
set (DLEV1 cblas_drotg.c cblas_drotmg.c cblas_drot.c cblas_drotm.c
cblas_dswap.c cblas_dscal.c cblas_dcopy.c cblas_daxpy.c
cblas_ddot.c cblas_dsdot.c cblas_dnrm2.c cblas_dasum.c
cblas_idamax.c ddotsub.f dsdotsub.f dnrm2sub.f
dasumsub.f idamaxsub.f)
#
# All object files for single complex precision
#
set (CLEV1 cblas_cswap.c cblas_cscal.c cblas_csscal.c cblas_ccopy.c
cblas_caxpy.c cblas_cdotu_sub.c cblas_cdotc_sub.c
cblas_icamax.c cdotcsub.f cdotusub.f icamaxsub.f)
#
# All object files for double complex precision
#
set (ZLEV1 cblas_zswap.c cblas_zscal.c cblas_zdscal.c cblas_zcopy.c
cblas_zaxpy.c cblas_zdotu_sub.c cblas_zdotc_sub.c cblas_dznrm2.c
cblas_dzasum.c cblas_izamax.c zdotcsub.f zdotusub.f
dzasumsub.f dznrm2sub.f izamaxsub.f)
#
# Common files for single complex precision
#
set (SCLEV1 cblas_scasum.c scasumsub.f cblas_scnrm2.c scnrm2sub.f)
#
# All object files
#
set (ALEV1 ${slev1} ${dlev1} ${clev1} ${zlev1} ${sclev1})
#
#
# CBLAS routines
#
# Level 2
#
#
#
# All object files for single real precision
#
set (SLEV2 cblas_sgemv.c cblas_sgbmv.c cblas_sger.c cblas_ssbmv.c cblas_sspmv.c
cblas_sspr.c cblas_sspr2.c cblas_ssymv.c cblas_ssyr.c cblas_ssyr2.c
cblas_stbmv.c cblas_stbsv.c cblas_stpmv.c cblas_stpsv.c cblas_strmv.c
cblas_strsv.c)
#
# All object files for double real precision
#
set (DLEV2 cblas_dgemv.c cblas_dgbmv.c cblas_dger.c cblas_dsbmv.c cblas_dspmv.c
cblas_dspr.c cblas_dspr2.c cblas_dsymv.c cblas_dsyr.c cblas_dsyr2.c
cblas_dtbmv.c cblas_dtbsv.c cblas_dtpmv.c cblas_dtpsv.c cblas_dtrmv.c
cblas_dtrsv.c)
#
# All object files for single complex precision
#
set (CLEV2 cblas_cgemv.c cblas_cgbmv.c cblas_chemv.c cblas_chbmv.c cblas_chpmv.c
cblas_ctrmv.c cblas_ctbmv.c cblas_ctpmv.c cblas_ctrsv.c cblas_ctbsv.c
cblas_ctpsv.c cblas_cgeru.c cblas_cgerc.c cblas_cher.c cblas_cher2.c
cblas_chpr.c cblas_chpr2.c)
#
# All object files for double complex precision
#
set (ZLEV2 cblas_zgemv.c cblas_zgbmv.c cblas_zhemv.c cblas_zhbmv.c cblas_zhpmv.c
cblas_ztrmv.c cblas_ztbmv.c cblas_ztpmv.c cblas_ztrsv.c cblas_ztbsv.c
cblas_ztpsv.c cblas_zgeru.c cblas_zgerc.c cblas_zher.c cblas_zher2.c
cblas_zhpr.c cblas_zhpr2.c)
#
# All object files
#
set (AVEL2 ${slev2} ${dlev2} ${clev2} ${zlev2})
#
#
# CBLAS routines
#
# Level 3
#
#
#
# All object files for single real precision
#
set (SLEV3 cblas_sgemm.c cblas_ssymm.c cblas_ssyrk.c cblas_ssyr2k.c cblas_strmm.c
cblas_strsm.c)
#
# All object files for double real precision
#
set (DLEV3 cblas_dgemm.c cblas_dsymm.c cblas_dsyrk.c cblas_dsyr2k.c cblas_dtrmm.c
cblas_dtrsm.c)
#
# All object files for single complex precision
#
set (CLEV3 cblas_cgemm.c cblas_csymm.c cblas_chemm.c cblas_cherk.c
cblas_cher2k.c cblas_ctrmm.c cblas_ctrsm.c cblas_csyrk.c
cblas_csyr2k.c)
#
# All object files for double complex precision
#
set (ZLEV3 cblas_zgemm.c cblas_zsymm.c cblas_zhemm.c cblas_zherk.c
cblas_zher2k.c cblas_ztrmm.c cblas_ztrsm.c cblas_zsyrk.c
cblas_zsyr2k.c)
#
# All object files
#
set (ALEV3 ${slev3} ${dlev3} ${clev3} ${zlev3})
# default build all of it
set(ALLOBJ ${SCLEV1} ${SLEV1} ${SLEV2} ${SLEV3} ${ERRHAND}
${DLEV1} ${DLEV2} ${DLEV3}
${CLEV1} ${CLEV2} ${CLEV3}
${ZLEV1} ${ZLEV2} ${ZLEV3} )
# Single real precision
if(CBLAS_SINGLE)
set(ALLOBJ ${SCLEV1} ${SLEV1} ${SLEV2} ${SLEV3} ${ERRHAND})
endif(CBLAS_SINGLE)
# Double real precision
if(CBLAS_DOUBLE)
set(ALLOBJ ${DLEV1} ${DLEV2} ${DLEV3} ${ERRHAND})
endif(CBLAS_DOUBLE)
# Single complex precision
if (CBLAS_COMPLEX)
set(ALLOBJ ${CLEV1} ${SCLEV1} ${CLEV2} ${CLEV3} ${ERRHAND})
endif(CBLAS_COMPLEX)
# Double complex precision
if (CBLAS_COMPLEX16)
set(ALLOBJ ${ZLEV1} ${ZLEV2} ${ZLEV3} ${ERRHAND})
endif(CBLAS_COMPLEX16)
add_library(cblas ${ALLOBJ})
target_link_libraries(cblas ${BLAS_LIBRARIES} )
lapack_install_library(cblas)

View File

@ -0,0 +1,249 @@
# This Makefile compiles the CBLAS routines
#
include ../../make.inc
#
# Erase all object and archive files
#
all: cblaslib
clean:
rm -f *.o a.out core
# Error handling routines for level 2 & 3
errhand = cblas_globals.o cblas_xerbla.o xerbla.o
# Object files of all routines
alev = $(alev1) $(alev2) $(alev3) $(errhand)
#
#
# CBLAS routines
#
# Level 1
#
#
#
# All object files for single real precision
#
slev1 = cblas_srotg.o cblas_srotmg.o cblas_srot.o cblas_srotm.o \
cblas_sswap.o cblas_sscal.o cblas_scopy.o cblas_saxpy.o \
cblas_sdot.o cblas_sdsdot.o cblas_snrm2.o cblas_sasum.o \
cblas_isamax.o sdotsub.o sdsdotsub.o snrm2sub.o sasumsub.o \
isamaxsub.o
#
# All object files for double real precision
#
dlev1 = cblas_drotg.o cblas_drotmg.o cblas_drot.o cblas_drotm.o \
cblas_dswap.o cblas_dscal.o cblas_dcopy.o cblas_daxpy.o \
cblas_ddot.o cblas_dsdot.o cblas_dnrm2.o cblas_dasum.o \
cblas_idamax.o ddotsub.o dsdotsub.o dnrm2sub.o \
dasumsub.o idamaxsub.o
#
# All object files for single complex precision
#
clev1 = cblas_cswap.o cblas_cscal.o cblas_csscal.o cblas_ccopy.o \
cblas_caxpy.o cblas_cdotu_sub.o cblas_cdotc_sub.o \
cblas_icamax.o cdotcsub.o cdotusub.o icamaxsub.o
#
# All object files for double complex precision
#
zlev1 = cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \
cblas_zaxpy.o cblas_zdotu_sub.o cblas_zdotc_sub.o cblas_dznrm2.o \
cblas_dzasum.o cblas_izamax.o zdotcsub.o zdotusub.o \
dzasumsub.o dznrm2sub.o izamaxsub.o
#
# Common files for single / complex precision
#
sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o
#
# All object files
#
alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1)
#
# Make an archive file
#
# Single real precision
slib1: $(slev1) $(sclev1)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev1) $(sclev1)
$(RANLIB) $(CBLASLIB)
# Double real precision
dlib1: $(dlev1)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev1)
$(RANLIB) $(CBLASLIB)
# Single complex precision
clib1: $(clev1) $(sclev1)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev1) $(sclev1)
$(RANLIB) $(CBLASLIB)
# Double complex precision
zlib1: $(zlev1)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev1)
$(RANLIB) $(CBLASLIB)
# All precisions
all1: $(alev1)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev1)
$(RANLIB) $(CBLASLIB)
#
#
# CBLAS routines
#
# Level 2
#
#
#
# All object files for single real precision
#
slev2 = cblas_sgemv.o cblas_sgbmv.o cblas_sger.o cblas_ssbmv.o cblas_sspmv.o \
cblas_sspr.o cblas_sspr2.o cblas_ssymv.o cblas_ssyr.o cblas_ssyr2.o \
cblas_stbmv.o cblas_stbsv.o cblas_stpmv.o cblas_stpsv.o cblas_strmv.o \
cblas_strsv.o
#
# All object files for double real precision
#
dlev2 = cblas_dgemv.o cblas_dgbmv.o cblas_dger.o cblas_dsbmv.o cblas_dspmv.o \
cblas_dspr.o cblas_dspr2.o cblas_dsymv.o cblas_dsyr.o cblas_dsyr2.o \
cblas_dtbmv.o cblas_dtbsv.o cblas_dtpmv.o cblas_dtpsv.o cblas_dtrmv.o \
cblas_dtrsv.o
#
# All object files for single complex precision
#
clev2 = cblas_cgemv.o cblas_cgbmv.o cblas_chemv.o cblas_chbmv.o cblas_chpmv.o \
cblas_ctrmv.o cblas_ctbmv.o cblas_ctpmv.o cblas_ctrsv.o cblas_ctbsv.o \
cblas_ctpsv.o cblas_cgeru.o cblas_cgerc.o cblas_cher.o cblas_cher2.o \
cblas_chpr.o cblas_chpr2.o
#
# All object files for double complex precision
#
zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \
cblas_ztrmv.o cblas_ztbmv.o cblas_ztpmv.o cblas_ztrsv.o cblas_ztbsv.o \
cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \
cblas_zhpr.o cblas_zhpr2.o
#
# All object files
#
alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2)
#
# Make an archive file
#
# Single real precision
slib2: $(slev2) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev2) $(errhand)
$(RANLIB) $(CBLASLIB)
# Double real precision
dlib2: $(dlev2) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev2) $(errhand)
$(RANLIB) $(CBLASLIB)
# Single complex precision
clib2: $(clev2) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev2) $(errhand)
$(RANLIB) $(CBLASLIB)
# Double complex precision
zlib2: $(zlev2) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev2) $(errhand)
$(RANLIB) $(CBLASLIB)
# All precisions
all2: $(alev2) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev2) $(errhand)
$(RANLIB) $(CBLASLIB)
#
#
# CBLAS routines
#
# Level 3
#
#
#
# All object files for single real precision
#
slev3 = cblas_sgemm.o cblas_ssymm.o cblas_ssyrk.o cblas_ssyr2k.o cblas_strmm.o\
cblas_strsm.o
#
# All object files for double real precision
#
dlev3 = cblas_dgemm.o cblas_dsymm.o cblas_dsyrk.o cblas_dsyr2k.o cblas_dtrmm.o\
cblas_dtrsm.o
#
# All object files for single complex precision
#
clev3 = cblas_cgemm.o cblas_csymm.o cblas_chemm.o cblas_cherk.o\
cblas_cher2k.o cblas_ctrmm.o cblas_ctrsm.o cblas_csyrk.o\
cblas_csyr2k.o
#
# All object files for double complex precision
#
zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o\
cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o\
cblas_zsyr2k.o
#
# All object files
#
alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3)
#
# Make an archive file
#
# Single real precision
slib3: $(slev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(slev3) $(errhand)
$(RANLIB) $(CBLASLIB)
# Double real precision
dlib3: $(dlev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(dlev3) $(errhand)
$(RANLIB) $(CBLASLIB)
# Single complex precision
clib3: $(clev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(clev3) $(errhand)
$(RANLIB) $(CBLASLIB)
# Single complex precision
zlib3: $(zlev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(zlev3) $(errhand)
$(RANLIB) $(CBLASLIB)
# All precisions
all3: $(alev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev3)
$(RANLIB) $(CBLASLIB)
# All levels and precisions
cblaslib: $(alev)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $(alev)
$(RANLIB) $(CBLASLIB)
FRC:
@FRC=$(FRC)
.c.o:
$(CC) -c $(CFLAGS) -I ../include -o $@ $<
.f.o:
$(FORTRAN) $(OPTS) -c $< -o $@

View File

@ -0,0 +1,22 @@
/*
* cblas_caxpy.c
*
* The program is a C interface to caxpy.
*
* Written by Keita Teranishi. 2/11/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_caxpy( const int N, const void *alpha, const void *X,
const int incX, void *Y, const int incY)
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
F77_caxpy( &F77_N, alpha, X, &F77_incX, Y, &F77_incY);
}

View File

@ -0,0 +1,22 @@
/*
* cblas_ccopy.c
*
* The program is a C interface to ccopy.
*
* Written by Keita Teranishi. 2/11/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_ccopy( const int N, const void *X,
const int incX, void *Y, const int incY)
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
F77_ccopy( &F77_N, X, &F77_incX, Y, &F77_incY);
}

View File

@ -0,0 +1,24 @@
/*
* cblas_cdotc_sub.c
*
* The program is a C interface to cdotc.
* It calls the fortran wrapper before calling cdotc.
*
* Written by Keita Teranishi. 2/11/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cdotc_sub( const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotc)
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
F77_cdotc_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotc);
return;
}

View File

@ -0,0 +1,24 @@
/*
* cblas_cdotu_sub.c
*
* The program is a C interface to cdotu.
* It calls the fortran wrapper before calling cdotu.
*
* Written by Keita Teranishi. 2/11/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cdotu_sub( const int N, const void *X, const int incX,
const void *Y, const int incY, void *dotu)
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
F77_cdotu_sub( &F77_N, X, &F77_incX, Y, &F77_incY, dotu);
return;
}

View File

@ -0,0 +1,165 @@
/*
* cblas_cgbmv.c
* The program is a C interface of cgbmv
*
* Keita Teranishi 5/20/98
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cgbmv(const CBLAS_LAYOUT layout,
const CBLAS_TRANSPOSE TransA, const int M, const int N,
const int KL, const int KU,
const void *alpha, const void *A, const int lda,
const void *X, const int incX, const void *beta,
void *Y, const int incY)
{
char TA;
#ifdef F77_CHAR
F77_CHAR F77_TA;
#else
#define F77_TA &TA
#endif
#ifdef F77_INT
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
F77_INT F77_KL=KL,F77_KU=KU;
#else
#define F77_M M
#define F77_N N
#define F77_lda lda
#define F77_KL KL
#define F77_KU KU
#define F77_incX incx
#define F77_incY incY
#endif
int n=0, i=0, incx=incX;
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
float ALPHA[2],BETA[2];
int tincY, tincx;
float *x=(float *)X, *y=(float *)Y, *st=0, *tx=0;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (TransA == CblasNoTrans) TA = 'N';
else if (TransA == CblasTrans) TA = 'T';
else if (TransA == CblasConjTrans) TA = 'C';
else
{
cblas_xerbla(2, "cblas_cgbmv","Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_TA = C2F_CHAR(&TA);
#endif
F77_cgbmv(F77_TA, &F77_M, &F77_N, &F77_KL, &F77_KU, alpha,
A, &F77_lda, X, &F77_incX, beta, Y, &F77_incY);
}
else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if (TransA == CblasNoTrans) TA = 'T';
else if (TransA == CblasTrans) TA = 'N';
else if (TransA == CblasConjTrans)
{
ALPHA[0]= *alp;
ALPHA[1]= -alp[1];
BETA[0]= *bet;
BETA[1]= -bet[1];
TA = 'N';
if (M > 0)
{
n = M << 1;
x = malloc(n*sizeof(float));
tx = x;
if( incX > 0 ) {
i = incX << 1 ;
tincx = 2;
st= x+n;
} else {
i = incX *(-2);
tincx = -2;
st = x-2;
x +=(n-2);
}
do
{
*x = *xx;
x[1] = -xx[1];
x += tincx ;
xx += i;
}
while (x != st);
x=tx;
#ifdef F77_INT
F77_incX = 1;
#else
incx = 1;
#endif
if( incY > 0 )
tincY = incY;
else
tincY = -incY;
y++;
if (N > 0)
{
i = tincY << 1;
n = i * N ;
st = y + n;
do {
*y = -(*y);
y += i;
} while(y != st);
y -= n;
}
}
else x = (float *) X;
}
else
{
cblas_xerbla(2, "cblas_cgbmv","Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_TA = C2F_CHAR(&TA);
#endif
if (TransA == CblasConjTrans)
F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, ALPHA,
A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY);
else
F77_cgbmv(F77_TA, &F77_N, &F77_M, &F77_KU, &F77_KL, alpha,
A ,&F77_lda, x,&F77_incX, beta, Y, &F77_incY);
if (TransA == CblasConjTrans)
{
if (x != X) free(x);
if (N > 0)
{
do
{
*y = -(*y);
y += i;
}
while (y != st);
}
}
}
else cblas_xerbla(1, "cblas_cgbmv", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
}

View File

@ -0,0 +1,109 @@
/*
*
* cblas_cgemm.c
* This program is a C interface to cgemm.
* Written by Keita Teranishi
* 4/8/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N,
const int K, const void *alpha, const void *A,
const int lda, const void *B, const int ldb,
const void *beta, void *C, const int ldc)
{
char TA, TB;
#ifdef F77_CHAR
F77_CHAR F77_TA, F77_TB;
#else
#define F77_TA &TA
#define F77_TB &TB
#endif
#ifdef F77_INT
F77_INT F77_M=M, F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb;
F77_INT F77_ldc=ldc;
#else
#define F77_M M
#define F77_N N
#define F77_K K
#define F77_lda lda
#define F77_ldb ldb
#define F77_ldc ldc
#endif
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if( layout == CblasColMajor )
{
if(TransA == CblasTrans) TA='T';
else if ( TransA == CblasConjTrans ) TA='C';
else if ( TransA == CblasNoTrans ) TA='N';
else
{
cblas_xerbla(2, "cblas_cgemm", "Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if(TransB == CblasTrans) TB='T';
else if ( TransB == CblasConjTrans ) TB='C';
else if ( TransB == CblasNoTrans ) TB='N';
else
{
cblas_xerbla(3, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_TA = C2F_CHAR(&TA);
F77_TB = C2F_CHAR(&TB);
#endif
F77_cgemm(F77_TA, F77_TB, &F77_M, &F77_N, &F77_K, alpha, A,
&F77_lda, B, &F77_ldb, beta, C, &F77_ldc);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if(TransA == CblasTrans) TB='T';
else if ( TransA == CblasConjTrans ) TB='C';
else if ( TransA == CblasNoTrans ) TB='N';
else
{
cblas_xerbla(2, "cblas_cgemm", "Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if(TransB == CblasTrans) TA='T';
else if ( TransB == CblasConjTrans ) TA='C';
else if ( TransB == CblasNoTrans ) TA='N';
else
{
cblas_xerbla(2, "cblas_cgemm", "Illegal TransB setting, %d\n", TransB);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_TA = C2F_CHAR(&TA);
F77_TB = C2F_CHAR(&TB);
#endif
F77_cgemm(F77_TA, F77_TB, &F77_N, &F77_M, &F77_K, alpha, B,
&F77_ldb, A, &F77_lda, beta, C, &F77_ldc);
}
else cblas_xerbla(1, "cblas_cgemm", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,162 @@
/*
* cblas_cgemv.c
* The program is a C interface of cgemv
*
* Keita Teranishi 5/20/98
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cgemv(const CBLAS_LAYOUT layout,
const CBLAS_TRANSPOSE TransA, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *X, const int incX, const void *beta,
void *Y, const int incY)
{
char TA;
#ifdef F77_CHAR
F77_CHAR F77_TA;
#else
#define F77_TA &TA
#endif
#ifdef F77_INT
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
#else
#define F77_M M
#define F77_N N
#define F77_lda lda
#define F77_incX incx
#define F77_incY incY
#endif
int n=0, i=0, incx=incX;
const float *xx= (const float *)X;
float ALPHA[2],BETA[2];
int tincY, tincx;
float *x=(float *)X, *y=(float *)Y, *st=0, *tx=0;
const float *stx = x;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (TransA == CblasNoTrans) TA = 'N';
else if (TransA == CblasTrans) TA = 'T';
else if (TransA == CblasConjTrans) TA = 'C';
else
{
cblas_xerbla(2, "cblas_cgemv","Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_TA = C2F_CHAR(&TA);
#endif
F77_cgemv(F77_TA, &F77_M, &F77_N, alpha, A, &F77_lda, X, &F77_incX,
beta, Y, &F77_incY);
}
else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if (TransA == CblasNoTrans) TA = 'T';
else if (TransA == CblasTrans) TA = 'N';
else if (TransA == CblasConjTrans)
{
ALPHA[0]= *( (const float *) alpha );
ALPHA[1]= -( *( (const float *) alpha+1) );
BETA[0]= *( (const float *) beta );
BETA[1]= -( *( (const float *) beta+1 ) );
TA = 'N';
if (M > 0)
{
n = M << 1;
x = malloc(n*sizeof(float));
tx = x;
if( incX > 0 ) {
i = incX << 1 ;
tincx = 2;
st= x+n;
} else {
i = incX *(-2);
tincx = -2;
st = x-2;
x +=(n-2);
}
do
{
*x = *xx;
x[1] = -xx[1];
x += tincx ;
xx += i;
}
while (x != st);
x=tx;
F77_incX = 1;
if(incY > 0)
tincY = incY;
else
tincY = -incY;
y++;
if (N > 0)
{
i = tincY << 1;
n = i * N ;
st = y + n;
do {
*y = -(*y);
y += i;
} while(y != st);
y -= n;
}
stx = x;
}
else stx = (const float *)X;
}
else
{
cblas_xerbla(2, "cblas_cgemv","Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_TA = C2F_CHAR(&TA);
#endif
if (TransA == CblasConjTrans)
F77_cgemv(F77_TA, &F77_N, &F77_M, ALPHA, A, &F77_lda, stx,
&F77_incX, BETA, Y, &F77_incY);
else
F77_cgemv(F77_TA, &F77_N, &F77_M, alpha, A, &F77_lda, x,
&F77_incX, beta, Y, &F77_incY);
if (TransA == CblasConjTrans)
{
if (x != (const float *)X) free(x);
if (N > 0)
{
do
{
*y = -(*y);
y += i;
}
while (y != st);
}
}
}
else cblas_xerbla(1, "cblas_cgemv", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,84 @@
/*
* cblas_cgerc.c
* The program is a C interface to cgerc.
*
* Keita Teranishi 5/20/98
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cgerc(const CBLAS_LAYOUT layout, const int M, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda)
{
#ifdef F77_INT
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
#else
#define F77_M M
#define F77_N N
#define F77_incX incX
#define F77_incY incy
#define F77_lda lda
#endif
int n, i, tincy, incy=incY;
float *y=(float *)Y, *yy=(float *)Y, *ty, *st;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
F77_cgerc( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A,
&F77_lda);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if (N > 0)
{
n = N << 1;
y = malloc(n*sizeof(float));
ty = y;
if( incY > 0 ) {
i = incY << 1;
tincy = 2;
st= y+n;
} else {
i = incY *(-2);
tincy = -2;
st = y-2;
y +=(n-2);
}
do
{
*y = *yy;
y[1] = -yy[1];
y += tincy ;
yy += i;
}
while (y != st);
y = ty;
#ifdef F77_INT
F77_incY = 1;
#else
incy = 1;
#endif
}
else y = (float *) Y;
F77_cgeru( &F77_N, &F77_M, alpha, y, &F77_incY, X, &F77_incX, A,
&F77_lda);
if(Y!=y)
free(y);
} else cblas_xerbla(1, "cblas_cgerc", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,45 @@
/*
* cblas_cgeru.c
* The program is a C interface to cgeru.
*
* Keita Teranishi 5/20/98
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cgeru(const CBLAS_LAYOUT layout, const int M, const int N,
const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda)
{
#ifdef F77_INT
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
#else
#define F77_M M
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#define F77_lda lda
#endif
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
F77_cgeru( &F77_M, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, A,
&F77_lda);
}
else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
F77_cgeru( &F77_N, &F77_M, alpha, Y, &F77_incY, X, &F77_incX, A,
&F77_lda);
}
else cblas_xerbla(1, "cblas_cgeru","Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,159 @@
/*
* cblas_chbmv.c
* The program is a C interface to chbmv
*
* Keita Teranishi 5/18/98
*
*/
#include "cblas.h"
#include "cblas_f77.h"
#include <stdio.h>
#include <stdlib.h>
void cblas_chbmv(const CBLAS_LAYOUT layout,
const CBLAS_UPLO Uplo,const int N,const int K,
const void *alpha, const void *A, const int lda,
const void *X, const int incX, const void *beta,
void *Y, const int incY)
{
char UL;
#ifdef F77_CHAR
F77_CHAR F77_UL;
#else
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_incX=incX, F77_incY=incY;
#else
#define F77_N N
#define F77_K K
#define F77_lda lda
#define F77_incX incx
#define F77_incY incY
#endif
int n, i=0, incx=incX;
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
float ALPHA[2],BETA[2];
int tincY, tincx;
float *x=(float *)X, *y=(float *)Y, *st=0, *tx;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (Uplo == CblasLower) UL = 'L';
else if (Uplo == CblasUpper) UL = 'U';
else
{
cblas_xerbla(2, "cblas_chbmv","Illegal Uplo setting, %d\n",Uplo );
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
F77_chbmv(F77_UL, &F77_N, &F77_K, alpha, A, &F77_lda, X,
&F77_incX, beta, Y, &F77_incY);
}
else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
ALPHA[0]= *alp;
ALPHA[1]= -alp[1];
BETA[0]= *bet;
BETA[1]= -bet[1];
if (N > 0)
{
n = N << 1;
x = malloc(n*sizeof(float));
tx = x;
if( incX > 0 ) {
i = incX << 1 ;
tincx = 2;
st= x+n;
} else {
i = incX *(-2);
tincx = -2;
st = x-2;
x +=(n-2);
}
do
{
*x = *xx;
x[1] = -xx[1];
x += tincx ;
xx += i;
}
while (x != st);
x=tx;
#ifdef F77_INT
F77_incX = 1;
#else
incx = 1;
#endif
if(incY > 0)
tincY = incY;
else
tincY = -incY;
y++;
i = tincY << 1;
n = i * N ;
st = y + n;
do {
*y = -(*y);
y += i;
} while(y != st);
y -= n;
} else
x = (float *) X;
if (Uplo == CblasUpper) UL = 'L';
else if (Uplo == CblasLower) UL = 'U';
else
{
cblas_xerbla(2, "cblas_chbmv","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
F77_chbmv(F77_UL, &F77_N, &F77_K, ALPHA,
A ,&F77_lda, x,&F77_incX, BETA, Y, &F77_incY);
}
else
{
cblas_xerbla(1, "cblas_chbmv","Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if ( layout == CblasRowMajor )
{
RowMajorStrg = 1;
if(X!=x)
free(x);
if (N > 0)
{
do
{
*y = -(*y);
y += i;
}
while (y != st);
}
}
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,106 @@
/*
*
* cblas_chemm.c
* This program is a C interface to chemm.
* Written by Keita Teranishi
* 4/8/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_chemm(const CBLAS_LAYOUT layout, const CBLAS_SIDE Side,
const CBLAS_UPLO Uplo, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc)
{
char SD, UL;
#ifdef F77_CHAR
F77_CHAR F77_SD, F77_UL;
#else
#define F77_SD &SD
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb;
F77_INT F77_ldc=ldc;
#else
#define F77_M M
#define F77_N N
#define F77_lda lda
#define F77_ldb ldb
#define F77_ldc ldc
#endif
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if( layout == CblasColMajor )
{
if( Side == CblasRight) SD='R';
else if ( Side == CblasLeft ) SD='L';
else
{
cblas_xerbla(2, "cblas_chemm", "Illegal Side setting, %d\n", Side);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Uplo == CblasUpper) UL='U';
else if ( Uplo == CblasLower ) UL='L';
else
{
cblas_xerbla(3, "cblas_chemm", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_SD = C2F_CHAR(&SD);
#endif
F77_chemm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda,
B, &F77_ldb, beta, C, &F77_ldc);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if( Side == CblasRight) SD='L';
else if ( Side == CblasLeft ) SD='R';
else
{
cblas_xerbla(2, "cblas_chemm", "Illegal Side setting, %d\n", Side);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Uplo == CblasUpper) UL='L';
else if ( Uplo == CblasLower ) UL='U';
else
{
cblas_xerbla(3, "cblas_chemm", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_SD = C2F_CHAR(&SD);
#endif
F77_chemm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A,
&F77_lda, B, &F77_ldb, beta, C, &F77_ldc);
}
else cblas_xerbla(1, "cblas_chemm", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,160 @@
/*
* cblas_chemv.c
* The program is a C interface to chemv
*
* Keita Teranishi 5/18/98
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
#include "cblas_f77.h"
void cblas_chemv(const CBLAS_LAYOUT layout,
const CBLAS_UPLO Uplo, const int N,
const void *alpha, const void *A, const int lda,
const void *X, const int incX, const void *beta,
void *Y, const int incY)
{
char UL;
#ifdef F77_CHAR
F77_CHAR F77_UL;
#else
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
#else
#define F77_N N
#define F77_lda lda
#define F77_incX incx
#define F77_incY incY
#endif
int n=0, i=0, incx=incX;
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
float ALPHA[2],BETA[2];
int tincY, tincx;
float *x=(float *)X, *y=(float *)Y, *st=0, *tx;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (Uplo == CblasUpper) UL = 'U';
else if (Uplo == CblasLower) UL = 'L';
else
{
cblas_xerbla(2, "cblas_chemv","Illegal Uplo setting, %d\n",Uplo );
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
F77_chemv(F77_UL, &F77_N, alpha, A, &F77_lda, X, &F77_incX,
beta, Y, &F77_incY);
}
else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
ALPHA[0]= *alp;
ALPHA[1]= -alp[1];
BETA[0]= *bet;
BETA[1]= -bet[1];
if (N > 0)
{
n = N << 1;
x = malloc(n*sizeof(float));
tx = x;
if( incX > 0 ) {
i = incX << 1 ;
tincx = 2;
st= x+n;
} else {
i = incX *(-2);
tincx = -2;
st = x-2;
x +=(n-2);
}
do
{
*x = *xx;
x[1] = -xx[1];
x += tincx ;
xx += i;
}
while (x != st);
x=tx;
#ifdef F77_INT
F77_incX = 1;
#else
incx = 1;
#endif
if(incY > 0)
tincY = incY;
else
tincY = -incY;
y++;
i = tincY << 1;
n = i * N ;
st = y + n;
do {
*y = -(*y);
y += i;
} while(y != st);
y -= n;
} else
x = (float *) X;
if (Uplo == CblasUpper) UL = 'L';
else if (Uplo == CblasLower) UL = 'U';
else
{
cblas_xerbla(2, "cblas_chemv","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
F77_chemv(F77_UL, &F77_N, ALPHA, A, &F77_lda, x, &F77_incX,
BETA, Y, &F77_incY);
}
else
{
cblas_xerbla(1, "cblas_chemv","Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if ( layout == CblasRowMajor )
{
RowMajorStrg = 1;
if ( X != x )
free(x);
if (N > 0)
{
do
{
*y = -(*y);
y += i;
}
while (y != st);
}
}
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,116 @@
/*
* cblas_cher.c
* The program is a C interface to cher.
*
* Keita Teranishi 5/20/98
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cher(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const int N, const float alpha, const void *X, const int incX
,void *A, const int lda)
{
char UL;
#ifdef F77_CHAR
F77_CHAR F77_UL;
#else
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_lda=lda, F77_incX=incX;
#else
#define F77_N N
#define F77_lda lda
#define F77_incX incx
#endif
int n, i, tincx, incx=incX;
float *x=(float *)X, *xx=(float *)X, *tx, *st;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (Uplo == CblasLower) UL = 'L';
else if (Uplo == CblasUpper) UL = 'U';
else
{
cblas_xerbla(2, "cblas_cher","Illegal Uplo setting, %d\n",Uplo );
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
F77_cher(F77_UL, &F77_N, &alpha, X, &F77_incX, A, &F77_lda);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if (Uplo == CblasUpper) UL = 'L';
else if (Uplo == CblasLower) UL = 'U';
else
{
cblas_xerbla(2, "cblas_cher","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
if (N > 0)
{
n = N << 1;
x = malloc(n*sizeof(float));
tx = x;
if( incX > 0 ) {
i = incX << 1 ;
tincx = 2;
st= x+n;
} else {
i = incX *(-2);
tincx = -2;
st = x-2;
x +=(n-2);
}
do
{
*x = *xx;
x[1] = -xx[1];
x += tincx ;
xx += i;
}
while (x != st);
x=tx;
#ifdef F77_INT
F77_incX = 1;
#else
incx = 1;
#endif
}
else x = (float *) X;
F77_cher(F77_UL, &F77_N, &alpha, x, &F77_incX, A, &F77_lda);
} else
{
cblas_xerbla(1, "cblas_cher","Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if(X!=x)
free(x);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,152 @@
/*
* cblas_cher2.c
* The program is a C interface to cher2.
*
* Keita Teranishi 3/23/98
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cher2(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const int N, const void *alpha, const void *X, const int incX,
const void *Y, const int incY, void *A, const int lda)
{
char UL;
#ifdef F77_CHAR
F77_CHAR F77_UL;
#else
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_lda=lda, F77_incX=incX, F77_incY=incY;
#else
#define F77_N N
#define F77_lda lda
#define F77_incX incx
#define F77_incY incy
#endif
int n, i, j, tincx, tincy, incx=incX, incy=incY;
float *x=(float *)X, *xx=(float *)X, *y=(float *)Y,
*yy=(float *)Y, *tx, *ty, *stx, *sty;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (Uplo == CblasLower) UL = 'L';
else if (Uplo == CblasUpper) UL = 'U';
else
{
cblas_xerbla(2, "cblas_cher2","Illegal Uplo setting, %d\n",Uplo );
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
F77_cher2(F77_UL, &F77_N, alpha, X, &F77_incX,
Y, &F77_incY, A, &F77_lda);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if (Uplo == CblasUpper) UL = 'L';
else if (Uplo == CblasLower) UL = 'U';
else
{
cblas_xerbla(2, "cblas_cher2","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
if (N > 0)
{
n = N << 1;
x = malloc(n*sizeof(float));
y = malloc(n*sizeof(float));
tx = x;
ty = y;
if( incX > 0 ) {
i = incX << 1 ;
tincx = 2;
stx= x+n;
} else {
i = incX *(-2);
tincx = -2;
stx = x-2;
x +=(n-2);
}
if( incY > 0 ) {
j = incY << 1;
tincy = 2;
sty= y+n;
} else {
j = incY *(-2);
tincy = -2;
sty = y-2;
y +=(n-2);
}
do
{
*x = *xx;
x[1] = -xx[1];
x += tincx ;
xx += i;
}
while (x != stx);
do
{
*y = *yy;
y[1] = -yy[1];
y += tincy ;
yy += j;
}
while (y != sty);
x=tx;
y=ty;
#ifdef F77_INT
F77_incX = 1;
F77_incY = 1;
#else
incx = 1;
incy = 1;
#endif
} else
{
x = (float *) X;
y = (float *) Y;
}
F77_cher2(F77_UL, &F77_N, alpha, y, &F77_incY, x,
&F77_incX, A, &F77_lda);
} else
{
cblas_xerbla(1, "cblas_cher2","Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if(X!=x)
free(x);
if(Y!=y)
free(y);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,111 @@
/*
*
* cblas_cher2k.c
* This program is a C interface to cher2k.
* Written by Keita Teranishi
* 4/8/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cher2k(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const float beta,
void *C, const int ldc)
{
char UL, TR;
#ifdef F77_CHAR
F77_CHAR F77_TR, F77_UL;
#else
#define F77_TR &TR
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb;
F77_INT F77_ldc=ldc;
#else
#define F77_N N
#define F77_K K
#define F77_lda lda
#define F77_ldb ldb
#define F77_ldc ldc
#endif
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
float ALPHA[2];
const float *alp=(float *)alpha;
CBLAS_CallFromC = 1;
RowMajorStrg = 0;
if( layout == CblasColMajor )
{
if( Uplo == CblasUpper) UL='U';
else if ( Uplo == CblasLower ) UL='L';
else
{
cblas_xerbla(2, "cblas_cher2k", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Trans == CblasTrans) TR ='T';
else if ( Trans == CblasConjTrans ) TR='C';
else if ( Trans == CblasNoTrans ) TR='N';
else
{
cblas_xerbla(3, "cblas_cher2k", "Illegal Trans setting, %d\n", Trans);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TR = C2F_CHAR(&TR);
#endif
F77_cher2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if( Uplo == CblasUpper) UL='L';
else if ( Uplo == CblasLower ) UL='U';
else
{
cblas_xerbla(2, "cblas_cher2k", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Trans == CblasTrans) TR ='N';
else if ( Trans == CblasConjTrans ) TR='N';
else if ( Trans == CblasNoTrans ) TR='C';
else
{
cblas_xerbla(3, "cblas_cher2k", "Illegal Trans setting, %d\n", Trans);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TR = C2F_CHAR(&TR);
#endif
ALPHA[0]= *alp;
ALPHA[1]= -alp[1];
F77_cher2k(F77_UL,F77_TR, &F77_N, &F77_K, ALPHA, A, &F77_lda, B, &F77_ldb, &beta, C, &F77_ldc);
}
else cblas_xerbla(1, "cblas_cher2k", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,105 @@
/*
*
* cblas_cherk.c
* This program is a C interface to cherk.
* Written by Keita Teranishi
* 4/8/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cherk(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const CBLAS_TRANSPOSE Trans, const int N, const int K,
const float alpha, const void *A, const int lda,
const float beta, void *C, const int ldc)
{
char UL, TR;
#ifdef F77_CHAR
F77_CHAR F77_TR, F77_UL;
#else
#define F77_TR &TR
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_K=K, F77_lda=lda;
F77_INT F77_ldc=ldc;
#else
#define F77_N N
#define F77_K K
#define F77_lda lda
#define F77_ldc ldc
#endif
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if( layout == CblasColMajor )
{
if( Uplo == CblasUpper) UL='U';
else if ( Uplo == CblasLower ) UL='L';
else
{
cblas_xerbla(2, "cblas_cherk", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Trans == CblasTrans) TR ='T';
else if ( Trans == CblasConjTrans ) TR='C';
else if ( Trans == CblasNoTrans ) TR='N';
else
{
cblas_xerbla(3, "cblas_cherk", "Illegal Trans setting, %d\n", Trans);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TR = C2F_CHAR(&TR);
#endif
F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda,
&beta, C, &F77_ldc);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if( Uplo == CblasUpper) UL='L';
else if ( Uplo == CblasLower ) UL='U';
else
{
cblas_xerbla(3, "cblas_cherk", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Trans == CblasTrans) TR ='N';
else if ( Trans == CblasConjTrans ) TR='N';
else if ( Trans == CblasNoTrans ) TR='C';
else
{
cblas_xerbla(3, "cblas_cherk", "Illegal Trans setting, %d\n", Trans);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_SD = C2F_CHAR(&SD);
#endif
F77_cherk(F77_UL, F77_TR, &F77_N, &F77_K, &alpha, A, &F77_lda,
&beta, C, &F77_ldc);
}
else cblas_xerbla(1, "cblas_cherk", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,160 @@
/*
* cblas_chpmv.c
* The program is a C interface of chpmv
*
* Keita Teranishi 5/18/98
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
#include "cblas_f77.h"
void cblas_chpmv(const CBLAS_LAYOUT layout,
const CBLAS_UPLO Uplo,const int N,
const void *alpha, const void *AP,
const void *X, const int incX, const void *beta,
void *Y, const int incY)
{
char UL;
#ifdef F77_CHAR
F77_CHAR F77_UL;
#else
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#define F77_N N
#define F77_incX incx
#define F77_incY incY
#endif
int n, i=0, incx=incX;
const float *xx= (float *)X, *alp= (float *)alpha, *bet = (float *)beta;
float ALPHA[2],BETA[2];
int tincY, tincx;
float *x=(float *)X, *y=(float *)Y, *st=0, *tx;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (Uplo == CblasLower) UL = 'L';
else if (Uplo == CblasUpper) UL = 'U';
else
{
cblas_xerbla(2, "cblas_chpmv","Illegal Uplo setting, %d\n",Uplo );
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
F77_chpmv(F77_UL, &F77_N, alpha, AP, X,
&F77_incX, beta, Y, &F77_incY);
}
else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
ALPHA[0]= *alp;
ALPHA[1]= -alp[1];
BETA[0]= *bet;
BETA[1]= -bet[1];
if (N > 0)
{
n = N << 1;
x = malloc(n*sizeof(float));
tx = x;
if( incX > 0 ) {
i = incX << 1;
tincx = 2;
st= x+n;
} else {
i = incX *(-2);
tincx = -2;
st = x-2;
x +=(n-2);
}
do
{
*x = *xx;
x[1] = -xx[1];
x += tincx ;
xx += i;
}
while (x != st);
x=tx;
#ifdef F77_INT
F77_incX = 1;
#else
incx = 1;
#endif
if(incY > 0)
tincY = incY;
else
tincY = -incY;
y++;
i = tincY << 1;
n = i * N ;
st = y + n;
do {
*y = -(*y);
y += i;
} while(y != st);
y -= n;
} else
x = (float *) X;
if (Uplo == CblasUpper) UL = 'L';
else if (Uplo == CblasLower) UL = 'U';
else
{
cblas_xerbla(2, "cblas_chpmv","Illegal Uplo setting, %d\n", Uplo );
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
F77_chpmv(F77_UL, &F77_N, ALPHA,
AP, x, &F77_incX, BETA, Y, &F77_incY);
}
else
{
cblas_xerbla(1, "cblas_chpmv","Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if ( layout == CblasRowMajor )
{
RowMajorStrg = 1;
if(X!=x)
free(x);
if (N > 0)
{
do
{
*y = -(*y);
y += i;
}
while (y != st);
}
}
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,115 @@
/*
* cblas_chpr.c
* The program is a C interface to chpr.
*
* Keita Teranishi 3/23/98
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
#include "cblas_f77.h"
void cblas_chpr(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const int N, const float alpha, const void *X,
const int incX, void *A)
{
char UL;
#ifdef F77_CHAR
F77_CHAR F77_UL;
#else
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX;
#else
#define F77_N N
#define F77_incX incx
#endif
int n, i, tincx, incx=incX;
float *x=(float *)X, *xx=(float *)X, *tx, *st;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (Uplo == CblasLower) UL = 'L';
else if (Uplo == CblasUpper) UL = 'U';
else
{
cblas_xerbla(2, "cblas_chpr","Illegal Uplo setting, %d\n",Uplo );
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
F77_chpr(F77_UL, &F77_N, &alpha, X, &F77_incX, A);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if (Uplo == CblasUpper) UL = 'L';
else if (Uplo == CblasLower) UL = 'U';
else
{
cblas_xerbla(2, "cblas_chpr","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
if (N > 0)
{
n = N << 1;
x = malloc(n*sizeof(float));
tx = x;
if( incX > 0 ) {
i = incX << 1;
tincx = 2;
st= x+n;
} else {
i = incX *(-2);
tincx = -2;
st = x-2;
x +=(n-2);
}
do
{
*x = *xx;
x[1] = -xx[1];
x += tincx ;
xx += i;
}
while (x != st);
x=tx;
#ifdef F77_INT
F77_incX = 1;
#else
incx = 1;
#endif
}
else x = (float *) X;
F77_chpr(F77_UL, &F77_N, &alpha, x, &F77_incX, A);
} else
{
cblas_xerbla(1, "cblas_chpr","Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if(X!=x)
free(x);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,149 @@
/*
* cblas_chpr2.c
* The program is a C interface to chpr2.
*
* Keita Teranishi 5/20/98
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "cblas.h"
#include "cblas_f77.h"
void cblas_chpr2(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const int N,const void *alpha, const void *X,
const int incX,const void *Y, const int incY, void *Ap)
{
char UL;
#ifdef F77_CHAR
F77_CHAR F77_UL;
#else
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#define F77_N N
#define F77_incX incx
#define F77_incY incy
#endif
int n, i, j, tincx, tincy, incx=incX, incy=incY;
float *x=(float *)X, *xx=(float *)X, *y=(float *)Y,
*yy=(float *)Y, *tx, *ty, *stx, *sty;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (Uplo == CblasLower) UL = 'L';
else if (Uplo == CblasUpper) UL = 'U';
else
{
cblas_xerbla(2, "cblas_chpr2","Illegal Uplo setting, %d\n",Uplo );
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
F77_chpr2(F77_UL, &F77_N, alpha, X, &F77_incX, Y, &F77_incY, Ap);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if (Uplo == CblasUpper) UL = 'L';
else if (Uplo == CblasLower) UL = 'U';
else
{
cblas_xerbla(2, "cblas_chpr2","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
#endif
if (N > 0)
{
n = N << 1;
x = malloc(n*sizeof(float));
y = malloc(n*sizeof(float));
tx = x;
ty = y;
if( incX > 0 ) {
i = incX << 1 ;
tincx = 2;
stx= x+n;
} else {
i = incX *(-2);
tincx = -2;
stx = x-2;
x +=(n-2);
}
if( incY > 0 ) {
j = incY << 1;
tincy = 2;
sty= y+n;
} else {
j = incY *(-2);
tincy = -2;
sty = y-2;
y +=(n-2);
}
do
{
*x = *xx;
x[1] = -xx[1];
x += tincx ;
xx += i;
}
while (x != stx);
do
{
*y = *yy;
y[1] = -yy[1];
y += tincy ;
yy += j;
}
while (y != sty);
x=tx;
y=ty;
#ifdef F77_INT
F77_incX = 1;
F77_incY = 1;
#else
incx = 1;
incy = 1;
#endif
} else
{
x = (float *) X;
y = (void *) Y;
}
F77_chpr2(F77_UL, &F77_N, alpha, y, &F77_incY, x, &F77_incX, Ap);
} else
{
cblas_xerbla(1, "cblas_chpr2","Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if(X!=x)
free(x);
if(Y!=y)
free(y);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,21 @@
/*
* cblas_cscal.c
*
* The program is a C interface to cscal.
*
* Written by Keita Teranishi. 2/11/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cscal( const int N, const void *alpha, void *X,
const int incX)
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX;
#else
#define F77_N N
#define F77_incX incX
#endif
F77_cscal( &F77_N, alpha, X, &F77_incX);
}

View File

@ -0,0 +1,21 @@
/*
* cblas_csscal.c
*
* The program is a C interface to csscal.
*
* Written by Keita Teranishi. 2/11/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_csscal( const int N, const float alpha, void *X,
const int incX)
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX;
#else
#define F77_N N
#define F77_incX incX
#endif
F77_csscal( &F77_N, &alpha, X, &F77_incX);
}

View File

@ -0,0 +1,22 @@
/*
* cblas_cswap.c
*
* The program is a C interface to cswap.
*
* Written by Keita Teranishi. 2/11/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_cswap( const int N, void *X, const int incX, void *Y,
const int incY)
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
F77_cswap( &F77_N, X, &F77_incX, Y, &F77_incY);
}

View File

@ -0,0 +1,106 @@
/*
*
* cblas_csymm.c
* This program is a C interface to csymm.
* Written by Keita Teranishi
* 4/8/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_csymm(const CBLAS_LAYOUT layout, const CBLAS_SIDE Side,
const CBLAS_UPLO Uplo, const int M, const int N,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc)
{
char SD, UL;
#ifdef F77_CHAR
F77_CHAR F77_SD, F77_UL;
#else
#define F77_SD &SD
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_M=M, F77_N=N, F77_lda=lda, F77_ldb=ldb;
F77_INT F77_ldc=ldc;
#else
#define F77_M M
#define F77_N N
#define F77_lda lda
#define F77_ldb ldb
#define F77_ldc ldc
#endif
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if( layout == CblasColMajor )
{
if( Side == CblasRight) SD='R';
else if ( Side == CblasLeft ) SD='L';
else
{
cblas_xerbla(2, "cblas_csymm", "Illegal Side setting, %d\n", Side);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Uplo == CblasUpper) UL='U';
else if ( Uplo == CblasLower ) UL='L';
else
{
cblas_xerbla(3, "cblas_csymm", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_SD = C2F_CHAR(&SD);
#endif
F77_csymm(F77_SD, F77_UL, &F77_M, &F77_N, alpha, A, &F77_lda,
B, &F77_ldb, beta, C, &F77_ldc);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if( Side == CblasRight) SD='L';
else if ( Side == CblasLeft ) SD='R';
else
{
cblas_xerbla(2, "cblas_csymm", "Illegal Side setting, %d\n", Side);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Uplo == CblasUpper) UL='L';
else if ( Uplo == CblasLower ) UL='U';
else
{
cblas_xerbla(3, "cblas_csymm", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_SD = C2F_CHAR(&SD);
#endif
F77_csymm(F77_SD, F77_UL, &F77_N, &F77_M, alpha, A, &F77_lda,
B, &F77_ldb, beta, C, &F77_ldc);
}
else cblas_xerbla(1, "cblas_csymm", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,108 @@
/*
*
* cblas_csyr2k.c
* This program is a C interface to csyr2k.
* Written by Keita Teranishi
* 4/8/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_csyr2k(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *B, const int ldb, const void *beta,
void *C, const int ldc)
{
char UL, TR;
#ifdef F77_CHAR
F77_CHAR F77_TR, F77_UL;
#else
#define F77_TR &TR
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_K=K, F77_lda=lda, F77_ldb=ldb;
F77_INT F77_ldc=ldc;
#else
#define F77_N N
#define F77_K K
#define F77_lda lda
#define F77_ldb ldb
#define F77_ldc ldc
#endif
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if( layout == CblasColMajor )
{
if( Uplo == CblasUpper) UL='U';
else if ( Uplo == CblasLower ) UL='L';
else
{
cblas_xerbla(2, "cblas_csyr2k", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Trans == CblasTrans) TR ='T';
else if ( Trans == CblasConjTrans ) TR='C';
else if ( Trans == CblasNoTrans ) TR='N';
else
{
cblas_xerbla(3, "cblas_csyr2k", "Illegal Trans setting, %d\n", Trans);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TR = C2F_CHAR(&TR);
#endif
F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda,
B, &F77_ldb, beta, C, &F77_ldc);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if( Uplo == CblasUpper) UL='L';
else if ( Uplo == CblasLower ) UL='U';
else
{
cblas_xerbla(3, "cblas_csyr2k", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Trans == CblasTrans) TR ='N';
else if ( Trans == CblasConjTrans ) TR='N';
else if ( Trans == CblasNoTrans ) TR='T';
else
{
cblas_xerbla(3, "cblas_csyr2k", "Illegal Trans setting, %d\n", Trans);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TR = C2F_CHAR(&TR);
#endif
F77_csyr2k(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda, B, &F77_ldb, beta, C, &F77_ldc);
}
else cblas_xerbla(1, "cblas_csyr2k", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,108 @@
/*
*
* cblas_csyrk.c
* This program is a C interface to csyrk.
* Written by Keita Teranishi
* 4/8/1998
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_csyrk(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const CBLAS_TRANSPOSE Trans, const int N, const int K,
const void *alpha, const void *A, const int lda,
const void *beta, void *C, const int ldc)
{
char UL, TR;
#ifdef F77_CHAR
F77_CHAR F77_TR, F77_UL;
#else
#define F77_TR &TR
#define F77_UL &UL
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_K=K, F77_lda=lda;
F77_INT F77_ldc=ldc;
#else
#define F77_N N
#define F77_K K
#define F77_lda lda
#define F77_ldc ldc
#endif
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if( layout == CblasColMajor )
{
if( Uplo == CblasUpper) UL='U';
else if ( Uplo == CblasLower ) UL='L';
else
{
cblas_xerbla(2, "cblas_csyrk", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Trans == CblasTrans) TR ='T';
else if ( Trans == CblasConjTrans ) TR='C';
else if ( Trans == CblasNoTrans ) TR='N';
else
{
cblas_xerbla(3, "cblas_csyrk", "Illegal Trans setting, %d\n", Trans);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TR = C2F_CHAR(&TR);
#endif
F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda,
beta, C, &F77_ldc);
} else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if( Uplo == CblasUpper) UL='L';
else if ( Uplo == CblasLower ) UL='U';
else
{
cblas_xerbla(3, "cblas_csyrk", "Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if( Trans == CblasTrans) TR ='N';
else if ( Trans == CblasConjTrans ) TR='N';
else if ( Trans == CblasNoTrans ) TR='T';
else
{
cblas_xerbla(3, "cblas_csyrk", "Illegal Trans setting, %d\n", Trans);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TR = C2F_CHAR(&TR);
#endif
F77_csyrk(F77_UL, F77_TR, &F77_N, &F77_K, alpha, A, &F77_lda,
beta, C, &F77_ldc);
}
else cblas_xerbla(1, "cblas_csyrk", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,158 @@
/*
* cblas_ctbmv.c
* The program is a C interface to ctbmv.
*
* Keita Teranishi 5/20/98
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_ctbmv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag,
const int N, const int K, const void *A, const int lda,
void *X, const int incX)
{
char TA;
char UL;
char DI;
#ifdef F77_CHAR
F77_CHAR F77_TA, F77_UL, F77_DI;
#else
#define F77_TA &TA
#define F77_UL &UL
#define F77_DI &DI
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX;
#else
#define F77_N N
#define F77_K K
#define F77_lda lda
#define F77_incX incX
#endif
int n, i=0, tincX;
float *st=0, *x=(float *)X;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (Uplo == CblasUpper) UL = 'U';
else if (Uplo == CblasLower) UL = 'L';
else
{
cblas_xerbla(2, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (TransA == CblasNoTrans) TA = 'N';
else if (TransA == CblasTrans) TA = 'T';
else if (TransA == CblasConjTrans) TA = 'C';
else
{
cblas_xerbla(3, "cblas_ctbmv","Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (Diag == CblasUnit) DI = 'U';
else if (Diag == CblasNonUnit) DI = 'N';
else
{
cblas_xerbla(4, "cblas_ctbmv","Illegal Diag setting, %d\n", Diag);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TA = C2F_CHAR(&TA);
F77_DI = C2F_CHAR(&DI);
#endif
F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
&F77_incX);
}
else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if (Uplo == CblasUpper) UL = 'L';
else if (Uplo == CblasLower) UL = 'U';
else
{
cblas_xerbla(2, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (TransA == CblasNoTrans) TA = 'T';
else if (TransA == CblasTrans) TA = 'N';
else if (TransA == CblasConjTrans)
{
TA = 'N';
if ( N > 0)
{
if(incX > 0)
tincX = incX;
else
tincX = -incX;
i = tincX << 1;
n = i * N;
x++;
st = x + n;
do
{
*x = -(*x);
x+= i;
}
while (x != st);
x -= n;
}
}
else
{
cblas_xerbla(3, "cblas_ctbmv","Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (Diag == CblasUnit) DI = 'U';
else if (Diag == CblasNonUnit) DI = 'N';
else
{
cblas_xerbla(4, "cblas_ctbmv","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TA = C2F_CHAR(&TA);
F77_DI = C2F_CHAR(&DI);
#endif
F77_ctbmv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
&F77_incX);
if (TransA == CblasConjTrans)
{
if (N > 0)
{
do
{
*x = -(*x);
x += i;
}
while (x != st);
}
}
}
else cblas_xerbla(1, "cblas_ctbmv", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,162 @@
/*
* cblas_ctbsv.c
* The program is a C interface to ctbsv.
*
* Keita Teranishi 3/23/98
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_ctbsv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag,
const int N, const int K, const void *A, const int lda,
void *X, const int incX)
{
char TA;
char UL;
char DI;
#ifdef F77_CHAR
F77_CHAR F77_TA, F77_UL, F77_DI;
#else
#define F77_TA &TA
#define F77_UL &UL
#define F77_DI &DI
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_lda=lda, F77_K=K, F77_incX=incX;
#else
#define F77_N N
#define F77_K K
#define F77_lda lda
#define F77_incX incX
#endif
int n, i=0, tincX;
float *st=0,*x=(float *)X;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (Uplo == CblasUpper) UL = 'U';
else if (Uplo == CblasLower) UL = 'L';
else
{
cblas_xerbla(2, "cblas_ctbsv","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (TransA == CblasNoTrans) TA = 'N';
else if (TransA == CblasTrans) TA = 'T';
else if (TransA == CblasConjTrans) TA = 'C';
else
{
cblas_xerbla(3, "cblas_ctbsv","Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (Diag == CblasUnit) DI = 'U';
else if (Diag == CblasNonUnit) DI = 'N';
else
{
cblas_xerbla(4, "cblas_ctbsv","Illegal Diag setting, %d\n", Diag);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TA = C2F_CHAR(&TA);
F77_DI = C2F_CHAR(&DI);
#endif
F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
&F77_incX);
}
else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if (Uplo == CblasUpper) UL = 'L';
else if (Uplo == CblasLower) UL = 'U';
else
{
cblas_xerbla(2, "cblas_ctbsv","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (TransA == CblasNoTrans) TA = 'T';
else if (TransA == CblasTrans) TA = 'N';
else if (TransA == CblasConjTrans)
{
TA = 'N';
if ( N > 0)
{
if ( incX > 0 )
tincX = incX;
else
tincX = -incX;
n = N*2*(tincX);
x++;
st=x+n;
i = tincX << 1;
do
{
*x = -(*x);
x+=i;
}
while (x != st);
x -= n;
}
}
else
{
cblas_xerbla(3, "cblas_ctbsv","Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (Diag == CblasUnit) DI = 'U';
else if (Diag == CblasNonUnit) DI = 'N';
else
{
cblas_xerbla(4, "cblas_ctbsv","Illegal Diag setting, %d\n", Diag);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TA = C2F_CHAR(&TA);
F77_DI = C2F_CHAR(&DI);
#endif
F77_ctbsv( F77_UL, F77_TA, F77_DI, &F77_N, &F77_K, A, &F77_lda, X,
&F77_incX);
if (TransA == CblasConjTrans)
{
if (N > 0)
{
do
{
*x = -(*x);
x+= i;
}
while (x != st);
}
}
}
else cblas_xerbla(1, "cblas_ctbsv", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

View File

@ -0,0 +1,152 @@
/*
* cblas_ctpmv.c
* The program is a C interface to ctpmv.
*
* Keita Teranishi 5/20/98
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_ctpmv(const CBLAS_LAYOUT layout, const CBLAS_UPLO Uplo,
const CBLAS_TRANSPOSE TransA, const CBLAS_DIAG Diag,
const int N, const void *Ap, void *X, const int incX)
{
char TA;
char UL;
char DI;
#ifdef F77_CHAR
F77_CHAR F77_TA, F77_UL, F77_DI;
#else
#define F77_TA &TA
#define F77_UL &UL
#define F77_DI &DI
#endif
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX;
#else
#define F77_N N
#define F77_incX incX
#endif
int n, i=0, tincX;
float *st=0,*x=(float *)X;
extern int CBLAS_CallFromC;
extern int RowMajorStrg;
RowMajorStrg = 0;
CBLAS_CallFromC = 1;
if (layout == CblasColMajor)
{
if (Uplo == CblasUpper) UL = 'U';
else if (Uplo == CblasLower) UL = 'L';
else
{
cblas_xerbla(2, "cblas_ctpmv","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (TransA == CblasNoTrans) TA = 'N';
else if (TransA == CblasTrans) TA = 'T';
else if (TransA == CblasConjTrans) TA = 'C';
else
{
cblas_xerbla(3, "cblas_ctpmv","Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (Diag == CblasUnit) DI = 'U';
else if (Diag == CblasNonUnit) DI = 'N';
else
{
cblas_xerbla(4, "cblas_ctpmv","Illegal Diag setting, %d\n", Diag);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TA = C2F_CHAR(&TA);
F77_DI = C2F_CHAR(&DI);
#endif
F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X, &F77_incX);
}
else if (layout == CblasRowMajor)
{
RowMajorStrg = 1;
if (Uplo == CblasUpper) UL = 'L';
else if (Uplo == CblasLower) UL = 'U';
else
{
cblas_xerbla(2, "cblas_ctpmv","Illegal Uplo setting, %d\n", Uplo);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (TransA == CblasNoTrans) TA = 'T';
else if (TransA == CblasTrans) TA = 'N';
else if (TransA == CblasConjTrans)
{
TA = 'N';
if ( N > 0)
{
if(incX > 0)
tincX = incX;
else
tincX = -incX;
i = tincX << 1;
n = i * N;
x++;
st = x + n;
do
{
*x = -(*x);
x += i;
}
while (x != st);
x -= n;
}
}
else
{
cblas_xerbla(3, "cblas_ctpmv","Illegal TransA setting, %d\n", TransA);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
if (Diag == CblasUnit) DI = 'U';
else if (Diag == CblasNonUnit) DI = 'N';
else
{
cblas_xerbla(4, "cblas_ctpmv","Illegal Diag setting, %d\n", Diag);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}
#ifdef F77_CHAR
F77_UL = C2F_CHAR(&UL);
F77_TA = C2F_CHAR(&TA);
F77_DI = C2F_CHAR(&DI);
#endif
F77_ctpmv( F77_UL, F77_TA, F77_DI, &F77_N, Ap, X,&F77_incX);
if (TransA == CblasConjTrans)
{
if (N > 0)
{
do
{
*x = -(*x);
x += i;
}
while (x != st);
}
}
}
else cblas_xerbla(1, "cblas_ctpmv", "Illegal layout setting, %d\n", layout);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;
}

Some files were not shown because too many files have changed in this diff Show More