update
This commit is contained in:
commit
80db5f11e1
|
@ -178,4 +178,4 @@ In chronological order:
|
||||||
* [2019-11-06] optimize AVX512 SGEMM
|
* [2019-11-06] optimize AVX512 SGEMM
|
||||||
* [2019-11-12] AVX512 CGEMM & ZGEMM kernels
|
* [2019-11-12] AVX512 CGEMM & ZGEMM kernels
|
||||||
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
|
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
|
||||||
* [2019-12-27] AVX2 CGEMM3M kernel
|
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
|
||||||
|
|
20
Makefile
20
Makefile
|
@ -247,21 +247,21 @@ prof_lapack : lapack_prebuild
|
||||||
|
|
||||||
lapack_prebuild :
|
lapack_prebuild :
|
||||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||||
|
@ -319,7 +319,7 @@ lapack-test :
|
||||||
ifneq ($(CROSS), 1)
|
ifneq ($(CROSS), 1)
|
||||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
|
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
|
||||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
lapack-runtest:
|
lapack-runtest:
|
||||||
|
|
|
@ -25,6 +25,8 @@ else ifeq ($(ARCH), i386)
|
||||||
override ARCH=x86
|
override ARCH=x86
|
||||||
else ifeq ($(ARCH), aarch64)
|
else ifeq ($(ARCH), aarch64)
|
||||||
override ARCH=arm64
|
override ARCH=arm64
|
||||||
|
else ifeq ($(ARCH), zarch)
|
||||||
|
override ARCH=zarch
|
||||||
endif
|
endif
|
||||||
|
|
||||||
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
|
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
|
||||||
|
@ -558,6 +560,11 @@ DYNAMIC_CORE += THUNDERX2T99
|
||||||
DYNAMIC_CORE += TSV110
|
DYNAMIC_CORE += TSV110
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifeq ($(ARCH), zarch)
|
||||||
|
DYNAMIC_CORE = Z13
|
||||||
|
DYNAMIC_CORE += Z14
|
||||||
|
endif
|
||||||
|
|
||||||
ifeq ($(ARCH), power)
|
ifeq ($(ARCH), power)
|
||||||
DYNAMIC_CORE = POWER6
|
DYNAMIC_CORE = POWER6
|
||||||
DYNAMIC_CORE += POWER8
|
DYNAMIC_CORE += POWER8
|
||||||
|
|
|
@ -115,7 +115,9 @@ set(SLASRC
|
||||||
stplqt.f stplqt2.f stpmlqt.f
|
stplqt.f stplqt2.f stpmlqt.f
|
||||||
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
|
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
|
||||||
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
|
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
|
||||||
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f)
|
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
|
||||||
|
scombssq.f sgesvdq.f slaorhr_col_getrfnp.f
|
||||||
|
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f )
|
||||||
|
|
||||||
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
|
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
|
||||||
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
|
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
|
||||||
|
@ -210,7 +212,9 @@ set(CLASRC
|
||||||
ctplqt.f ctplqt2.f ctpmlqt.f
|
ctplqt.f ctplqt2.f ctpmlqt.f
|
||||||
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
|
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
|
||||||
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
|
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
|
||||||
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f)
|
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
|
||||||
|
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
|
||||||
|
cungtsqr.f cunhr_col.f )
|
||||||
|
|
||||||
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
|
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
|
||||||
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
|
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
|
||||||
|
@ -299,7 +303,9 @@ set(DLASRC
|
||||||
dtplqt.f dtplqt2.f dtpmlqt.f
|
dtplqt.f dtplqt2.f dtpmlqt.f
|
||||||
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
|
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
|
||||||
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
|
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
|
||||||
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f)
|
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
|
||||||
|
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
|
||||||
|
dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f )
|
||||||
|
|
||||||
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
|
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
|
||||||
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
|
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
|
||||||
|
@ -398,7 +404,9 @@ set(ZLASRC
|
||||||
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
|
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
|
||||||
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
|
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
|
||||||
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
|
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
|
||||||
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f)
|
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
|
||||||
|
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
|
||||||
|
zungtsqr.f zunhr_col.f)
|
||||||
|
|
||||||
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
|
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
|
||||||
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
|
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
|
||||||
|
|
|
@ -715,6 +715,8 @@ set(DSRC
|
||||||
lapacke_dgesv_work.c
|
lapacke_dgesv_work.c
|
||||||
lapacke_dgesvd.c
|
lapacke_dgesvd.c
|
||||||
lapacke_dgesvd_work.c
|
lapacke_dgesvd_work.c
|
||||||
|
lapacke_dgesvdq.c
|
||||||
|
lapacke_dgesvdq_work.c
|
||||||
lapacke_dgesvdx.c
|
lapacke_dgesvdx.c
|
||||||
lapacke_dgesvdx_work.c
|
lapacke_dgesvdx_work.c
|
||||||
lapacke_dgesvj.c
|
lapacke_dgesvj.c
|
||||||
|
@ -1287,6 +1289,8 @@ set(SSRC
|
||||||
lapacke_sgesv_work.c
|
lapacke_sgesv_work.c
|
||||||
lapacke_sgesvd.c
|
lapacke_sgesvd.c
|
||||||
lapacke_sgesvd_work.c
|
lapacke_sgesvd_work.c
|
||||||
|
lapacke_sgesvdq.c
|
||||||
|
lapacke_sgesvdq_work.c
|
||||||
lapacke_sgesvdx.c
|
lapacke_sgesvdx.c
|
||||||
lapacke_sgesvdx_work.c
|
lapacke_sgesvdx_work.c
|
||||||
lapacke_sgesvj.c
|
lapacke_sgesvj.c
|
||||||
|
@ -1853,6 +1857,8 @@ set(ZSRC
|
||||||
lapacke_zgesv_work.c
|
lapacke_zgesv_work.c
|
||||||
lapacke_zgesvd.c
|
lapacke_zgesvd.c
|
||||||
lapacke_zgesvd_work.c
|
lapacke_zgesvd_work.c
|
||||||
|
lapacke_zgesvdq.c
|
||||||
|
lapacke_zgesvdq_work.c
|
||||||
lapacke_zgesvdx.c
|
lapacke_zgesvdx.c
|
||||||
lapacke_zgesvdx_work.c
|
lapacke_zgesvdx_work.c
|
||||||
lapacke_zgesvj.c
|
lapacke_zgesvj.c
|
||||||
|
|
|
@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
7 NUMBER OF VALUES OF N
|
6 NUMBER OF VALUES OF N
|
||||||
1 2 3 5 7 9 35 VALUES OF N
|
1 2 3 5 7 9 35 VALUES OF N
|
||||||
3 NUMBER OF VALUES OF ALPHA
|
3 NUMBER OF VALUES OF ALPHA
|
||||||
0.0 1.0 0.7 VALUES OF ALPHA
|
0.0 1.0 0.7 VALUES OF ALPHA
|
||||||
|
|
|
@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES.
|
||||||
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
T LOGICAL FLAG, T TO TEST ERROR EXITS.
|
||||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
|
||||||
16.0 THRESHOLD VALUE OF TEST RATIO
|
16.0 THRESHOLD VALUE OF TEST RATIO
|
||||||
7 NUMBER OF VALUES OF N
|
6 NUMBER OF VALUES OF N
|
||||||
0 1 2 3 5 9 35 VALUES OF N
|
0 1 2 3 5 9 35 VALUES OF N
|
||||||
3 NUMBER OF VALUES OF ALPHA
|
3 NUMBER OF VALUES OF ALPHA
|
||||||
0.0 1.0 0.7 VALUES OF ALPHA
|
0.0 1.0 0.7 VALUES OF ALPHA
|
||||||
|
|
|
@ -21,9 +21,13 @@ else
|
||||||
ifeq ($(ARCH),power)
|
ifeq ($(ARCH),power)
|
||||||
COMMONOBJS += dynamic_power.$(SUFFIX)
|
COMMONOBJS += dynamic_power.$(SUFFIX)
|
||||||
else
|
else
|
||||||
|
ifeq ($(ARCH),zarch)
|
||||||
|
COMMONOBJS += dynamic_zarch.$(SUFFIX)
|
||||||
|
else
|
||||||
COMMONOBJS += dynamic.$(SUFFIX)
|
COMMONOBJS += dynamic.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
COMMONOBJS += parameter.$(SUFFIX)
|
COMMONOBJS += parameter.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
@ -85,9 +89,13 @@ else
|
||||||
ifeq ($(ARCH),power)
|
ifeq ($(ARCH),power)
|
||||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX)
|
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX)
|
||||||
else
|
else
|
||||||
|
ifeq ($(ARCH),zarch)
|
||||||
|
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX)
|
||||||
|
else
|
||||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
|
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
endif
|
||||||
else
|
else
|
||||||
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
|
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
|
|
@ -0,0 +1,131 @@
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
extern gotoblas_t gotoblas_Z13;
|
||||||
|
extern gotoblas_t gotoblas_Z14;
|
||||||
|
extern gotoblas_t gotoblas_Z15;
|
||||||
|
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
||||||
|
//extern gotoblas_t gotoblas_Z14;
|
||||||
|
//#endif
|
||||||
|
|
||||||
|
#define NUM_CORETYPES 5
|
||||||
|
|
||||||
|
extern void openblas_warning(int verbose, const char* msg);
|
||||||
|
|
||||||
|
static char* corename[] = {
|
||||||
|
"unknown",
|
||||||
|
"Z13",
|
||||||
|
"Z14",
|
||||||
|
"Z15",
|
||||||
|
"ZARCH_GENERIC",
|
||||||
|
};
|
||||||
|
|
||||||
|
char* gotoblas_corename(void) {
|
||||||
|
if (gotoblas == &gotoblas_Z13) return corename[1];
|
||||||
|
if (gotoblas == &gotoblas_Z14) return corename[2];
|
||||||
|
if (gotoblas == &gotoblas_Z15) return corename[3];
|
||||||
|
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
||||||
|
// if (gotoblas == &gotoblas_POWER9) return corename[3];
|
||||||
|
//#endif
|
||||||
|
return corename[0]; // try generic?
|
||||||
|
}
|
||||||
|
|
||||||
|
// __builtin_cpu_is is not supported by zarch
|
||||||
|
static gotolabs_t* get_coretype(void) {
|
||||||
|
FILE* infile;
|
||||||
|
char buffer[512], * p;
|
||||||
|
|
||||||
|
p = (char*)NULL;
|
||||||
|
infile = fopen("/proc/sysinfo", "r");
|
||||||
|
while (fgets(buffer, sizeof(buffer), infile)) {
|
||||||
|
if (!strncmp("Type", buffer, 4)) {
|
||||||
|
p = strchr(buffer, ':') + 2;
|
||||||
|
#if 0
|
||||||
|
fprintf(stderr, "%s\n", p);
|
||||||
|
#endif
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fclose(infile);
|
||||||
|
|
||||||
|
if (strstr(p, "2964")) return &gotoblas_Z13;
|
||||||
|
if (strstr(p, "2965")) return &gotoblas_Z13;
|
||||||
|
if (strstr(p, "3906")) return &gotoblas_Z14;
|
||||||
|
if (strstr(p, "3907")) return &gotoblas_Z14;
|
||||||
|
if (strstr(p, "8561")) return &gotoblas_Z14; // fallback z15 to z14
|
||||||
|
if (strstr(p, "8562")) return &gotoblas_Z14; // fallback z15 to z14
|
||||||
|
|
||||||
|
return NULL; // should be ZARCH_GENERIC
|
||||||
|
}
|
||||||
|
|
||||||
|
static gotoblas_t* force_coretype(char* coretype) {
|
||||||
|
|
||||||
|
int i;
|
||||||
|
int found = -1;
|
||||||
|
char message[128];
|
||||||
|
|
||||||
|
for (i = 0; i < NUM_CORETYPES; i++)
|
||||||
|
{
|
||||||
|
if (!strncasecmp(coretype, corename[i], 20))
|
||||||
|
{
|
||||||
|
found = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (found)
|
||||||
|
{
|
||||||
|
case 1: return (&gotoblas_Z13);
|
||||||
|
case 2: return (&gotoblas_Z14);
|
||||||
|
case 3: return (&gotoblas_Z15);
|
||||||
|
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
|
||||||
|
// case 3: return (&gotoblas_POWER9);
|
||||||
|
//#endif
|
||||||
|
default: return NULL;
|
||||||
|
}
|
||||||
|
snprintf(message, 128, "Core not found: %s\n", coretype);
|
||||||
|
openblas_warning(1, message);
|
||||||
|
}
|
||||||
|
|
||||||
|
void gotoblas_dynamic_init(void) {
|
||||||
|
|
||||||
|
char coremsg[128];
|
||||||
|
char coren[22];
|
||||||
|
char* p;
|
||||||
|
|
||||||
|
|
||||||
|
if (gotoblas) return;
|
||||||
|
|
||||||
|
p = getenv("OPENBLAS_CORETYPE");
|
||||||
|
if (p)
|
||||||
|
{
|
||||||
|
gotoblas = force_coretype(p);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
gotoblas = get_coretype();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gotoblas == NULL)
|
||||||
|
{
|
||||||
|
snprintf(coremsg, 128, "Falling back to Z14 core\n");
|
||||||
|
openblas_warning(1, coremsg);
|
||||||
|
gotoblas = &gotoblas_Z14;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (gotoblas && gotoblas->init) {
|
||||||
|
strncpy(coren, gotoblas_corename(), 20);
|
||||||
|
sprintf(coremsg, "Core: %s\n", coren);
|
||||||
|
openblas_warning(2, coremsg);
|
||||||
|
gotoblas->init();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Drop the selected kernel table; gotoblas_dynamic_init() only performs
 * selection while `gotoblas` is NULL, so this re-enables it. */
void gotoblas_dynamic_quit(void) {
|
||||||
|
gotoblas = NULL;
|
||||||
|
}
|
|
@ -694,7 +694,19 @@
|
||||||
|
|
||||||
# functions added for lapack-3.8.0
|
# functions added for lapack-3.8.0
|
||||||
|
|
||||||
ilaenv2stage
|
ilaenv2stage,
|
||||||
|
|
||||||
|
# functions added for lapack-3.9.0
|
||||||
|
cgesvdq,
|
||||||
|
cungtsqr,
|
||||||
|
dcombssq,
|
||||||
|
dgesvdq,
|
||||||
|
dorgtsqr,
|
||||||
|
scombssq,
|
||||||
|
sgesvdq,
|
||||||
|
sorgtsqr,
|
||||||
|
zgesvdq,
|
||||||
|
zungtsqr
|
||||||
);
|
);
|
||||||
|
|
||||||
@lapack_extendedprecision_objs = (
|
@lapack_extendedprecision_objs = (
|
||||||
|
@ -3347,6 +3359,15 @@
|
||||||
LAPACKE_zsytrf_aa_2stage_work,
|
LAPACKE_zsytrf_aa_2stage_work,
|
||||||
LAPACKE_zsytrs_aa_2stage,
|
LAPACKE_zsytrs_aa_2stage,
|
||||||
LAPACKE_zsytrs_aa_2stage_work,
|
LAPACKE_zsytrs_aa_2stage_work,
|
||||||
|
|
||||||
|
# new functions from 3.9.0
|
||||||
|
LAPACKE_dgesvdq,
|
||||||
|
LAPACKE_dgesvdq_work,
|
||||||
|
LAPACKE_sgesvdq,
|
||||||
|
LAPACKE_sgesvdq_work,
|
||||||
|
LAPACKE_zgesvdq,
|
||||||
|
LAPACKE_zgesvdq_work
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
#These function may need 2 underscores.
|
#These function may need 2 underscores.
|
||||||
|
@ -3419,7 +3440,13 @@
|
||||||
dsytrf_aa_2stage, dsytrs_aa_2stage,
|
dsytrf_aa_2stage, dsytrs_aa_2stage,
|
||||||
zhesv_aa_2stage, zhetrf_aa_2stage,
|
zhesv_aa_2stage, zhetrf_aa_2stage,
|
||||||
zhetrs_aa_2stage, zsysv_aa_2stage,
|
zhetrs_aa_2stage, zsysv_aa_2stage,
|
||||||
zsytrf_aa_2stage, zsytrs_aa_2stage
|
zsytrf_aa_2stage, zsytrs_aa_2stage,
|
||||||
|
# 3.9.0
|
||||||
|
claunhr_col_getrfnp, claunhr_col_getrfnp2, cunhr_col,
|
||||||
|
dlaorhr_col_getrfnp, dlaorhr_col_getrfnp2, dorhr_col,
|
||||||
|
slaorhr_col_getrfnp, slaorhr_col_getrfnp2, sorhr_col,
|
||||||
|
zlaunhr_col_getrfnp, zlaunhr_col_getrfnp2, zunhr_col
|
||||||
|
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -103,26 +103,34 @@ ZDOTKERNEL = zdot.S
|
||||||
DSDOTKERNEL = dot.S
|
DSDOTKERNEL = dot.S
|
||||||
|
|
||||||
DGEMM_BETA = dgemm_beta.S
|
DGEMM_BETA = dgemm_beta.S
|
||||||
|
SGEMM_BETA = sgemm_beta.S
|
||||||
|
|
||||||
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||||
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||||
ifeq ($(SGEMM_UNROLL_N), 4)
|
ifeq ($(SGEMM_UNROLL_M), 16)
|
||||||
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
|
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
|
||||||
|
else
|
||||||
|
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||||
|
endif
|
||||||
|
ifeq ($(SGEMM_UNROLL_M), 4)
|
||||||
|
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
|
||||||
else
|
else
|
||||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||||
endif
|
endif
|
||||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
ifeq ($(SGEMM_UNROLL_N), 16)
|
||||||
|
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
|
||||||
|
else
|
||||||
|
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||||
|
endif
|
||||||
ifeq ($(SGEMM_UNROLL_N), 4)
|
ifeq ($(SGEMM_UNROLL_N), 4)
|
||||||
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
|
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
|
||||||
else
|
else
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||||
endif
|
endif
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
|
|
@ -109,22 +109,29 @@ ZGEMVTKERNEL = zgemv_t.S
|
||||||
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||||
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
|
||||||
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
|
||||||
ifeq ($(SGEMM_UNROLL_N), 4)
|
ifeq ($(SGEMM_UNROLL_M), 16)
|
||||||
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
|
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
|
||||||
|
else
|
||||||
|
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
||||||
|
endif
|
||||||
|
ifeq ($(SGEMM_UNROLL_M), 4)
|
||||||
|
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
|
||||||
else
|
else
|
||||||
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
|
||||||
endif
|
endif
|
||||||
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
|
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
endif
|
endif
|
||||||
|
ifeq ($(SGEMM_UNROLL_N), 16)
|
||||||
|
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
|
||||||
|
else
|
||||||
|
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
||||||
|
endif
|
||||||
ifeq ($(SGEMM_UNROLL_N), 4)
|
ifeq ($(SGEMM_UNROLL_N), 4)
|
||||||
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
|
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
|
||||||
else
|
else
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
|
||||||
endif
|
endif
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
|
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
|
|
@ -43,7 +43,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
#define betaV0 v11.d[0]
|
#define betaV0 v11.d[0]
|
||||||
#define I x16
|
#define I x16
|
||||||
|
|
||||||
#define size 128
|
#define prfm_size 640
|
||||||
|
#define calc_size 128
|
||||||
|
|
||||||
/**************************************************************************************
|
/**************************************************************************************
|
||||||
* Macro definitions
|
* Macro definitions
|
||||||
|
@ -126,20 +127,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
fmul v2.2d, v2.2d, betaV0
|
fmul v2.2d, v2.2d, betaV0
|
||||||
fmul v3.2d, v3.2d, betaV0
|
fmul v3.2d, v3.2d, betaV0
|
||||||
|
|
||||||
|
prfm PLDL1KEEP, [A01, prfm_size]
|
||||||
|
|
||||||
fmul v4.2d, v4.2d, betaV0
|
fmul v4.2d, v4.2d, betaV0
|
||||||
fmul v5.2d, v5.2d, betaV0
|
fmul v5.2d, v5.2d, betaV0
|
||||||
|
|
||||||
|
prfm PLDL1KEEP, [A03, prfm_size]
|
||||||
|
|
||||||
fmul v6.2d, v6.2d, betaV0
|
fmul v6.2d, v6.2d, betaV0
|
||||||
fmul v7.2d, v7.2d, betaV0
|
fmul v7.2d, v7.2d, betaV0
|
||||||
|
|
||||||
st1 {v0.2d, v1.2d}, [A01]
|
st1 {v0.2d, v1.2d}, [A01]
|
||||||
add A01, A01, size
|
add A01, A01, calc_size
|
||||||
st1 {v2.2d, v3.2d}, [A02]
|
st1 {v2.2d, v3.2d}, [A02]
|
||||||
add A02, A02, size
|
add A02, A02, calc_size
|
||||||
st1 {v4.2d, v5.2d}, [A03]
|
st1 {v4.2d, v5.2d}, [A03]
|
||||||
add A03, A03, size
|
add A03, A03, calc_size
|
||||||
st1 {v6.2d, v7.2d}, [A04]
|
st1 {v6.2d, v7.2d}, [A04]
|
||||||
add A04, A04, size
|
add A04, A04, calc_size
|
||||||
|
|
||||||
subs I , I , #1
|
subs I , I , #1
|
||||||
bne .Lgemm_beta_03
|
bne .Lgemm_beta_03
|
||||||
|
|
|
@ -0,0 +1,259 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2016, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are
|
||||||
|
met:
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in
|
||||||
|
the documentation and/or other materials provided with the
|
||||||
|
distribution.
|
||||||
|
3. Neither the name of the OpenBLAS project nor the names of
|
||||||
|
its contributors may be used to endorse or promote products
|
||||||
|
derived from this software without specific prior written permission.
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||||
|
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#define ASSEMBLER
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
#define M x0
|
||||||
|
#define N x1
|
||||||
|
#define BETA s0
|
||||||
|
#define LDC x6
|
||||||
|
#define C00 x7
|
||||||
|
|
||||||
|
#define A01 x8
|
||||||
|
#define A02 x9
|
||||||
|
#define A03 x10
|
||||||
|
#define A04 x11
|
||||||
|
#define I x12
|
||||||
|
|
||||||
|
#define beta0 s11
|
||||||
|
#define betaV0 v11.s[0]
|
||||||
|
|
||||||
|
#define prfm_size 640
|
||||||
|
#define calc_size 128
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
|
* Macro definitions
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
|
.macro SAVE_REGS
|
||||||
|
add sp, sp, #-(11 * 16)
|
||||||
|
stp d8, d9, [sp, #(0 * 16)]
|
||||||
|
stp d10, d11, [sp, #(1 * 16)]
|
||||||
|
stp d12, d13, [sp, #(2 * 16)]
|
||||||
|
stp d14, d15, [sp, #(3 * 16)]
|
||||||
|
stp d16, d17, [sp, #(4 * 16)]
|
||||||
|
stp x18, x19, [sp, #(5 * 16)]
|
||||||
|
stp x20, x21, [sp, #(6 * 16)]
|
||||||
|
stp x22, x23, [sp, #(7 * 16)]
|
||||||
|
stp x24, x25, [sp, #(8 * 16)]
|
||||||
|
stp x26, x27, [sp, #(9 * 16)]
|
||||||
|
str x28, [sp, #(10 * 16)]
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro RESTORE_REGS
|
||||||
|
ldp d8, d9, [sp, #(0 * 16)]
|
||||||
|
ldp d10, d11, [sp, #(1 * 16)]
|
||||||
|
ldp d12, d13, [sp, #(2 * 16)]
|
||||||
|
ldp d14, d15, [sp, #(3 * 16)]
|
||||||
|
ldp d16, d17, [sp, #(4 * 16)]
|
||||||
|
ldp x18, x19, [sp, #(5 * 16)]
|
||||||
|
ldp x20, x21, [sp, #(6 * 16)]
|
||||||
|
ldp x22, x23, [sp, #(7 * 16)]
|
||||||
|
ldp x24, x25, [sp, #(8 * 16)]
|
||||||
|
ldp x26, x27, [sp, #(9 * 16)]
|
||||||
|
ldr x28, [sp, #(10 * 16)]
|
||||||
|
add sp, sp, #(11*16)
|
||||||
|
.endm
|
||||||
|
|
||||||
|
.macro INIT_ZERO
	// Clear v0-v7 so the beta == 0 path can store them as zeros.
	// The registers are never loaded before this macro runs, so scaling
	// their incoming contents by beta (0.0) is not safe: a lane holding
	// NaN or Inf would produce NaN instead of 0.
	movi	v0.4s, #0
	movi	v1.4s, #0
	movi	v2.4s, #0
	movi	v3.4s, #0
	movi	v4.4s, #0
	movi	v5.4s, #0
	movi	v6.4s, #0
	movi	v7.4s, #0
.endm
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
|
* End of macro definitions
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
|
PROLOGUE
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
|
||||||
|
ldr LDC, [sp]
|
||||||
|
SAVE_REGS
|
||||||
|
|
||||||
|
.Lgemm_beta_BEGIN:
|
||||||
|
|
||||||
|
fmov beta0, BETA
|
||||||
|
cmp N, #0
|
||||||
|
ble .Lgemm_beta_L999
|
||||||
|
|
||||||
|
fcmp BETA, #0.0
|
||||||
|
beq .Lgemm_beta_zero_01
|
||||||
|
|
||||||
|
.Lgemm_beta_01:
|
||||||
|
|
||||||
|
lsl LDC, LDC, #2
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_02:
|
||||||
|
|
||||||
|
mov A01, C00
|
||||||
|
add C00, C00, LDC
|
||||||
|
asr I, M, #5
|
||||||
|
cmp I, #0
|
||||||
|
ble .Lgemm_beta_04
|
||||||
|
add A02, A01, #32
|
||||||
|
add A03, A02, #32
|
||||||
|
add A04, A03, #32
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_03:
|
||||||
|
|
||||||
|
prfm PLDL1KEEP, [A01, prfm_size]
|
||||||
|
|
||||||
|
ldp q0, q1, [A01]
|
||||||
|
ldp q2, q3, [A02]
|
||||||
|
ldp q4, q5, [A03]
|
||||||
|
ldp q6, q7, [A04]
|
||||||
|
|
||||||
|
fmul v0.4s, v0.4s, betaV0
|
||||||
|
fmul v1.4s, v1.4s, betaV0
|
||||||
|
|
||||||
|
fmul v2.4s, v2.4s, betaV0
|
||||||
|
fmul v3.4s, v3.4s, betaV0
|
||||||
|
|
||||||
|
fmul v4.4s, v4.4s, betaV0
|
||||||
|
fmul v5.4s, v5.4s, betaV0
|
||||||
|
|
||||||
|
fmul v6.4s, v6.4s, betaV0
|
||||||
|
fmul v7.4s, v7.4s, betaV0
|
||||||
|
|
||||||
|
prfm PLDL1KEEP, [A01, prfm_size + 64]
|
||||||
|
|
||||||
|
st1 {v0.4s, v1.4s}, [A01]
|
||||||
|
add A01, A01, calc_size
|
||||||
|
st1 {v2.4s, v3.4s}, [A02]
|
||||||
|
add A02, A02, calc_size
|
||||||
|
st1 {v4.4s, v5.4s}, [A03]
|
||||||
|
add A03, A03, calc_size
|
||||||
|
st1 {v6.4s, v7.4s}, [A04]
|
||||||
|
add A04, A04, calc_size
|
||||||
|
|
||||||
|
subs I , I , #1
|
||||||
|
bne .Lgemm_beta_03
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_04:
|
||||||
|
|
||||||
|
and I, M , #31
|
||||||
|
cmp I, #0
|
||||||
|
ble .Lgemm_beta_06
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_05:
|
||||||
|
|
||||||
|
ldr s12, [A01]
|
||||||
|
fmul s12, s12, beta0
|
||||||
|
str s12, [A01]
|
||||||
|
add A01, A01, #4
|
||||||
|
|
||||||
|
subs I , I , #1
|
||||||
|
bne .Lgemm_beta_05
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_06:
|
||||||
|
|
||||||
|
subs N , N, #1 // N--
|
||||||
|
bne .Lgemm_beta_02
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_L999:
|
||||||
|
|
||||||
|
mov x0, #0
|
||||||
|
RESTORE_REGS
|
||||||
|
ret
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_zero_01:
|
||||||
|
|
||||||
|
INIT_ZERO
|
||||||
|
lsl LDC, LDC, #2
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_zero_02:
|
||||||
|
|
||||||
|
mov A01, C00
|
||||||
|
add C00, C00, LDC
|
||||||
|
|
||||||
|
asr I, M, #5
|
||||||
|
cmp I, #0
|
||||||
|
ble .Lgemm_beta_zero_04
|
||||||
|
add A02, A01, #32
|
||||||
|
add A03, A02, #32
|
||||||
|
add A04, A03, #32
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_zero_03:
|
||||||
|
|
||||||
|
st1 {v0.4s, v1.4s}, [A01]
|
||||||
|
add A01, A01, calc_size
|
||||||
|
st1 {v2.4s, v3.4s}, [A02]
|
||||||
|
add A02, A02, calc_size
|
||||||
|
st1 {v4.4s, v5.4s}, [A03]
|
||||||
|
add A03, A03, calc_size
|
||||||
|
st1 {v6.4s, v7.4s}, [A04]
|
||||||
|
add A04, A04, calc_size
|
||||||
|
|
||||||
|
subs I, I, #1
|
||||||
|
bne .Lgemm_beta_zero_03
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_zero_04:
|
||||||
|
|
||||||
|
and I, M, #31
|
||||||
|
cmp I, #0
|
||||||
|
ble .Lgemm_beta_zero_06
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_zero_05:
|
||||||
|
|
||||||
|
str beta0, [A01]
|
||||||
|
add A01, A01, #4
|
||||||
|
|
||||||
|
subs I, I, #1
|
||||||
|
bne .Lgemm_beta_zero_05
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_zero_06:
|
||||||
|
|
||||||
|
subs N, N, #1
|
||||||
|
bne .Lgemm_beta_zero_02
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lgemm_beta_zero_L999:
|
||||||
|
mov x0, #0
|
||||||
|
RESTORE_REGS
|
||||||
|
ret
|
||||||
|
|
||||||
|
EPILOGUE
|
|
@ -0,0 +1,824 @@
|
||||||
|
/***************************************************************************
|
||||||
|
Copyright (c) 2019, The OpenBLAS Project
|
||||||
|
All rights reserved.
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#define ASSEMBLER
|
||||||
|
#include "common.h"
|
||||||
|
|
||||||
|
/* Incoming arguments (first five integer argument registers). */
#define	M	x0
#define	N	x1
#define	A	x2
#define	LDA	x3
#define	B	x4

/* Byte distance between successive 16-column panels in B
 * (set to M * 16 * SIZE by the function body). */
#define	M8	x5

/* Row cursors into A: one per source row handled in a pass. */
#define	A01	x6
#define	A02	x7
#define	A03	x8
#define	A04	x9
#define	A05	x10
#define	A06	x11
#define	A07	x12
#define	A08	x13

/* Write cursors into B: B01..B04 serve the 8/4/2/1-column
 * remainder groups, B00 serves the full 16-column panels. */
#define	B01	x14
#define	B02	x15
#define	B03	x16
#define	B04	x17
#define	B00	x22

/* Loop counters.  I previously lived in x18, which AAPCS64 designates as
 * the platform register (reserved on e.g. Darwin and Windows).  x21 is
 * already saved/restored by SAVE_REGS/RESTORE_REGS and is otherwise unused
 * in this file, so it is a drop-in replacement. */
#define	I	x21
#define	J	x19

#define	TEMP1	x20

/* Prefetch distance, in bytes, ahead of each row cursor. */
#define	A_PREFETCH	256
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
|
* Macro definitions
|
||||||
|
**************************************************************************************/
|
||||||
|
.macro SAVE_REGS
	// Reserve an 11 * 16 byte frame and spill d8-d17 and x18-x28 into it.
	sub	sp, sp, #(11 * 16)
	str	x28, [sp, #(10 * 16)]
	stp	x26, x27, [sp, #(9 * 16)]
	stp	x24, x25, [sp, #(8 * 16)]
	stp	x22, x23, [sp, #(7 * 16)]
	stp	x20, x21, [sp, #(6 * 16)]
	stp	x18, x19, [sp, #(5 * 16)]
	stp	d16, d17, [sp, #(4 * 16)]
	stp	d14, d15, [sp, #(3 * 16)]
	stp	d12, d13, [sp, #(2 * 16)]
	stp	d10, d11, [sp, #(1 * 16)]
	stp	d8, d9, [sp, #(0 * 16)]
.endm
|
||||||
|
|
||||||
|
.macro RESTORE_REGS
	// Reload everything SAVE_REGS spilled, then release the 11 * 16 byte frame.
	ldr	x28, [sp, #(10 * 16)]
	ldp	x26, x27, [sp, #(9 * 16)]
	ldp	x24, x25, [sp, #(8 * 16)]
	ldp	x22, x23, [sp, #(7 * 16)]
	ldp	x20, x21, [sp, #(6 * 16)]
	ldp	x18, x19, [sp, #(5 * 16)]
	ldp	d16, d17, [sp, #(4 * 16)]
	ldp	d14, d15, [sp, #(3 * 16)]
	ldp	d12, d13, [sp, #(2 * 16)]
	ldp	d10, d11, [sp, #(1 * 16)]
	ldp	d8, d9, [sp, #(0 * 16)]
	add	sp, sp, #(11*16)
.endm
|
||||||
|
|
||||||
|
/*************************************************************************************************************************/
|
||||||
|
|
||||||
|
.macro COPY16x8
	// 8 rows x 16 elements: 64 bytes from each of A01..A08 are written
	// back to back (8 * 64 bytes) starting at B00; B00 then steps by M8.
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]
	prfm	PLDL1KEEP, [A02, #A_PREFETCH]
	prfm	PLDL1KEEP, [A03, #A_PREFETCH]
	prfm	PLDL1KEEP, [A04, #A_PREFETCH]
	prfm	PLDL1KEEP, [A05, #A_PREFETCH]
	prfm	PLDL1KEEP, [A06, #A_PREFETCH]
	prfm	PLDL1KEEP, [A07, #A_PREFETCH]
	prfm	PLDL1KEEP, [A08, #A_PREFETCH]

	mov	TEMP1, B00			// TEMP1 walks the destination panel

	ld1	{v0.4s, v1.4s, v2.4s, v3.4s}, [A01], #64
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [TEMP1], #64

	ld1	{v4.4s, v5.4s, v6.4s, v7.4s}, [A02], #64
	st1	{v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1], #64

	ld1	{v8.4s, v9.4s, v10.4s, v11.4s}, [A03], #64
	st1	{v8.4s, v9.4s, v10.4s, v11.4s}, [TEMP1], #64

	ld1	{v12.4s, v13.4s, v14.4s, v15.4s}, [A04], #64
	st1	{v12.4s, v13.4s, v14.4s, v15.4s}, [TEMP1], #64

	ld1	{v16.4s, v17.4s, v18.4s, v19.4s}, [A05], #64
	st1	{v16.4s, v17.4s, v18.4s, v19.4s}, [TEMP1], #64

	ld1	{v20.4s, v21.4s, v22.4s, v23.4s}, [A06], #64
	st1	{v20.4s, v21.4s, v22.4s, v23.4s}, [TEMP1], #64

	ld1	{v24.4s, v25.4s, v26.4s, v27.4s}, [A07], #64
	st1	{v24.4s, v25.4s, v26.4s, v27.4s}, [TEMP1], #64

	ld1	{v28.4s, v29.4s, v30.4s, v31.4s}, [A08], #64
	st1	{v28.4s, v29.4s, v30.4s, v31.4s}, [TEMP1], #64

	add	B00, B00, M8
.endm
|
||||||
|
|
||||||
|
.macro COPY8x8
	// 8 rows x 8 elements: 32 bytes from each of A01..A08 are appended
	// to the B01 stream (256 bytes in total).
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]
	prfm	PLDL1KEEP, [A02, #A_PREFETCH]
	prfm	PLDL1KEEP, [A03, #A_PREFETCH]
	prfm	PLDL1KEEP, [A04, #A_PREFETCH]
	prfm	PLDL1KEEP, [A05, #A_PREFETCH]
	prfm	PLDL1KEEP, [A06, #A_PREFETCH]
	prfm	PLDL1KEEP, [A07, #A_PREFETCH]
	prfm	PLDL1KEEP, [A08, #A_PREFETCH]

	ldp	q0, q1, [A01], #32
	ldp	q2, q3, [A02], #32
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B01], #64

	ldp	q4, q5, [A03], #32
	ldp	q6, q7, [A04], #32
	st1	{v4.4s, v5.4s, v6.4s, v7.4s}, [B01], #64

	ldp	q8, q9, [A05], #32
	ldp	q10, q11, [A06], #32
	st1	{v8.4s, v9.4s, v10.4s, v11.4s}, [B01], #64

	ldp	q12, q13, [A07], #32
	ldp	q14, q15, [A08], #32
	st1	{v12.4s, v13.4s, v14.4s, v15.4s}, [B01], #64
.endm
|
||||||
|
|
||||||
|
.macro COPY4x8
	// 8 rows x 4 elements: 16 bytes from each of A01..A08 are appended
	// to the B02 stream (128 bytes in total).  No prefetching here.
	ldr	q0, [A01], #16
	ldr	q1, [A02], #16
	ldr	q2, [A03], #16
	ldr	q3, [A04], #16
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B02], #64

	ldr	q4, [A05], #16
	ldr	q5, [A06], #16
	ldr	q6, [A07], #16
	ldr	q7, [A08], #16
	st1	{v4.4s, v5.4s, v6.4s, v7.4s}, [B02], #64
.endm
|
||||||
|
|
||||||
|
.macro COPY2x8
	// 8 rows x 2 elements: 8 bytes from each of A01..A08 are appended
	// to the B03 stream (64 bytes in total).
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8
	ldr	d2, [A03], #8
	ldr	d3, [A04], #8
	stp	d0, d1, [B03], #16
	stp	d2, d3, [B03], #16

	ldr	d4, [A05], #8
	ldr	d5, [A06], #8
	ldr	d6, [A07], #8
	ldr	d7, [A08], #8
	stp	d4, d5, [B03], #16
	stp	d6, d7, [B03], #16
.endm
|
||||||
|
|
||||||
|
.macro COPY1x8
	// 8 rows x 1 element: one 4-byte element from each of A01..A08 is
	// appended to the B04 stream (32 bytes in total).
	ldr	s0, [A01]
	ldr	s1, [A02]
	ldr	s2, [A03]
	ldr	s3, [A04]

	add	A01, A01, #4
	add	A02, A02, #4
	add	A03, A03, #4
	add	A04, A04, #4

	stp	s0, s1, [B04]
	add	B04, B04, #8
	stp	s2, s3, [B04]
	add	B04, B04, #8

	ldr	s4, [A05]
	ldr	s5, [A06]
	ldr	s6, [A07]
	ldr	s7, [A08]

	// The previous version re-loaded d4..d7 here with a post-increment
	// of 8.  That read 4 bytes beyond the single element being copied and
	// advanced A05..A08 twice as far as A01..A04.  Only the low 32 bits
	// were ever stored, so the packed data is unchanged by dropping it.
	add	A05, A05, #4
	add	A06, A06, #4
	add	A07, A07, #4
	add	A08, A08, #4

	stp	s4, s5, [B04]
	add	B04, B04, #8
	stp	s6, s7, [B04]
	add	B04, B04, #8
.endm
|
||||||
|
|
||||||
|
/*************************************************************************************************************************/
|
||||||
|
.macro COPY16x4
	// 4 rows x 16 elements: 64 bytes from each of A01..A04 are written
	// back to back (4 * 64 bytes) starting at B00; B00 then steps by M8.
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]
	prfm	PLDL1KEEP, [A02, #A_PREFETCH]
	prfm	PLDL1KEEP, [A03, #A_PREFETCH]
	prfm	PLDL1KEEP, [A04, #A_PREFETCH]

	mov	TEMP1, B00

	ld1	{v0.4s, v1.4s, v2.4s, v3.4s}, [A01], #64
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [TEMP1], #64

	ld1	{v4.4s, v5.4s, v6.4s, v7.4s}, [A02], #64
	st1	{v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1], #64

	ld1	{v8.4s, v9.4s, v10.4s, v11.4s}, [A03], #64
	st1	{v8.4s, v9.4s, v10.4s, v11.4s}, [TEMP1], #64

	ld1	{v12.4s, v13.4s, v14.4s, v15.4s}, [A04], #64
	st1	{v12.4s, v13.4s, v14.4s, v15.4s}, [TEMP1]	// last row: TEMP1 not advanced

	add	B00, B00, M8
.endm
|
||||||
|
|
||||||
|
.macro COPY8x4
	// 4 rows x 8 elements: 32 bytes from each of A01..A04 are appended
	// to the B01 stream (128 bytes in total).
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]
	prfm	PLDL1KEEP, [A02, #A_PREFETCH]
	prfm	PLDL1KEEP, [A03, #A_PREFETCH]
	prfm	PLDL1KEEP, [A04, #A_PREFETCH]

	ldp	q0, q1, [A01], #32
	ldp	q2, q3, [A02], #32
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B01], #64

	ldp	q4, q5, [A03], #32
	ldp	q6, q7, [A04], #32
	st1	{v4.4s, v5.4s, v6.4s, v7.4s}, [B01], #64
.endm
|
||||||
|
|
||||||
|
.macro COPY4x4
	// 4 rows x 4 elements: 16 bytes from each of A01..A04 are appended
	// to the B02 stream (64 bytes in total).
	ldr	q0, [A01], #16
	ldr	q1, [A02], #16
	ldr	q2, [A03], #16
	ldr	q3, [A04], #16

	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B02], #64
.endm
|
||||||
|
|
||||||
|
.macro COPY2x4
	// 4 rows x 2 elements: 8 bytes from each of A01..A04 are appended
	// to the B03 stream (32 bytes in total).
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8
	ldr	d2, [A03], #8
	ldr	d3, [A04], #8

	stp	d0, d1, [B03], #16
	stp	d2, d3, [B03], #16
.endm
|
||||||
|
|
||||||
|
.macro COPY1x4
	// 4 rows x 1 element: 4 bytes from each of A01..A04 are appended
	// to the B04 stream (16 bytes in total).
	ldr	s0, [A01], #4
	ldr	s1, [A02], #4
	ldr	s2, [A03], #4
	ldr	s3, [A04], #4

	stp	s0, s1, [B04], #8
	stp	s2, s3, [B04], #8
.endm
|
||||||
|
|
||||||
|
/*************************************************************************************************************************/
|
||||||
|
|
||||||
|
.macro COPY16x2
	// 2 rows x 16 elements: 64 bytes from A01 then 64 bytes from A02 are
	// written at B00 and B00 + 64; B00 then steps by M8.
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]
	prfm	PLDL1KEEP, [A02, #A_PREFETCH]

	ld1	{v0.4s, v1.4s, v2.4s, v3.4s}, [A01], #64
	ld1	{v4.4s, v5.4s, v6.4s, v7.4s}, [A02], #64

	mov	TEMP1, B00
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [TEMP1], #64
	st1	{v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1]

	add	B00, B00, M8
.endm
|
||||||
|
|
||||||
|
.macro COPY8x2
	// 2 rows x 8 elements: 32 bytes from each of A01, A02 are appended
	// to the B01 stream (64 bytes in total).
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]
	prfm	PLDL1KEEP, [A02, #A_PREFETCH]

	ld1	{v0.4s, v1.4s}, [A01], #32
	ld1	{v2.4s, v3.4s}, [A02], #32

	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B01], #64
.endm
|
||||||
|
|
||||||
|
.macro COPY4x2
	// 2 rows x 4 elements: 16 bytes from each of A01, A02 are appended
	// to the B02 stream (32 bytes in total).
	ldr	q0, [A01], #16
	ldr	q1, [A02], #16

	stp	q0, q1, [B02], #32
.endm
|
||||||
|
|
||||||
|
.macro COPY2x2
	// 2 rows x 2 elements: 8 bytes from each of A01, A02 are appended
	// to the B03 stream (16 bytes in total).
	ldr	d0, [A01], #8
	ldr	d1, [A02], #8

	stp	d0, d1, [B03], #16
.endm
|
||||||
|
|
||||||
|
.macro COPY1x2
	// 2 rows x 1 element: 4 bytes from each of A01, A02 are appended
	// to the B04 stream (8 bytes in total).
	ldr	s0, [A01], #4
	ldr	s1, [A02], #4

	stp	s0, s1, [B04], #8
.endm
|
||||||
|
|
||||||
|
/*************************************************************************************************************************/
|
||||||
|
|
||||||
|
.macro COPY16x1
	// 1 row x 16 elements: 64 bytes from A01 are written at B00;
	// B00 then steps by M8.
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]

	ld1	{v0.4s, v1.4s, v2.4s, v3.4s}, [A01], #64
	st1	{v0.4s, v1.4s, v2.4s, v3.4s}, [B00]

	add	B00, B00, M8
.endm
|
||||||
|
|
||||||
|
.macro COPY8x1
	// 1 row x 8 elements: 32 bytes from A01 are appended to the B01 stream.
	prfm	PLDL1KEEP, [A01, #A_PREFETCH]

	ldp	q0, q1, [A01], #32
	stp	q0, q1, [B01], #32
.endm
|
||||||
|
|
||||||
|
.macro COPY4x1
	// 1 row x 4 elements: 16 bytes from A01 are appended to the B02 stream.
	ldr	q0, [A01], #16
	str	q0, [B02], #16
.endm
|
||||||
|
|
||||||
|
.macro COPY2x1
	// 1 row x 2 elements: 8 bytes from A01 are appended to the B03 stream.
	ldr	d0, [A01], #8
	str	d0, [B03], #8
.endm
|
||||||
|
|
||||||
|
.macro COPY1x1
	// 1 row x 1 element: 4 bytes from A01 are appended to the B04 stream.
	ldr	s0, [A01], #4
	str	s0, [B04], #4
.endm
|
||||||
|
|
||||||
|
/**************************************************************************************
|
||||||
|
* End of macro definitions
|
||||||
|
**************************************************************************************/
|
||||||
|
|
||||||
|
PROLOGUE
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
|
||||||
|
SAVE_REGS
|
||||||
|
|
||||||
|
lsl LDA, LDA, #2 // LDA = LDA * SIZE
|
||||||
|
|
||||||
|
lsl TEMP1, M, #2 // TEMP1 = M * SIZE
|
||||||
|
|
||||||
|
and B01 , N , #-16
|
||||||
|
and B02 , N , #-8
|
||||||
|
and B03 , N , #-4
|
||||||
|
and B04 , N , #-2
|
||||||
|
|
||||||
|
mul B01, B01, TEMP1
|
||||||
|
mul B02, B02, TEMP1
|
||||||
|
mul B03, B03, TEMP1
|
||||||
|
mul B04, B04, TEMP1
|
||||||
|
|
||||||
|
add B01 , B01, B
|
||||||
|
add B02 , B02, B
|
||||||
|
add B03 , B03, B
|
||||||
|
add B04 , B04, B
|
||||||
|
|
||||||
|
lsl M8, M, #6 // M8 = M * 16 * SIZE
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L8_BEGIN:
|
||||||
|
asr J, M, #3 // J = M / 8
|
||||||
|
cmp J, #0
|
||||||
|
ble .Lsgemm_tcopy_L4_BEGIN
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lsgemm_tcopy_L8_M16_BEGIN:
|
||||||
|
|
||||||
|
mov A01, A
|
||||||
|
add A02, A01, LDA
|
||||||
|
add A03, A02, LDA
|
||||||
|
add A04, A03, LDA
|
||||||
|
add A05, A04, LDA
|
||||||
|
add A06, A05, LDA
|
||||||
|
add A07, A06, LDA
|
||||||
|
add A08, A07, LDA
|
||||||
|
add A, A08, LDA
|
||||||
|
|
||||||
|
mov B00, B
|
||||||
|
add B, B00, #512 // B = B + 8 * 16 * SIZE
|
||||||
|
|
||||||
|
asr I, N, #4 // I = N / 16
|
||||||
|
cmp I, #0
|
||||||
|
ble .Lsgemm_tcopy_L8_M16_40
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lsgemm_tcopy_L8_M16_20:
|
||||||
|
|
||||||
|
COPY16x8
|
||||||
|
|
||||||
|
subs I , I , #1
|
||||||
|
bne .Lsgemm_tcopy_L8_M16_20
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L8_M16_40:
|
||||||
|
tst N , #8
|
||||||
|
ble .Lsgemm_tcopy_L8_M16_60
|
||||||
|
|
||||||
|
COPY8x8
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L8_M16_60:
|
||||||
|
tst N , #4
|
||||||
|
ble .Lsgemm_tcopy_L8_M16_80
|
||||||
|
|
||||||
|
COPY4x8
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L8_M16_80:
|
||||||
|
|
||||||
|
tst N , #2
|
||||||
|
ble .Lsgemm_tcopy_L8_M16_100
|
||||||
|
|
||||||
|
COPY2x8
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L8_M16_100:
|
||||||
|
|
||||||
|
tst N, #1
|
||||||
|
ble .Lsgemm_tcopy_L8_M16_END
|
||||||
|
|
||||||
|
COPY1x8
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L8_M16_END:
|
||||||
|
|
||||||
|
subs J , J, #1 // j--
|
||||||
|
bne .Lsgemm_tcopy_L8_M16_BEGIN
|
||||||
|
|
||||||
|
/*********************************************************************************************/
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L4_BEGIN:
|
||||||
|
tst M, #7
|
||||||
|
ble .Lsgemm_tcopy_L999
|
||||||
|
|
||||||
|
tst M, #4
|
||||||
|
ble .Lsgemm_tcopy_L2_BEGIN
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L4_M16_BEGIN:
|
||||||
|
|
||||||
|
mov A01, A
|
||||||
|
add A02, A01, LDA
|
||||||
|
add A03, A02, LDA
|
||||||
|
add A04, A03, LDA
|
||||||
|
add A, A04, LDA
|
||||||
|
|
||||||
|
mov B00, B
|
||||||
|
add B, B00, #256 // B = B + 4 * 16 * SIZE
|
||||||
|
|
||||||
|
asr I, N, #4 // I = N / 16
|
||||||
|
cmp I, #0
|
||||||
|
ble .Lsgemm_tcopy_L4_M16_40
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lsgemm_tcopy_L4_M16_20:
|
||||||
|
|
||||||
|
COPY16x4
|
||||||
|
|
||||||
|
subs I , I , #1
|
||||||
|
bne .Lsgemm_tcopy_L4_M16_20
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L4_M16_40:
|
||||||
|
tst N , #8
|
||||||
|
ble .Lsgemm_tcopy_L4_M16_60
|
||||||
|
|
||||||
|
COPY8x4
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L4_M16_60:
|
||||||
|
tst N , #4
|
||||||
|
ble .Lsgemm_tcopy_L4_M16_80
|
||||||
|
|
||||||
|
COPY4x4
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L4_M16_80:
|
||||||
|
|
||||||
|
tst N , #2
|
||||||
|
ble .Lsgemm_tcopy_L4_M16_100
|
||||||
|
|
||||||
|
COPY2x4
|
||||||
|
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L4_M16_100:
|
||||||
|
|
||||||
|
tst N, #1
|
||||||
|
ble .Lsgemm_tcopy_L4_M16_END
|
||||||
|
|
||||||
|
COPY1x4
|
||||||
|
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L4_M16_END:
|
||||||
|
|
||||||
|
/*********************************************************************************************/
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L2_BEGIN:
|
||||||
|
|
||||||
|
tst M, #3
|
||||||
|
ble .Lsgemm_tcopy_L999
|
||||||
|
|
||||||
|
tst M, #2
|
||||||
|
ble .Lsgemm_tcopy_L1_BEGIN
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L2_M16_BEGIN:
|
||||||
|
mov A01, A
|
||||||
|
add A02, A01, LDA
|
||||||
|
add A, A02, LDA
|
||||||
|
|
||||||
|
mov B00, B
|
||||||
|
add B, B00, #128 // B = B + 2 * 16 * SIZE
|
||||||
|
|
||||||
|
asr I, N, #4 // I = N / 16
|
||||||
|
cmp I, #0
|
||||||
|
ble .Lsgemm_tcopy_L2_M16_40
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lsgemm_tcopy_L2_M16_20:
|
||||||
|
|
||||||
|
COPY16x2
|
||||||
|
|
||||||
|
subs I , I , #1
|
||||||
|
bne .Lsgemm_tcopy_L2_M16_20
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L2_M16_40:
|
||||||
|
tst N , #8
|
||||||
|
ble .Lsgemm_tcopy_L2_M16_60
|
||||||
|
|
||||||
|
COPY8x2
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L2_M16_60:
|
||||||
|
tst N , #4
|
||||||
|
ble .Lsgemm_tcopy_L2_M16_80
|
||||||
|
|
||||||
|
COPY4x2
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L2_M16_80:
|
||||||
|
|
||||||
|
tst N , #2
|
||||||
|
ble .Lsgemm_tcopy_L2_M16_100
|
||||||
|
|
||||||
|
COPY2x2
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L2_M16_100:
|
||||||
|
|
||||||
|
tst N , #1
|
||||||
|
ble .Lsgemm_tcopy_L2_M16_END
|
||||||
|
|
||||||
|
COPY1x2
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L2_M16_END:
|
||||||
|
|
||||||
|
/*********************************************************************************************/
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L1_BEGIN:
|
||||||
|
|
||||||
|
tst M, #1
|
||||||
|
ble .Lsgemm_tcopy_L999
|
||||||
|
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L1_M16_BEGIN:
|
||||||
|
|
||||||
|
mov A01, A // A01 = A
|
||||||
|
mov B00, B
|
||||||
|
|
||||||
|
asr I, N, #4 // I = M / 16
|
||||||
|
cmp I, #0
|
||||||
|
ble .Lsgemm_tcopy_L1_M16_40
|
||||||
|
|
||||||
|
.align 5
|
||||||
|
.Lsgemm_tcopy_L1_M16_20:
|
||||||
|
|
||||||
|
COPY16x1
|
||||||
|
|
||||||
|
subs I , I , #1
|
||||||
|
bne .Lsgemm_tcopy_L1_M16_20
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L1_M16_40:
|
||||||
|
tst N , #8
|
||||||
|
ble .Lsgemm_tcopy_L1_M16_60
|
||||||
|
|
||||||
|
COPY8x1
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L1_M16_60:
|
||||||
|
tst N , #4
|
||||||
|
ble .Lsgemm_tcopy_L1_M16_80
|
||||||
|
|
||||||
|
COPY4x1
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L1_M16_80:
|
||||||
|
|
||||||
|
tst N , #2
|
||||||
|
ble .Lsgemm_tcopy_L1_M16_100
|
||||||
|
|
||||||
|
COPY2x1
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L1_M16_100:
|
||||||
|
|
||||||
|
tst N , #1
|
||||||
|
ble .Lsgemm_tcopy_L1_M16_END
|
||||||
|
|
||||||
|
COPY1x1
|
||||||
|
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L1_M16_END:
|
||||||
|
|
||||||
|
.Lsgemm_tcopy_L999:
|
||||||
|
mov x0, #0 // set return value
|
||||||
|
RESTORE_REGS
|
||||||
|
ret
|
||||||
|
|
||||||
|
EPILOGUE
|
||||||
|
|
||||||
|
|
|
@ -739,6 +739,26 @@ static void init_parameter(void) {
|
||||||
}
|
}
|
||||||
#else //POWER
|
#else //POWER
|
||||||
|
|
||||||
|
#if defined(ARCH_ZARCH)
|
||||||
|
static void init_parameter(void) {
|
||||||
|
TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
|
||||||
|
TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
|
||||||
|
TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
|
||||||
|
TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
|
||||||
|
|
||||||
|
TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;
|
||||||
|
TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;
|
||||||
|
TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;
|
||||||
|
TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
|
||||||
|
|
||||||
|
|
||||||
|
TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
|
||||||
|
TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
|
||||||
|
TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
|
||||||
|
TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
|
||||||
|
}
|
||||||
|
#else //ZARCH
|
||||||
|
|
||||||
#ifdef ARCH_X86
|
#ifdef ARCH_X86
|
||||||
static int get_l2_size_old(void){
|
static int get_l2_size_old(void){
|
||||||
int i, eax, ebx, ecx, edx, cpuid_level;
|
int i, eax, ebx, ecx, edx, cpuid_level;
|
||||||
|
@ -1325,4 +1345,5 @@ static void init_parameter(void) {
|
||||||
|
|
||||||
}
|
}
|
||||||
#endif //POWER
|
#endif //POWER
|
||||||
|
#endif //ZARCH
|
||||||
#endif //defined(ARCH_ARM64)
|
#endif //defined(ARCH_ARM64)
|
||||||
|
|
|
@ -98,5 +98,5 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c
|
CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c
|
||||||
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S
|
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c
|
||||||
|
|
||||||
|
|
|
@ -95,5 +95,5 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
|
||||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
|
||||||
|
|
||||||
CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c
|
CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c
|
||||||
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S
|
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,224 @@
|
||||||
|
/* %0 = "+r"(a_pointer), %1 = "+r"(b_pointer), %2 = "+r"(c_pointer), %3 = "+r"(ldc_in_bytes), %4 for k_count, %5 for c_store */
|
||||||
|
/* r12 = k << 5(const), r13 = k(const), r14 = b_head_pos(const), r15 = tmp */
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
//recommended settings: GEMM_Q=256, GEMM_P=256
|
||||||
|
|
||||||
|
/* m = 4 *//* ymm0 for alpha, ymm1-ymm3 for temporary use, ymm4-ymm15 for accumulators */
|
||||||
|
/* One k-step, 4 rows x 1 column: ymm4 += (32 bytes at %0) * broadcast(8 bytes at %1); %0 advances by 32, %1 by 8. */
#define KERNEL_k1m4n1 \
|
||||||
|
"vmovupd (%0),%%ymm1; addq $32,%0;"\
|
||||||
|
"vbroadcastsd (%1),%%ymm2; vfmadd231pd %%ymm1,%%ymm2,%%ymm4;"\
|
||||||
|
"addq $8,%1;"
|
||||||
|
/* The "_h_" variants advance %0 only; the matching plain variant appends the %1 increment. */
/* vmovddup duplicates the even-indexed doubles of its 32-byte source, so the load at 8(%0) touches bytes 8..39 relative to %0. */
#define KERNEL_h_k1m4n2 \
|
||||||
|
"vmovddup (%0),%%ymm1; vmovddup 8(%0),%%ymm2; addq $32,%0;"\
|
||||||
|
"vbroadcastf128 (%1),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,%%ymm4; vfmadd231pd %%ymm2,%%ymm3,%%ymm5;"
|
||||||
|
#define KERNEL_k1m4n2 KERNEL_h_k1m4n2 "addq $16,%1;"
|
||||||
|
#define KERNEL_h_k1m4n4 \
|
||||||
|
KERNEL_h_k1m4n2 "vbroadcastf128 16(%1),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,%%ymm6; vfmadd231pd %%ymm2,%%ymm3,%%ymm7;"
|
||||||
|
#define KERNEL_k1m4n4 KERNEL_h_k1m4n4 "addq $32,%1;"
|
||||||
|
/* Two broadcast+FMA pairs for one group of four accumulators; the variadic tail is stringified into the address expression, e.g. off1(%1,%%r12,1). */
#define unit_kernel_k1m4n4(c1,c2,c3,c4,off1,off2,...) \
|
||||||
|
"vbroadcastf128 "#off1"("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c1"; vfmadd231pd %%ymm2,%%ymm3,"#c2";"\
|
||||||
|
"vbroadcastf128 "#off2"("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c3"; vfmadd231pd %%ymm2,%%ymm3,"#c4";"
|
||||||
|
/* n = 8 / n = 12 reach the further column groups at %1 + r12 and %1 + 2*r12 (r12 = k << 5 per the legend at the top of the file). */
#define KERNEL_h_k1m4n8 KERNEL_h_k1m4n4 unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,0,16,%1,%%r12,1)
|
||||||
|
#define KERNEL_k1m4n8 KERNEL_h_k1m4n8 "addq $32,%1;"
|
||||||
|
#define KERNEL_h_k1m4n12 KERNEL_h_k1m4n8 unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,0,16,%1,%%r12,2)
|
||||||
|
#define KERNEL_k1m4n12 KERNEL_h_k1m4n12 "addq $32,%1;"
|
||||||
|
/* Two k-steps: for n = 1..8 simply the one-step macro twice. */
#define KERNEL_k2m4n1 KERNEL_k1m4n1 KERNEL_k1m4n1
|
||||||
|
#define KERNEL_k2m4n2 KERNEL_k1m4n2 KERNEL_k1m4n2
|
||||||
|
#define KERNEL_k2m4n4 KERNEL_k1m4n4 KERNEL_k1m4n4
|
||||||
|
#define KERNEL_k2m4n8 KERNEL_k1m4n8 KERNEL_k1m4n8
|
||||||
|
/* n = 12 is unrolled by hand: the second step uses offsets 32/40 on %0 and 32/48 on %1, and each pointer is bumped once by 64. */
#define KERNEL_k2m4n12 \
|
||||||
|
"vmovddup (%0),%%ymm1; vmovddup 8(%0),%%ymm2;"\
|
||||||
|
unit_kernel_k1m4n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,0,16,%1)\
|
||||||
|
unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,0,16,%1,%%r12,1)\
|
||||||
|
unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,0,16,%1,%%r12,2)\
|
||||||
|
"vmovddup 32(%0),%%ymm1; vmovddup 40(%0),%%ymm2; prefetcht0 512(%0); addq $64,%0;"\
|
||||||
|
unit_kernel_k1m4n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,32,48,%1)\
|
||||||
|
unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,32,48,%1,%%r12,1)\
|
||||||
|
unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,32,48,%1,%%r12,2) "addq $64,%1;"
|
||||||
|
/* Zero the accumulators used for m = 4: ymm4 (n=1), ymm4-5 (n=2), ymm4-7 (n=4), ymm4-11 (n=8), ymm4-15 (n=12). */
#define INIT_m4n1 "vpxor %%ymm4,%%ymm4,%%ymm4;"
|
||||||
|
#define INIT_m4n2 INIT_m4n1 "vpxor %%ymm5,%%ymm5,%%ymm5;"
|
||||||
|
#define INIT_m4n4 INIT_m4n2 "vpxor %%ymm6,%%ymm6,%%ymm6;vpxor %%ymm7,%%ymm7,%%ymm7;"
|
||||||
|
/* Zeroes four registers given by name. */
#define unit_init_m4n4(c1,c2,c3,c4) \
|
||||||
|
"vpxor "#c1","#c1","#c1";vpxor "#c2","#c2","#c2";vpxor "#c3","#c3","#c3";vpxor "#c4","#c4","#c4";"
|
||||||
|
#define INIT_m4n8 INIT_m4n4 unit_init_m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11)
|
||||||
|
#define INIT_m4n12 INIT_m4n8 unit_init_m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15)
|
||||||
|
/* Write-back for m = 4.  Every accumulator element is duplicated into two adjacent lanes (vunpcklpd/vunpckhpd with the same source twice), */
/* then vfmadd213pd forms element * ymm0 + C in place and the result is stored back to C. */
/* NOTE(review): the duplication presumably targets the real/imaginary slots of a complex C entry - confirm against the caller. */
#define SAVE_h_m4n1 \
|
||||||
|
"vpermpd $216,%%ymm4,%%ymm3; vunpcklpd %%ymm3,%%ymm3,%%ymm1; vunpckhpd %%ymm3,%%ymm3,%%ymm2;"\
|
||||||
|
"vfmadd213pd (%2),%%ymm0,%%ymm1; vfmadd213pd 32(%2),%%ymm0,%%ymm2; vmovupd %%ymm1,(%2); vmovupd %%ymm2,32(%2);"
|
||||||
|
/* Stores one accumulator pair to two consecutive columns of C: 64 bytes at %5 and 64 bytes at %5 + %3, then moves %5 forward by 2 * %3. */
/* c1 and c2 are overwritten by the lane shuffle. */
#define unit_save_m4n2(c1,c2) \
|
||||||
|
"vperm2f128 $2,"#c1","#c2",%%ymm2; vperm2f128 $19,"#c1","#c2","#c2"; vmovapd %%ymm2,"#c1";"\
|
||||||
|
"vunpcklpd "#c1","#c1",%%ymm2; vunpcklpd "#c2","#c2",%%ymm3;"\
|
||||||
|
"vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd 32(%5),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,32(%5);"\
|
||||||
|
"vunpckhpd "#c1","#c1",%%ymm2; vunpckhpd "#c2","#c2",%%ymm3;"\
|
||||||
|
"vfmadd213pd (%5,%3,1),%%ymm0,%%ymm2; vfmadd213pd 32(%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5,%3,1); vmovupd %%ymm3,32(%5,%3,1);"\
|
||||||
|
"leaq (%5,%3,2),%5;"
|
||||||
|
/* For n >= 2 the store cursor %5 is first reset to %2 (c_pointer). */
#define SAVE_h_m4n2 "movq %2,%5;" unit_save_m4n2(%%ymm4,%%ymm5)
|
||||||
|
#define SAVE_h_m4n4 SAVE_h_m4n2 unit_save_m4n2(%%ymm6,%%ymm7)
|
||||||
|
#define SAVE_h_m4n8 SAVE_h_m4n4 unit_save_m4n2(%%ymm8,%%ymm9) unit_save_m4n2(%%ymm10,%%ymm11)
|
||||||
|
#define SAVE_h_m4n12 SAVE_h_m4n8 unit_save_m4n2(%%ymm12,%%ymm13) unit_save_m4n2(%%ymm14,%%ymm15)
|
||||||
|
/* Full save: the "_h_" body followed by advancing %2 by 64 bytes. */
#define SAVE_m4(ndim) SAVE_h_m4n##ndim "addq $64,%2;"
|
||||||
|
/* Complete m = 4 block for a given n: zero accumulators, run the k loop, write back. */
/* Setup: %4 = r13 (k), %1 = r14 (b_head_pos), %5 = %2, r15 = 0. */
/* Label <ndim>004041: main loop, entered only when %4 >= 24; each pass runs four KERNEL_k2 (8 k-steps) and repeats while %4 >= 16. */
/*   r15 alternates between 126 and %3, so %5 moves by +63 and then by %3 - 63 on successive passes while (%5) is prefetched with prefetcht1. */
/*   NOTE(review): %8 is not listed in the operand legend at the top of this file; here it is prefetched and advanced by 32 per pass - confirm its meaning against the asm operand list. */
/* Label <ndim>004042: tail loop, one KERNEL_k1 per pass until %4 reaches zero, with prefetcht0 on (%5) walking by %3. */
/* Label <ndim>004043: prefetch at r14, then SAVE_m4. */
#define COMPUTE_m4(ndim) \
|
||||||
|
INIT_m4n##ndim\
|
||||||
|
"movq %%r13,%4; movq %%r14,%1; movq %2,%5; xorq %%r15,%%r15;"\
|
||||||
|
"cmpq $24,%4; jb "#ndim"004042f;"\
|
||||||
|
#ndim"004041:\n\t"\
|
||||||
|
"cmpq $126,%%r15; movq $126,%%r15; cmoveq %3,%%r15;"\
|
||||||
|
KERNEL_k2m4n##ndim KERNEL_k2m4n##ndim\
|
||||||
|
"prefetcht1 (%5); subq $63,%5;"\
|
||||||
|
KERNEL_k2m4n##ndim KERNEL_k2m4n##ndim\
|
||||||
|
"addq %%r15,%5; prefetcht1 (%8); addq $32,%8;"\
|
||||||
|
"subq $8,%4; cmpq $16,%4; jnb "#ndim"004041b;"\
|
||||||
|
"movq %2,%5;"\
|
||||||
|
#ndim"004042:\n\t"\
|
||||||
|
"testq %4,%4; jz "#ndim"004043f;"\
|
||||||
|
"prefetcht0 (%5); prefetcht0 63(%5);"\
|
||||||
|
KERNEL_k1m4n##ndim\
|
||||||
|
"prefetcht0 (%5,%3,4); prefetcht0 63(%5,%3,4); addq %3,%5;"\
|
||||||
|
"decq %4; jmp "#ndim"004042b;"\
|
||||||
|
#ndim"004043:\n\t"\
|
||||||
|
"prefetcht0 (%%r14); prefetcht0 64(%%r14);"\
|
||||||
|
SAVE_m4(ndim)
|
||||||
|
|
||||||
|
/* m = 2 *//* vmm0 for alpha, vmm1-vmm3 for temporary use, vmm4-vmm9 for accumulators */
|
||||||
|
/* One k-step, 2 rows x 1 column: xmm4 += (16 bytes at %0) * duplicated double at (%1); %0 advances by 16, %1 by 8. */
#define KERNEL_k1m2n1 \
|
||||||
|
"vmovupd (%0),%%xmm1; addq $16,%0;"\
|
||||||
|
"vmovddup (%1),%%xmm2; vfmadd231pd %%xmm1,%%xmm2,%%xmm4;"\
|
||||||
|
"addq $8,%1;"
|
||||||
|
/* As for m = 4, the "_h_" variants advance %0 only and the plain variants append the %1 increment. */
#define KERNEL_h_k1m2n2 \
|
||||||
|
"vmovddup (%0),%%xmm1; vmovddup 8(%0),%%xmm2; addq $16,%0;"\
|
||||||
|
"vmovupd (%1),%%xmm3; vfmadd231pd %%xmm1,%%xmm3,%%xmm4; vfmadd231pd %%xmm2,%%xmm3,%%xmm5;"
|
||||||
|
#define KERNEL_k1m2n2 KERNEL_h_k1m2n2 "addq $16,%1;"
|
||||||
|
/* Loads 32 bytes from the address built out of the variadic tail and FMAs them against ymm1 and ymm2 into c1 and c2. */
#define unit_kernel_k1m2n4(c1,c2,...) \
|
||||||
|
"vmovupd ("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c1"; vfmadd231pd %%ymm2,%%ymm3,"#c2";"
|
||||||
|
/* From n = 4 upward the two doubles at %0 are each broadcast to a full ymm register. */
#define KERNEL_h_k1m2n4 \
|
||||||
|
"vbroadcastsd (%0),%%ymm1; vbroadcastsd 8(%0),%%ymm2; addq $16,%0;"\
|
||||||
|
unit_kernel_k1m2n4(%%ymm4,%%ymm5,%1)
|
||||||
|
#define KERNEL_k1m2n4 KERNEL_h_k1m2n4 "addq $32,%1;"
|
||||||
|
#define KERNEL_h_k1m2n8 KERNEL_h_k1m2n4 \
|
||||||
|
unit_kernel_k1m2n4(%%ymm6,%%ymm7,%1,%%r12,1)
|
||||||
|
#define KERNEL_k1m2n8 KERNEL_h_k1m2n8 "addq $32,%1;"
|
||||||
|
#define KERNEL_h_k1m2n12 KERNEL_h_k1m2n8 \
|
||||||
|
unit_kernel_k1m2n4(%%ymm8,%%ymm9,%1,%%r12,2)
|
||||||
|
#define KERNEL_k1m2n12 KERNEL_h_k1m2n12 "addq $32,%1;"
|
||||||
|
#define INIT_m2n1 "vpxor %%xmm4,%%xmm4,%%xmm4;"
|
||||||
|
#define INIT_m2n2 INIT_m2n1 "vpxor %%xmm5,%%xmm5,%%xmm5;"
|
||||||
|
#define unit_init_m2n4(c1,c2) "vpxor "#c1","#c1","#c1";vpxor "#c2","#c2","#c2";"
|
||||||
|
#define INIT_m2n4 unit_init_m2n4(%%ymm4,%%ymm5)
|
||||||
|
#define INIT_m2n8 INIT_m2n4 unit_init_m2n4(%%ymm6,%%ymm7)
|
||||||
|
#define INIT_m2n12 INIT_m2n8 unit_init_m2n4(%%ymm8,%%ymm9)
|
||||||
|
#define SAVE_h_m2n1 \
|
||||||
|
"vinsertf128 $1,%%xmm4,%%ymm4,%%ymm4; vpermilpd $12,%%ymm4,%%ymm4; vfmadd213pd (%2),%%ymm0,%%ymm4; vmovupd %%ymm4,(%2);"
|
||||||
|
#define SAVE_h_m2n2 \
|
||||||
|
"vinsertf128 $1,%%xmm5,%%ymm4,%%ymm4; vunpcklpd %%ymm4,%%ymm4,%%ymm1; vunpckhpd %%ymm4,%%ymm4,%%ymm2;"\
|
||||||
|
"vfmadd213pd (%2),%%ymm0,%%ymm1; vmovupd %%ymm1,(%2);"\
|
||||||
|
"vfmadd213pd (%2,%3,1),%%ymm0,%%ymm2; vmovupd %%ymm2,(%2,%3,1);"
|
||||||
|
#define unit_save_m2n4(c1,c2) \
|
||||||
|
"vperm2f128 $2,"#c1","#c2",%%ymm1; vunpcklpd %%ymm1,%%ymm1,%%ymm2; vunpckhpd %%ymm1,%%ymm1,%%ymm3;"\
|
||||||
|
"vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd (%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,(%5,%3,1); leaq (%5,%3,2),%5;"\
|
||||||
|
"vperm2f128 $19,"#c1","#c2",%%ymm1; vunpcklpd %%ymm1,%%ymm1,%%ymm2; vunpckhpd %%ymm1,%%ymm1,%%ymm3;"\
|
||||||
|
"vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd (%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,(%5,%3,1); leaq (%5,%3,2),%5;"
|
||||||
|
#define SAVE_h_m2n4 "movq %2,%5;" unit_save_m2n4(%%ymm4,%%ymm5)
|
||||||
|
#define SAVE_h_m2n8 SAVE_h_m2n4 unit_save_m2n4(%%ymm6,%%ymm7)
|
||||||
|
#define SAVE_h_m2n12 SAVE_h_m2n8 unit_save_m2n4(%%ymm8,%%ymm9)
|
||||||
|
#define SAVE_m2(ndim) SAVE_h_m2n##ndim "addq $32,%2;"
|
||||||
|
#define COMPUTE_m2(ndim) \
|
||||||
|
INIT_m2n##ndim\
|
||||||
|
"movq %%r13,%4; movq %%r14,%1;"\
|
||||||
|
#ndim"002022:\n\t"\
|
||||||
|
"testq %4,%4; jz "#ndim"002023f;"\
|
||||||
|
KERNEL_k1m2n##ndim\
|
||||||
|
"decq %4; jmp "#ndim"002022b;"\
|
||||||
|
#ndim"002023:\n\t"\
|
||||||
|
SAVE_m2(ndim)
|
||||||
|
|
||||||
|
/* m = 1 *//* vmm0 for alpha, vmm1-vmm3 and vmm10-vmm15 for temporary use, vmm4-vmm6 for accumulators */
|
||||||
|
#define KERNEL_k1m1n1 \
|
||||||
|
"vmovsd (%0),%%xmm1; addq $8,%0;"\
|
||||||
|
"vfmadd231sd (%1),%%xmm1,%%xmm4; addq $8,%1;"
|
||||||
|
#define KERNEL_k1m1n2 \
|
||||||
|
"vmovddup (%0),%%xmm1; addq $8,%0;"\
|
||||||
|
"vfmadd231pd (%1),%%xmm1,%%xmm4; addq $16,%1;"
|
||||||
|
#define unit_kernel_k1m1n4(c1,...) \
|
||||||
|
"vmovupd ("#__VA_ARGS__"),%%ymm2; vfmadd231pd %%ymm1,%%ymm2,"#c1";"
|
||||||
|
#define KERNEL_h_k1m1n4 \
|
||||||
|
"vbroadcastsd (%0),%%ymm1; addq $8,%0;"\
|
||||||
|
unit_kernel_k1m1n4(%%ymm4,%1)
|
||||||
|
#define KERNEL_k1m1n4 KERNEL_h_k1m1n4 "addq $32,%1;"
|
||||||
|
#define KERNEL_h_k1m1n8 KERNEL_h_k1m1n4 unit_kernel_k1m1n4(%%ymm5,%1,%%r12,1)
|
||||||
|
#define KERNEL_k1m1n8 KERNEL_h_k1m1n8 "addq $32,%1;"
|
||||||
|
#define KERNEL_h_k1m1n12 KERNEL_h_k1m1n8 unit_kernel_k1m1n4(%%ymm6,%1,%%r12,2)
|
||||||
|
#define KERNEL_k1m1n12 KERNEL_h_k1m1n12 "addq $32,%1;"
|
||||||
|
#define INIT_m1n1 INIT_m2n1
|
||||||
|
#define INIT_m1n2 INIT_m2n1
|
||||||
|
#define INIT_m1n4 "vpxor %%ymm4,%%ymm4,%%ymm4;"
|
||||||
|
#define INIT_m1n8 INIT_m1n4 "vpxor %%ymm5,%%ymm5,%%ymm5;"
|
||||||
|
#define INIT_m1n12 INIT_m1n8 "vpxor %%ymm6,%%ymm6,%%ymm6;"
|
||||||
|
#define SAVE_h_m1n1 \
|
||||||
|
"vmovddup %%xmm4,%%xmm4; vfmadd213pd (%2),%%xmm0,%%xmm4; vmovupd %%xmm4,(%2);"
|
||||||
|
#define SAVE_h_m1n2 \
|
||||||
|
"vunpcklpd %%xmm4,%%xmm4,%%xmm1; vunpckhpd %%xmm4,%%xmm4,%%xmm2;"\
|
||||||
|
"vfmadd213pd (%2),%%xmm0,%%xmm1; vmovupd %%xmm1,(%2);"\
|
||||||
|
"vfmadd213pd (%2,%3,1),%%xmm0,%%xmm2; vmovupd %%xmm2,(%2,%3,1);"
|
||||||
|
#define unit_save_m1n4(c1) \
|
||||||
|
"vunpcklpd "#c1","#c1",%%ymm1; vunpckhpd "#c1","#c1",%%ymm2;"\
|
||||||
|
"vmovupd (%5),%%xmm3; vinsertf128 $1,(%5,%3,2),%%ymm3,%%ymm3;"\
|
||||||
|
"vfmadd213pd %%ymm3,%%ymm0,%%ymm1; vmovupd %%xmm1,(%5); vextractf128 $1,%%ymm1,(%5,%3,2); addq %3,%5;"\
|
||||||
|
"vmovupd (%5),%%xmm3; vinsertf128 $1,(%5,%3,2),%%ymm3,%%ymm3;"\
|
||||||
|
"vfmadd213pd %%ymm3,%%ymm0,%%ymm2; vmovupd %%xmm2,(%5); vextractf128 $1,%%ymm2,(%5,%3,2); addq %3,%5; leaq (%5,%3,2),%5;"
|
||||||
|
#define SAVE_h_m1n4 "movq %2,%5;" unit_save_m1n4(%%ymm4)
|
||||||
|
#define SAVE_h_m1n8 SAVE_h_m1n4 unit_save_m1n4(%%ymm5)
|
||||||
|
#define SAVE_h_m1n12 SAVE_h_m1n8 unit_save_m1n4(%%ymm6)
|
||||||
|
#define SAVE_m1(ndim) SAVE_h_m1n##ndim "addq $16,%2;"
|
||||||
|
#define COMPUTE_m1(ndim) \
|
||||||
|
INIT_m1n##ndim\
|
||||||
|
"movq %%r13,%4; movq %%r14,%1;"\
|
||||||
|
#ndim"001011:\n\t"\
|
||||||
|
"testq %4,%4; jz "#ndim"001012f;"\
|
||||||
|
KERNEL_k1m1n##ndim\
|
||||||
|
"decq %4; jmp "#ndim"001011b;"\
|
||||||
|
#ndim"001012:\n\t"\
|
||||||
|
SAVE_m1(ndim)
|
||||||
|
|
||||||
|
#define COMPUTE(ndim) {\
|
||||||
|
next_b = b_pointer + ndim * K;\
|
||||||
|
__asm__ __volatile__(\
|
||||||
|
"vbroadcastf128 (%6),%%ymm0;"\
|
||||||
|
"movq %4,%%r13; movq %4,%%r12; salq $5,%%r12; movq %1,%%r14; movq %7,%%r11;"\
|
||||||
|
"cmpq $4,%7;jb 33101"#ndim"f;"\
|
||||||
|
"33109"#ndim":\n\t"\
|
||||||
|
COMPUTE_m4(ndim)\
|
||||||
|
"subq $4,%7;cmpq $4,%7;jnb 33109"#ndim"b;"\
|
||||||
|
"33101"#ndim":\n\t"\
|
||||||
|
"cmpq $2,%7;jb 33104"#ndim"f;"\
|
||||||
|
COMPUTE_m2(ndim)\
|
||||||
|
"subq $2,%7;"\
|
||||||
|
"33104"#ndim":\n\t"\
|
||||||
|
"testq %7,%7;jz 33105"#ndim"f;"\
|
||||||
|
COMPUTE_m1(ndim)\
|
||||||
|
"33105"#ndim":\n\t"\
|
||||||
|
"movq %%r13,%4; movq %%r14,%1; movq %%r11,%7;"\
|
||||||
|
:"+r"(a_pointer),"+r"(b_pointer),"+r"(c_pointer),"+r"(ldc_in_bytes),"+r"(K),"+r"(ctemp),"+r"(const_val),"+r"(M),"+r"(next_b)\
|
||||||
|
::"r11","r12","r13","r14","r15","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14",\
|
||||||
|
"xmm15","cc","memory");\
|
||||||
|
a_pointer -= M * K; b_pointer += ndim * K; c_pointer += 2*(LDC * ndim - M);\
|
||||||
|
}
|
||||||
|
int __attribute__ ((noinline))
|
||||||
|
CNAME(BLASLONG m, BLASLONG n, BLASLONG k, double alphar, double alphai, double * __restrict__ A, double * __restrict__ B, double * __restrict__ C, BLASLONG LDC)
|
||||||
|
{
|
||||||
|
if(m==0||n==0||k==0) return 0;
|
||||||
|
int64_t ldc_in_bytes = (int64_t)LDC * sizeof(double) * 2;
|
||||||
|
double constval[2]; constval[0] = alphar; constval[1] = alphai;
|
||||||
|
double *const_val=constval;
|
||||||
|
int64_t M = (int64_t)m, K = (int64_t)k;
|
||||||
|
BLASLONG n_count = n;
|
||||||
|
double *a_pointer = A,*b_pointer = B,*c_pointer = C,*ctemp = C,*next_b = B;
|
||||||
|
for(;n_count>11;n_count-=12) COMPUTE(12)
|
||||||
|
for(;n_count>7;n_count-=8) COMPUTE(8)
|
||||||
|
for(;n_count>3;n_count-=4) COMPUTE(4)
|
||||||
|
for(;n_count>1;n_count-=2) COMPUTE(2)
|
||||||
|
if(n_count>0) COMPUTE(1)
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||||
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
CGEMMKERNEL = ctrmm4x4V.S
|
CGEMMKERNEL = ctrmm4x4V.S
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ZGEMMKERNEL = ztrmm4x4V.S
|
ZGEMMKERNEL = ztrmm4x4V.S
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
|
|
|
@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||||
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
SGEMMINCOPYOBJ = sgemm_incopy.o
|
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMITCOPYOBJ = sgemm_itcopy.o
|
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c
|
||||||
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
|
||||||
DGEMMINCOPYOBJ = dgemm_incopy.o
|
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMITCOPYOBJ = dgemm_itcopy.o
|
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
CGEMMKERNEL = ctrmm4x4V.S
|
CGEMMKERNEL = ctrmm4x4V.S
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ZGEMMKERNEL = ztrmm4x4V.S
|
ZGEMMKERNEL = ztrmm4x4V.S
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c
|
||||||
|
|
|
@ -94,26 +94,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
|
||||||
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
SGEMMONCOPYOBJ = sgemm_oncopy.o
|
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o
|
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
|
||||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
|
||||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
|
||||||
DGEMMONCOPYOBJ = dgemm_oncopy.o
|
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o
|
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
CGEMMONCOPYOBJ = cgemm_oncopy.o
|
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o
|
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
|
||||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
|
||||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
|
||||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o
|
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
|
||||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
|
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
|
||||||
|
|
||||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
|
||||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
|
||||||
|
|
|
@ -0,0 +1,38 @@
|
||||||
|
image:
|
||||||
|
- Visual Studio 2017
|
||||||
|
|
||||||
|
configuration: Release
|
||||||
|
clone_depth: 3
|
||||||
|
|
||||||
|
matrix:
|
||||||
|
fast_finish: false
|
||||||
|
|
||||||
|
skip_commits:
|
||||||
|
# Add [av skip] to commit messages
|
||||||
|
message: /\[av skip\]/
|
||||||
|
|
||||||
|
cache:
|
||||||
|
- '%APPVEYOR_BUILD_FOLDER%\build'
|
||||||
|
|
||||||
|
environment:
|
||||||
|
global:
|
||||||
|
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
|
||||||
|
|
||||||
|
install:
|
||||||
|
- call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
|
||||||
|
- conda config --add channels conda-forge --force
|
||||||
|
- conda install --yes --quiet flang jom
|
||||||
|
- call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
|
||||||
|
- set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
|
||||||
|
- set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
|
||||||
|
|
||||||
|
before_build:
|
||||||
|
- ps: if (-Not (Test-Path .\build)) { mkdir build }
|
||||||
|
- cd build
|
||||||
|
- cmake -G "NMake Makefiles JOM" -DCMAKE_Fortran_COMPILER=flang -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON ..
|
||||||
|
|
||||||
|
build_script:
|
||||||
|
- cmake --build .
|
||||||
|
|
||||||
|
test_script:
|
||||||
|
- ctest -j2
|
|
@ -35,3 +35,9 @@ LAPACKE/example/xexample*
|
||||||
# SED
|
# SED
|
||||||
SRC/*-e
|
SRC/*-e
|
||||||
LAPACKE/src/*-e
|
LAPACKE/src/*-e
|
||||||
|
build*
|
||||||
|
|
||||||
|
# DOCS documentation
|
||||||
|
DOCS/man
|
||||||
|
DOCS/explore-html
|
||||||
|
output_err
|
||||||
|
|
|
@ -1,33 +1,32 @@
|
||||||
language: cpp
|
language: c
|
||||||
|
dist: xenial
|
||||||
|
group: travis_latest
|
||||||
|
|
||||||
|
git:
|
||||||
|
depth: 3
|
||||||
|
quiet: true
|
||||||
|
|
||||||
addons:
|
addons:
|
||||||
apt:
|
apt:
|
||||||
sources:
|
|
||||||
- george-edison55-precise-backports # cmake
|
|
||||||
packages:
|
packages:
|
||||||
- cmake
|
|
||||||
- cmake-data
|
|
||||||
- gfortran
|
- gfortran
|
||||||
|
|
||||||
os:
|
matrix:
|
||||||
- linux
|
include:
|
||||||
- osx
|
- os: linux
|
||||||
|
env: CMAKE_BUILD_TYPE=Release
|
||||||
env:
|
- os: linux
|
||||||
- CMAKE_BUILD_TYPE=Release
|
env: CMAKE_BUILD_TYPE=Coverage
|
||||||
- CMAKE_BUILD_TYPE=Coverage
|
- os: osx
|
||||||
|
env: CMAKE_BUILD_TYPE=Release
|
||||||
install:
|
before_install:
|
||||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]];
|
- brew update > /dev/null
|
||||||
then
|
- brew install gcc > /dev/null
|
||||||
for pkg in gcc cmake; do
|
- os: osx
|
||||||
if brew list -1 | grep -q "^${pkg}\$"; then
|
env: CMAKE_BUILD_TYPE=Coverage
|
||||||
brew outdated $pkg || brew upgrade $pkg;
|
before_install:
|
||||||
else
|
- brew update > /dev/null
|
||||||
brew install $pkg;
|
- brew install gcc > /dev/null
|
||||||
fi
|
|
||||||
done
|
|
||||||
fi
|
|
||||||
|
|
||||||
script:
|
script:
|
||||||
- export PR=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST
|
- export PR=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST
|
||||||
|
|
|
@ -6,4 +6,5 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR
|
||||||
install(FILES
|
install(FILES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/blas.pc
|
${CMAKE_CURRENT_BINARY_DIR}/blas.pc
|
||||||
DESTINATION ${PKG_CONFIG_DIR}
|
DESTINATION ${PKG_CONFIG_DIR}
|
||||||
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,13 +1,18 @@
|
||||||
include ../make.inc
|
TOPSRCDIR = ..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
all: blas
|
all: blas
|
||||||
|
|
||||||
|
.PHONY: blas
|
||||||
blas:
|
blas:
|
||||||
$(MAKE) -C SRC
|
$(MAKE) -C SRC
|
||||||
|
|
||||||
|
.PHONY: blas_testing
|
||||||
blas_testing: blas
|
blas_testing: blas
|
||||||
$(MAKE) -C TESTING run
|
$(MAKE) -C TESTING run
|
||||||
|
|
||||||
|
.PHONY: clean cleanobj cleanlib cleanexe cleantest
|
||||||
clean:
|
clean:
|
||||||
$(MAKE) -C SRC clean
|
$(MAKE) -C SRC clean
|
||||||
$(MAKE) -C TESTING clean
|
$(MAKE) -C TESTING clean
|
||||||
|
|
|
@ -1,5 +1,3 @@
|
||||||
include ../../make.inc
|
|
||||||
|
|
||||||
#######################################################################
|
#######################################################################
|
||||||
# This is the makefile to create a library for the BLAS.
|
# This is the makefile to create a library for the BLAS.
|
||||||
# The files are grouped as follows:
|
# The files are grouped as follows:
|
||||||
|
@ -55,6 +53,10 @@ include ../../make.inc
|
||||||
#
|
#
|
||||||
#######################################################################
|
#######################################################################
|
||||||
|
|
||||||
|
TOPSRCDIR = ../..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
all: $(BLASLIB)
|
all: $(BLASLIB)
|
||||||
|
|
||||||
#---------------------------------------------------------
|
#---------------------------------------------------------
|
||||||
|
@ -138,33 +140,32 @@ ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \
|
||||||
$(ZBLAS2) $(ZBLAS3) $(ALLBLAS)
|
$(ZBLAS2) $(ZBLAS3) $(ALLBLAS)
|
||||||
|
|
||||||
$(BLASLIB): $(ALLOBJ)
|
$(BLASLIB): $(ALLOBJ)
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $^
|
$(AR) $(ARFLAGS) $@ $^
|
||||||
$(RANLIB) $@
|
$(RANLIB) $@
|
||||||
|
|
||||||
|
.PHONY: single double complex complex16
|
||||||
single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3)
|
single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3)
|
||||||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^
|
$(AR) $(ARFLAGS) $(BLASLIB) $^
|
||||||
$(RANLIB) $(BLASLIB)
|
$(RANLIB) $(BLASLIB)
|
||||||
|
|
||||||
double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3)
|
double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3)
|
||||||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^
|
$(AR) $(ARFLAGS) $(BLASLIB) $^
|
||||||
$(RANLIB) $(BLASLIB)
|
$(RANLIB) $(BLASLIB)
|
||||||
|
|
||||||
complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3)
|
complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3)
|
||||||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^
|
$(AR) $(ARFLAGS) $(BLASLIB) $^
|
||||||
$(RANLIB) $(BLASLIB)
|
$(RANLIB) $(BLASLIB)
|
||||||
|
|
||||||
complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3)
|
complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3)
|
||||||
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^
|
$(AR) $(ARFLAGS) $(BLASLIB) $^
|
||||||
$(RANLIB) $(BLASLIB)
|
$(RANLIB) $(BLASLIB)
|
||||||
|
|
||||||
FRC:
|
FRC:
|
||||||
@FRC=$(FRC)
|
@FRC=$(FRC)
|
||||||
|
|
||||||
|
.PHONY: clean cleanobj cleanlib
|
||||||
clean: cleanobj cleanlib
|
clean: cleanobj cleanlib
|
||||||
cleanobj:
|
cleanobj:
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
cleanlib:
|
cleanlib:
|
||||||
#rm -f $(BLASLIB) # May point to a system lib, e.g. -lblas
|
#rm -f $(BLASLIB) # May point to a system lib, e.g. -lblas
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(FORTRAN) $(OPTS) -c -o $@ $<
|
|
||||||
|
|
|
@ -43,7 +43,7 @@
|
||||||
*> \param[in] INCX
|
*> \param[in] INCX
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*> INCX is INTEGER
|
*> INCX is INTEGER
|
||||||
*> storage spacing between elements of SX
|
*> storage spacing between elements of CX
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
|
@ -43,7 +43,7 @@
|
||||||
*> \param[in] INCX
|
*> \param[in] INCX
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*> INCX is INTEGER
|
*> INCX is INTEGER
|
||||||
*> storage spacing between elements of SX
|
*> storage spacing between elements of DX
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
|
@ -43,7 +43,7 @@
|
||||||
*> \param[in] INCX
|
*> \param[in] INCX
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*> INCX is INTEGER
|
*> INCX is INTEGER
|
||||||
*> storage spacing between elements of SX
|
*> storage spacing between elements of ZX
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
SBLAS1 = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'sdot.f', 'snrm2.f', 'srot.f', 'srotg.f', 'sscal.f', 'sswap.f', 'sdsdot.f', 'srotmg.f', 'srotm.f')
|
||||||
|
|
||||||
|
CBLAS1 = files('scabs1.f', 'scasum.f', 'scnrm2.f', 'icamax.f', 'caxpy.f', 'ccopy.f', 'cdotc.f', 'cdotu.f', 'csscal.f', 'crotg.f', 'cscal.f', 'cswap.f', 'csrot.f')
|
||||||
|
|
||||||
|
DBLAS1 = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'ddot.f', 'dnrm2.f', 'drot.f', 'drotg.f', 'dscal.f', 'dsdot.f', 'dswap.f', 'drotmg.f', 'drotm.f')
|
||||||
|
|
||||||
|
ZBLAS1 = files('dcabs1.f', 'dzasum.f', 'dznrm2.f', 'izamax.f', 'zaxpy.f', 'zcopy.f', 'zdotc.f', 'zdotu.f', 'zdscal.f', 'zrotg.f', 'zscal.f', 'zswap.f', 'zdrot.f')
|
||||||
|
|
||||||
|
CB1AUX = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'snrm2.f', 'sscal.f')
|
||||||
|
|
||||||
|
ZB1AUX = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'dnrm2.f', 'dscal.f')
|
||||||
|
|
||||||
|
ALLBLAS = files('lsame.f', 'xerbla.f', 'xerbla_array.f')
|
||||||
|
|
||||||
|
SBLAS2 = files('sgemv.f', 'sgbmv.f', 'ssymv.f', 'ssbmv.f', 'sspmv.f', 'strmv.f', 'stbmv.f', 'stpmv.f', 'strsv.f', 'stbsv.f', 'stpsv.f', 'sger.f', 'ssyr.f', 'sspr.f', 'ssyr2.f', 'sspr2.f')
|
||||||
|
|
||||||
|
CBLAS2 = files('cgemv.f', 'cgbmv.f', 'chemv.f', 'chbmv.f', 'chpmv.f', 'ctrmv.f', 'ctbmv.f', 'ctpmv.f', 'ctrsv.f', 'ctbsv.f', 'ctpsv.f', 'cgerc.f', 'cgeru.f', 'cher.f', 'chpr.f', 'cher2.f', 'chpr2.f')
|
||||||
|
|
||||||
|
DBLAS2 = files('dgemv.f', 'dgbmv.f', 'dsymv.f', 'dsbmv.f', 'dspmv.f', 'dtrmv.f', 'dtbmv.f', 'dtpmv.f', 'dtrsv.f', 'dtbsv.f', 'dtpsv.f', 'dger.f', 'dsyr.f', 'dspr.f', 'dsyr2.f', 'dspr2.f')
|
||||||
|
|
||||||
|
ZBLAS2 = files('zgemv.f', 'zgbmv.f', 'zhemv.f', 'zhbmv.f', 'zhpmv.f', 'ztrmv.f', 'ztbmv.f', 'ztpmv.f', 'ztrsv.f', 'ztbsv.f', 'ztpsv.f', 'zgerc.f', 'zgeru.f', 'zher.f', 'zhpr.f', 'zher2.f', 'zhpr2.f')
|
||||||
|
|
||||||
|
SBLAS3 = files('sgemm.f', 'ssymm.f', 'ssyrk.f', 'ssyr2k.f', 'strmm.f', 'strsm.f')
|
||||||
|
|
||||||
|
CBLAS3 = files('cgemm.f', 'csymm.f', 'csyrk.f', 'csyr2k.f', 'ctrmm.f', 'ctrsm.f', 'chemm.f', 'cherk.f', 'cher2k.f')
|
||||||
|
|
||||||
|
DBLAS3 = files('dgemm.f', 'dsymm.f', 'dsyrk.f', 'dsyr2k.f', 'dtrmm.f', 'dtrsm.f')
|
||||||
|
|
||||||
|
ZBLAS3 = files('zgemm.f', 'zsymm.f', 'zsyrk.f', 'zsyr2k.f', 'ztrmm.f', 'ztrsm.f', 'zhemm.f', 'zherk.f', 'zher2k.f')
|
|
@ -23,13 +23,13 @@
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
*>
|
*>
|
||||||
* Compute the inner product of two vectors with extended
|
*> Compute the inner product of two vectors with extended
|
||||||
* precision accumulation.
|
*> precision accumulation.
|
||||||
*
|
*>
|
||||||
* Returns S.P. result with dot product accumulated in D.P.
|
*> Returns S.P. result with dot product accumulated in D.P.
|
||||||
* SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
|
*> SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
|
||||||
* where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
|
*> where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
|
||||||
* defined in a similar way using INCY.
|
*> defined in a similar way using INCY.
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
*
|
||||||
* Arguments:
|
* Arguments:
|
||||||
|
@ -77,7 +77,14 @@
|
||||||
*> \author Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
|
*> \author Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
|
||||||
*> \author Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
|
*> \author Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
|
||||||
*
|
*
|
||||||
*> \ingroup complex_blas_level1
|
*> \author Univ. of Tennessee
|
||||||
|
*> \author Univ. of California Berkeley
|
||||||
|
*> \author Univ. of Colorado Denver
|
||||||
|
*> \author NAG Ltd.
|
||||||
|
*
|
||||||
|
*> \date November 2017
|
||||||
|
*
|
||||||
|
*> \ingroup single_blas_level1
|
||||||
*
|
*
|
||||||
*> \par Further Details:
|
*> \par Further Details:
|
||||||
* =====================
|
* =====================
|
||||||
|
@ -102,65 +109,7 @@
|
||||||
*> 920501 Reformatted the REFERENCES section. (WRB)
|
*> 920501 Reformatted the REFERENCES section. (WRB)
|
||||||
*> 070118 Reformat to LAPACK coding style
|
*> 070118 Reformat to LAPACK coding style
|
||||||
*> \endverbatim
|
*> \endverbatim
|
||||||
*
|
|
||||||
* =====================================================================
|
|
||||||
*
|
|
||||||
* .. Local Scalars ..
|
|
||||||
* DOUBLE PRECISION DSDOT
|
|
||||||
* INTEGER I,KX,KY,NS
|
|
||||||
* ..
|
|
||||||
* .. Intrinsic Functions ..
|
|
||||||
* INTRINSIC DBLE
|
|
||||||
* ..
|
|
||||||
* DSDOT = SB
|
|
||||||
* IF (N.LE.0) THEN
|
|
||||||
* SDSDOT = DSDOT
|
|
||||||
* RETURN
|
|
||||||
* END IF
|
|
||||||
* IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN
|
|
||||||
*
|
|
||||||
* Code for equal and positive increments.
|
|
||||||
*
|
|
||||||
* NS = N*INCX
|
|
||||||
* DO I = 1,NS,INCX
|
|
||||||
* DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I))
|
|
||||||
* END DO
|
|
||||||
* ELSE
|
|
||||||
*
|
|
||||||
* Code for unequal or nonpositive increments.
|
|
||||||
*
|
|
||||||
* KX = 1
|
|
||||||
* KY = 1
|
|
||||||
* IF (INCX.LT.0) KX = 1 + (1-N)*INCX
|
|
||||||
* IF (INCY.LT.0) KY = 1 + (1-N)*INCY
|
|
||||||
* DO I = 1,N
|
|
||||||
* DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY))
|
|
||||||
* KX = KX + INCX
|
|
||||||
* KY = KY + INCY
|
|
||||||
* END DO
|
|
||||||
* END IF
|
|
||||||
* SDSDOT = DSDOT
|
|
||||||
* RETURN
|
|
||||||
* END
|
|
||||||
*
|
|
||||||
*> \par Purpose:
|
|
||||||
* =============
|
|
||||||
*>
|
*>
|
||||||
*> \verbatim
|
|
||||||
*> \endverbatim
|
|
||||||
*
|
|
||||||
* Authors:
|
|
||||||
* ========
|
|
||||||
*
|
|
||||||
*> \author Univ. of Tennessee
|
|
||||||
*> \author Univ. of California Berkeley
|
|
||||||
*> \author Univ. of Colorado Denver
|
|
||||||
*> \author NAG Ltd.
|
|
||||||
*
|
|
||||||
*> \date November 2017
|
|
||||||
*
|
|
||||||
*> \ingroup single_blas_level1
|
|
||||||
*
|
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY)
|
REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY)
|
||||||
*
|
*
|
||||||
|
@ -175,71 +124,6 @@
|
||||||
* ..
|
* ..
|
||||||
* .. Array Arguments ..
|
* .. Array Arguments ..
|
||||||
REAL SX(*),SY(*)
|
REAL SX(*),SY(*)
|
||||||
* ..
|
|
||||||
*
|
|
||||||
* PURPOSE
|
|
||||||
* =======
|
|
||||||
*
|
|
||||||
* Compute the inner product of two vectors with extended
|
|
||||||
* precision accumulation.
|
|
||||||
*
|
|
||||||
* Returns S.P. result with dot product accumulated in D.P.
|
|
||||||
* SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
|
|
||||||
* where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
|
|
||||||
* defined in a similar way using INCY.
|
|
||||||
*
|
|
||||||
* AUTHOR
|
|
||||||
* ======
|
|
||||||
* Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
|
|
||||||
* Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
|
|
||||||
*
|
|
||||||
* ARGUMENTS
|
|
||||||
* =========
|
|
||||||
*
|
|
||||||
* N (input) INTEGER
|
|
||||||
* number of elements in input vector(s)
|
|
||||||
*
|
|
||||||
* SB (input) REAL
|
|
||||||
* single precision scalar to be added to inner product
|
|
||||||
*
|
|
||||||
* SX (input) REAL array, dimension (N)
|
|
||||||
* single precision vector with N elements
|
|
||||||
*
|
|
||||||
* INCX (input) INTEGER
|
|
||||||
* storage spacing between elements of SX
|
|
||||||
*
|
|
||||||
* SY (input) REAL array, dimension (N)
|
|
||||||
* single precision vector with N elements
|
|
||||||
*
|
|
||||||
* INCY (input) INTEGER
|
|
||||||
* storage spacing between elements of SY
|
|
||||||
*
|
|
||||||
* SDSDOT (output) REAL
|
|
||||||
* single precision dot product (SB if N .LE. 0)
|
|
||||||
*
|
|
||||||
* Further Details
|
|
||||||
* ===============
|
|
||||||
*
|
|
||||||
* REFERENCES
|
|
||||||
*
|
|
||||||
* C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
|
|
||||||
* Krogh, Basic linear algebra subprograms for Fortran
|
|
||||||
* usage, Algorithm No. 539, Transactions on Mathematical
|
|
||||||
* Software 5, 3 (September 1979), pp. 308-323.
|
|
||||||
*
|
|
||||||
* REVISION HISTORY (YYMMDD)
|
|
||||||
*
|
|
||||||
* 791001 DATE WRITTEN
|
|
||||||
* 890531 Changed all specific intrinsics to generic. (WRB)
|
|
||||||
* 890831 Modified array declarations. (WRB)
|
|
||||||
* 890831 REVISION DATE from Version 3.2
|
|
||||||
* 891214 Prologue converted to Version 4.0 format. (BAB)
|
|
||||||
* 920310 Corrected definition of LX in DESCRIPTION. (WRB)
|
|
||||||
* 920501 Reformatted the REFERENCES section. (WRB)
|
|
||||||
* 070118 Reformat to LAPACK coding style
|
|
||||||
*
|
|
||||||
* =====================================================================
|
|
||||||
*
|
|
||||||
* .. Local Scalars ..
|
* .. Local Scalars ..
|
||||||
DOUBLE PRECISION DSDOT
|
DOUBLE PRECISION DSDOT
|
||||||
INTEGER I,KX,KY,NS
|
INTEGER I,KX,KY,NS
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
include ../../make.inc
|
TOPSRCDIR = ../..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
|
.PHONY: all single double complex complex16
|
||||||
all: single double complex complex16
|
all: single double complex complex16
|
||||||
single: xblat1s xblat2s xblat3s
|
single: xblat1s xblat2s xblat3s
|
||||||
double: xblat1d xblat2d xblat3d
|
double: xblat1d xblat2d xblat3d
|
||||||
|
@ -7,32 +9,33 @@ complex: xblat1c xblat2c xblat3c
|
||||||
complex16: xblat1z xblat2z xblat3z
|
complex16: xblat1z xblat2z xblat3z
|
||||||
|
|
||||||
xblat1s: sblat1.o $(BLASLIB)
|
xblat1s: sblat1.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xblat1d: dblat1.o $(BLASLIB)
|
xblat1d: dblat1.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xblat1c: cblat1.o $(BLASLIB)
|
xblat1c: cblat1.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xblat1z: zblat1.o $(BLASLIB)
|
xblat1z: zblat1.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
xblat2s: sblat2.o $(BLASLIB)
|
xblat2s: sblat2.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xblat2d: dblat2.o $(BLASLIB)
|
xblat2d: dblat2.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xblat2c: cblat2.o $(BLASLIB)
|
xblat2c: cblat2.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xblat2z: zblat2.o $(BLASLIB)
|
xblat2z: zblat2.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
xblat3s: sblat3.o $(BLASLIB)
|
xblat3s: sblat3.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xblat3d: dblat3.o $(BLASLIB)
|
xblat3d: dblat3.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xblat3c: cblat3.o $(BLASLIB)
|
xblat3c: cblat3.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xblat3z: zblat3.o $(BLASLIB)
|
xblat3z: zblat3.o $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
|
.PHONY: run
|
||||||
run: all
|
run: all
|
||||||
./xblat1s > sblat1.out
|
./xblat1s > sblat1.out
|
||||||
./xblat1d > dblat1.out
|
./xblat1d > dblat1.out
|
||||||
|
@ -47,6 +50,7 @@ run: all
|
||||||
./xblat3c < cblat3.in
|
./xblat3c < cblat3.in
|
||||||
./xblat3z < zblat3.in
|
./xblat3z < zblat3.in
|
||||||
|
|
||||||
|
.PHONY: clean cleanobj cleanexe cleantest
|
||||||
clean: cleanobj cleanexe cleantest
|
clean: cleanobj cleanexe cleantest
|
||||||
cleanobj:
|
cleanobj:
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
|
@ -54,6 +58,3 @@ cleanexe:
|
||||||
rm -f xblat*
|
rm -f xblat*
|
||||||
cleantest:
|
cleantest:
|
||||||
rm -f *.out core
|
rm -f *.out core
|
||||||
|
|
||||||
.f.o:
|
|
||||||
$(FORTRAN) $(OPTS) -c -o $@ $<
|
|
||||||
|
|
|
@ -619,7 +619,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -991,7 +991,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -946,7 +946,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -619,7 +619,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -14,6 +14,8 @@ if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND)
|
||||||
message(WARNING "Reverting to pre-defined include/lapacke_mangling.h")
|
message(WARNING "Reverting to pre-defined include/lapacke_mangling.h")
|
||||||
configure_file(include/lapacke_mangling_with_flags.h.in
|
configure_file(include/lapacke_mangling_with_flags.h.in
|
||||||
${LAPACK_BINARY_DIR}/include/lapacke_mangling.h)
|
${LAPACK_BINARY_DIR}/include/lapacke_mangling.h)
|
||||||
|
configure_file(include/cblas_mangling_with_flags.h.in
|
||||||
|
${LAPACK_BINARY_DIR}/include/cblas_mangling.h)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
include_directories(include ${LAPACK_BINARY_DIR}/include)
|
include_directories(include ${LAPACK_BINARY_DIR}/include)
|
||||||
|
@ -28,7 +30,10 @@ endforeach()
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
append_subdir_files(CBLAS_INCLUDE "include")
|
append_subdir_files(CBLAS_INCLUDE "include")
|
||||||
install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h
|
||||||
|
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||||
|
COMPONENT Development
|
||||||
|
)
|
||||||
|
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
if(BUILD_TESTING)
|
if(BUILD_TESTING)
|
||||||
|
@ -45,7 +50,9 @@ endif()
|
||||||
set(_cblas_config_install_guard_target "")
|
set(_cblas_config_install_guard_target "")
|
||||||
if(ALL_TARGETS)
|
if(ALL_TARGETS)
|
||||||
install(EXPORT cblas-targets
|
install(EXPORT cblas-targets
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION})
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}
|
||||||
|
COMPONENT Development
|
||||||
|
)
|
||||||
# Choose one of the cblas targets to use as a guard for
|
# Choose one of the cblas targets to use as a guard for
|
||||||
# cblas-config.cmake to load targets from the install tree.
|
# cblas-config.cmake to load targets from the install tree.
|
||||||
list(GET ALL_TARGETS 0 _cblas_config_install_guard_target)
|
list(GET ALL_TARGETS 0 _cblas_config_install_guard_target)
|
||||||
|
@ -82,4 +89,6 @@ install(FILES
|
||||||
)
|
)
|
||||||
|
|
||||||
#install(EXPORT cblas-targets
|
#install(EXPORT cblas-targets
|
||||||
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION})
|
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}
|
||||||
|
# COMPONENT Development
|
||||||
|
# )
|
||||||
|
|
|
@ -1,19 +1,25 @@
|
||||||
include ../make.inc
|
TOPSRCDIR = ..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
all: cblas
|
all: cblas
|
||||||
|
|
||||||
|
.PHONY: cblas
|
||||||
cblas: include/cblas_mangling.h
|
cblas: include/cblas_mangling.h
|
||||||
$(MAKE) -C src
|
$(MAKE) -C src
|
||||||
|
|
||||||
include/cblas_mangling.h: include/cblas_mangling_with_flags.h.in
|
include/cblas_mangling.h: include/cblas_mangling_with_flags.h.in
|
||||||
cp $< $@
|
cp include/cblas_mangling_with_flags.h.in $@
|
||||||
|
|
||||||
|
.PHONY: cblas_testing
|
||||||
cblas_testing: cblas
|
cblas_testing: cblas
|
||||||
$(MAKE) -C testing run
|
$(MAKE) -C testing run
|
||||||
|
|
||||||
|
.PHONY: cblas_example
|
||||||
cblas_example: cblas
|
cblas_example: cblas
|
||||||
$(MAKE) -C examples
|
$(MAKE) -C examples
|
||||||
|
|
||||||
|
.PHONY: clean cleanobj cleanlib cleanexe cleantest
|
||||||
clean:
|
clean:
|
||||||
$(MAKE) -C src clean
|
$(MAKE) -C src clean
|
||||||
$(MAKE) -C testing clean
|
$(MAKE) -C testing clean
|
||||||
|
|
|
@ -1,17 +1,21 @@
|
||||||
include ../../make.inc
|
TOPSRCDIR = ../..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
|
.SUFFIXES: .c .o
|
||||||
|
.c.o:
|
||||||
|
$(CC) $(CFLAGS) -I../include -c -o $@ $<
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
all: cblas_ex1 cblas_ex2
|
all: cblas_ex1 cblas_ex2
|
||||||
|
|
||||||
cblas_ex1: cblas_example1.o $(CBLASLIB) $(BLASLIB)
|
cblas_ex1: cblas_example1.o $(CBLASLIB) $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
cblas_ex2: cblas_example2.o $(CBLASLIB) $(BLASLIB)
|
cblas_ex2: cblas_example2.o $(CBLASLIB) $(BLASLIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
|
.PHONY: clean cleanobj cleanexe
|
||||||
clean: cleanobj cleanexe
|
clean: cleanobj cleanexe
|
||||||
cleanobj:
|
cleanobj:
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
cleanexe:
|
cleanexe:
|
||||||
rm -f cblas_ex1 cblas_ex2
|
rm -f cblas_ex1 cblas_ex2
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) $(CFLAGS) -I../include -c -o $@ $<
|
|
||||||
|
|
|
@ -47,7 +47,7 @@ int main ( )
|
||||||
a[m*3+1] = 6;
|
a[m*3+1] = 6;
|
||||||
a[m*3+2] = 7;
|
a[m*3+2] = 7;
|
||||||
a[m*3+3] = 8;
|
a[m*3+3] = 8;
|
||||||
/* The elemetns of x and y */
|
/* The elements of x and y */
|
||||||
x[0] = 1;
|
x[0] = 1;
|
||||||
x[1] = 2;
|
x[1] = 2;
|
||||||
x[2] = 1;
|
x[2] = 1;
|
||||||
|
|
|
@ -1,7 +1,13 @@
|
||||||
# This Makefile compiles the CBLAS routines
|
# This Makefile compiles the CBLAS routines
|
||||||
|
|
||||||
include ../../make.inc
|
TOPSRCDIR = ../..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
|
.SUFFIXES: .c .o
|
||||||
|
.c.o:
|
||||||
|
$(CC) $(CFLAGS) -I../include -c -o $@ $<
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
all: $(CBLASLIB)
|
all: $(CBLASLIB)
|
||||||
|
|
||||||
# Error handling routines for level 2 & 3
|
# Error handling routines for level 2 & 3
|
||||||
|
@ -43,24 +49,25 @@ zlev1 = cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \
|
||||||
# Common files for level 1 single precision
|
# Common files for level 1 single precision
|
||||||
sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o
|
sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o
|
||||||
|
|
||||||
|
.PHONY: slib1 dlib1 clib1 zlib1
|
||||||
# Single precision real
|
# Single precision real
|
||||||
slib1: $(slev1) $(sclev1)
|
slib1: $(slev1) $(sclev1)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double precision real
|
# Double precision real
|
||||||
dlib1: $(dlev1)
|
dlib1: $(dlev1)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Single precision complex
|
# Single precision complex
|
||||||
clib1: $(clev1) $(sclev1)
|
clib1: $(clev1) $(sclev1)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double precision complex
|
# Double precision complex
|
||||||
zlib1: $(zlev1)
|
zlib1: $(zlev1)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -95,24 +102,25 @@ zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \
|
||||||
cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \
|
cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \
|
||||||
cblas_zhpr.o cblas_zhpr2.o
|
cblas_zhpr.o cblas_zhpr2.o
|
||||||
|
|
||||||
|
.PHONY: slib2 dlib2 clib2 zlib2
|
||||||
# Single precision real
|
# Single precision real
|
||||||
slib2: $(slev2) $(errhand)
|
slib2: $(slev2) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double precision real
|
# Double precision real
|
||||||
dlib2: $(dlev2) $(errhand)
|
dlib2: $(dlev2) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Single precision complex
|
# Single precision complex
|
||||||
clib2: $(clev2) $(errhand)
|
clib2: $(clev2) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double precision complex
|
# Double precision complex
|
||||||
zlib2: $(zlev2) $(errhand)
|
zlib2: $(zlev2) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
#
|
#
|
||||||
|
@ -141,24 +149,25 @@ zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \
|
||||||
cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o \
|
cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o \
|
||||||
cblas_zsyr2k.o
|
cblas_zsyr2k.o
|
||||||
|
|
||||||
|
.PHONY: slib3 dlib3 clib3 zlib3
|
||||||
# Single precision real
|
# Single precision real
|
||||||
slib3: $(slev3) $(errhand)
|
slib3: $(slev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double precision real
|
# Double precision real
|
||||||
dlib3: $(dlev3) $(errhand)
|
dlib3: $(dlev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Single precision complex
|
# Single precision complex
|
||||||
clib3: $(clev3) $(errhand)
|
clib3: $(clev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# Double precision complex
|
# Double precision complex
|
||||||
zlib3: $(zlev3) $(errhand)
|
zlib3: $(zlev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
|
|
||||||
|
@ -166,36 +175,33 @@ alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1)
|
||||||
alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2)
|
alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2)
|
||||||
alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3)
|
alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3)
|
||||||
|
|
||||||
|
.PHONY: all1 all2 all3
|
||||||
# All level 1
|
# All level 1
|
||||||
all1: $(alev1)
|
all1: $(alev1)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# All level 2
|
# All level 2
|
||||||
all2: $(alev2) $(errhand)
|
all2: $(alev2) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# All level 3
|
# All level 3
|
||||||
all3: $(alev3) $(errhand)
|
all3: $(alev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
|
$(AR) $(ARFLAGS) $(CBLASLIB) $^
|
||||||
$(RANLIB) $(CBLASLIB)
|
$(RANLIB) $(CBLASLIB)
|
||||||
|
|
||||||
# All levels and precisions
|
# All levels and precisions
|
||||||
$(CBLASLIB): $(alev1) $(alev2) $(alev3) $(errhand)
|
$(CBLASLIB): $(alev1) $(alev2) $(alev3) $(errhand)
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $^
|
$(AR) $(ARFLAGS) $@ $^
|
||||||
$(RANLIB) $@
|
$(RANLIB) $@
|
||||||
|
|
||||||
FRC:
|
FRC:
|
||||||
@FRC=$(FRC)
|
@FRC=$(FRC)
|
||||||
|
|
||||||
|
.PHONY: clean cleanobj cleanlib
|
||||||
clean: cleanobj cleanlib
|
clean: cleanobj cleanlib
|
||||||
cleanobj:
|
cleanobj:
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
cleanlib:
|
cleanlib:
|
||||||
rm -f $(CBLASLIB)
|
rm -f $(CBLASLIB)
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) $(CFLAGS) -I../include -c -o $@ $<
|
|
||||||
.f.o:
|
|
||||||
$(FORTRAN) $(OPTS) -c -o $@ $<
|
|
||||||
|
|
|
@ -91,7 +91,7 @@ void cblas_sgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA,
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
cblas_xerbla(2, "cblas_sgemm",
|
cblas_xerbla(2, "cblas_sgemm",
|
||||||
"Illegal TransA setting, %d\n", TransA);
|
"Illegal TransB setting, %d\n", TransB);
|
||||||
CBLAS_CallFromC = 0;
|
CBLAS_CallFromC = 0;
|
||||||
RowMajorStrg = 0;
|
RowMajorStrg = 0;
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -2,7 +2,12 @@
|
||||||
# The Makefile compiles c wrappers and testers for CBLAS.
|
# The Makefile compiles c wrappers and testers for CBLAS.
|
||||||
#
|
#
|
||||||
|
|
||||||
include ../../make.inc
|
TOPSRCDIR = ../..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
|
.SUFFIXES: .c .o
|
||||||
|
.c.o:
|
||||||
|
$(CC) $(CFLAGS) -I../include -c -o $@ $<
|
||||||
|
|
||||||
# Archive files necessary to compile
|
# Archive files necessary to compile
|
||||||
LIB = $(CBLASLIB) $(BLASLIB)
|
LIB = $(CBLASLIB) $(BLASLIB)
|
||||||
|
@ -27,6 +32,7 @@ ztestl1o = c_zblas1.o
|
||||||
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o
|
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o
|
||||||
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o
|
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o
|
||||||
|
|
||||||
|
.PHONY: all all1 all2 all3
|
||||||
all: all1 all2 all3
|
all: all1 all2 all3
|
||||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
|
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
|
||||||
all2: xscblat2 xdcblat2 xccblat2 xzcblat2
|
all2: xscblat2 xdcblat2 xccblat2 xzcblat2
|
||||||
|
@ -38,37 +44,38 @@ all3: xscblat3 xdcblat3 xccblat3 xzcblat3
|
||||||
|
|
||||||
# Single real
|
# Single real
|
||||||
xscblat1: c_sblat1.o $(stestl1o) $(LIB)
|
xscblat1: c_sblat1.o $(stestl1o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xscblat2: c_sblat2.o $(stestl2o) $(LIB)
|
xscblat2: c_sblat2.o $(stestl2o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xscblat3: c_sblat3.o $(stestl3o) $(LIB)
|
xscblat3: c_sblat3.o $(stestl3o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
# Double real
|
# Double real
|
||||||
xdcblat1: c_dblat1.o $(dtestl1o) $(LIB)
|
xdcblat1: c_dblat1.o $(dtestl1o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xdcblat2: c_dblat2.o $(dtestl2o) $(LIB)
|
xdcblat2: c_dblat2.o $(dtestl2o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xdcblat3: c_dblat3.o $(dtestl3o) $(LIB)
|
xdcblat3: c_dblat3.o $(dtestl3o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
# Single complex
|
# Single complex
|
||||||
xccblat1: c_cblat1.o $(ctestl1o) $(LIB)
|
xccblat1: c_cblat1.o $(ctestl1o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xccblat2: c_cblat2.o $(ctestl2o) $(LIB)
|
xccblat2: c_cblat2.o $(ctestl2o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xccblat3: c_cblat3.o $(ctestl3o) $(LIB)
|
xccblat3: c_cblat3.o $(ctestl3o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
# Double complex
|
# Double complex
|
||||||
xzcblat1: c_zblat1.o $(ztestl1o) $(LIB)
|
xzcblat1: c_zblat1.o $(ztestl1o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xzcblat2: c_zblat2.o $(ztestl2o) $(LIB)
|
xzcblat2: c_zblat2.o $(ztestl2o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
xzcblat3: c_zblat3.o $(ztestl3o) $(LIB)
|
xzcblat3: c_zblat3.o $(ztestl3o) $(LIB)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
|
|
||||||
# RUN TESTS
|
# RUN TESTS
|
||||||
|
.PHONY: run
|
||||||
run: all
|
run: all
|
||||||
@echo "--> TESTING CBLAS 1 - SINGLE PRECISION REAL <--"
|
@echo "--> TESTING CBLAS 1 - SINGLE PRECISION REAL <--"
|
||||||
@./xscblat1 > stest1.out
|
@./xscblat1 > stest1.out
|
||||||
|
@ -95,6 +102,7 @@ run: all
|
||||||
@echo "--> TESTING CBLAS 3 - DOUBLE PRECISION COMPLEX <--"
|
@echo "--> TESTING CBLAS 3 - DOUBLE PRECISION COMPLEX <--"
|
||||||
@./xzcblat3 < zin3 > ztest3.out
|
@./xzcblat3 < zin3 > ztest3.out
|
||||||
|
|
||||||
|
.PHONY: clean cleanobj cleanexe cleantest
|
||||||
clean: cleanobj cleanexe cleantest
|
clean: cleanobj cleanexe cleantest
|
||||||
cleanobj:
|
cleanobj:
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
|
@ -102,9 +110,3 @@ cleanexe:
|
||||||
rm -f x*
|
rm -f x*
|
||||||
cleantest:
|
cleantest:
|
||||||
rm -f *.out core
|
rm -f *.out core
|
||||||
|
|
||||||
.SUFFIXES: .o .f .c
|
|
||||||
.c.o:
|
|
||||||
$(CC) $(CFLAGS) -I../include -c -o $@ $<
|
|
||||||
.f.o:
|
|
||||||
$(FORTRAN) $(OPTS) -c -o $@ $<
|
|
||||||
|
|
|
@ -577,7 +577,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -653,7 +653,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -653,7 +653,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -577,7 +577,7 @@
|
||||||
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
|
||||||
* ************************* STEST1 *****************************
|
* ************************* STEST1 *****************************
|
||||||
*
|
*
|
||||||
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
|
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
|
||||||
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
|
||||||
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
|
||||||
*
|
*
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# This module checks against various known compilers and thier respective
|
# This module checks against various known compilers and their respective
|
||||||
# flags to determine any specific flags needing to be set.
|
# flags to determine any specific flags needing to be set.
|
||||||
#
|
#
|
||||||
# 1. If FPE traps are enabled either abort or disable them
|
# 1. If FPE traps are enabled either abort or disable them
|
||||||
|
|
|
@ -20,7 +20,7 @@ set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY})
|
||||||
|
|
||||||
get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||||
foreach (LANG ${ENABLED_LANGUAGES})
|
foreach (LANG ${ENABLED_LANGUAGES})
|
||||||
# Gcov evaluation is dependend on the used compiler. Check gcov support for
|
# Gcov evaluation is dependent on the used compiler. Check gcov support for
|
||||||
# each compiler that is used. If gcov binary was already found for this
|
# each compiler that is used. If gcov binary was already found for this
|
||||||
# compiler, do not try to find it again.
|
# compiler, do not try to find it again.
|
||||||
if(NOT GCOV_${CMAKE_${LANG}_COMPILER_ID}_BIN)
|
if(NOT GCOV_${CMAKE_${LANG}_COMPILER_ID}_BIN)
|
||||||
|
|
|
@ -42,7 +42,7 @@ set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY})
|
||||||
|
|
||||||
get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||||
foreach (LANG ${ENABLED_LANGUAGES})
|
foreach (LANG ${ENABLED_LANGUAGES})
|
||||||
# Coverage flags are not dependend on language, but the used compiler. So
|
# Coverage flags are not dependent on language, but the used compiler. So
|
||||||
# instead of searching flags foreach language, search flags foreach compiler
|
# instead of searching flags foreach language, search flags foreach compiler
|
||||||
# used.
|
# used.
|
||||||
set(COMPILER ${CMAKE_${LANG}_COMPILER_ID})
|
set(COMPILER ${CMAKE_${LANG}_COMPILER_ID})
|
||||||
|
|
|
@ -24,7 +24,7 @@ message(STATUS "=========")
|
||||||
set(F77_OUTPUT_EXE "/Fe" CACHE INTERNAL
|
set(F77_OUTPUT_EXE "/Fe" CACHE INTERNAL
|
||||||
"Fortran compiler option for setting executable file name.")
|
"Fortran compiler option for setting executable file name.")
|
||||||
else()
|
else()
|
||||||
# in other case, let user specify their fortran configrations.
|
# in other case, let user specify their fortran configurations.
|
||||||
set(F77_OPTION_COMPILE "-c" CACHE STRING
|
set(F77_OPTION_COMPILE "-c" CACHE STRING
|
||||||
"Fortran compiler option for compiling without linking.")
|
"Fortran compiler option for compiling without linking.")
|
||||||
set(F77_OUTPUT_OBJ "-o" CACHE STRING
|
set(F77_OUTPUT_OBJ "-o" CACHE STRING
|
||||||
|
|
|
@ -5,6 +5,10 @@ if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
|
||||||
endif()
|
endif()
|
||||||
unset(_LAPACK_TARGET)
|
unset(_LAPACK_TARGET)
|
||||||
|
|
||||||
|
# Hint for project building against lapack
|
||||||
|
set(LAPACK_Fortran_COMPILER_ID "@CMAKE_Fortran_COMPILER_ID@")
|
||||||
|
|
||||||
# Report the blas and lapack raw or imported libraries.
|
# Report the blas and lapack raw or imported libraries.
|
||||||
set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@")
|
set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@")
|
||||||
set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@")
|
set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@")
|
||||||
|
set(LAPACK_LIBRARIES ${LAPACK_blas_LIBRARIES} ${LAPACK_lapack_LIBRARIES})
|
||||||
|
|
|
@ -8,8 +8,12 @@ if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
|
||||||
endif()
|
endif()
|
||||||
unset(_LAPACK_TARGET)
|
unset(_LAPACK_TARGET)
|
||||||
|
|
||||||
|
# Hint for project building against lapack
|
||||||
|
set(LAPACK_Fortran_COMPILER_ID "@CMAKE_Fortran_COMPILER_ID@")
|
||||||
|
|
||||||
# Report the blas and lapack raw or imported libraries.
|
# Report the blas and lapack raw or imported libraries.
|
||||||
set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@")
|
set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@")
|
||||||
set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@")
|
set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@")
|
||||||
|
set(LAPACK_LIBRARIES ${LAPACK_blas_LIBRARIES} ${LAPACK_lapack_LIBRARIES})
|
||||||
|
|
||||||
unset(_LAPACK_SELF_DIR)
|
unset(_LAPACK_SELF_DIR)
|
||||||
|
|
|
@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8.12)
|
||||||
project(LAPACK Fortran C)
|
project(LAPACK Fortran C)
|
||||||
|
|
||||||
set(LAPACK_MAJOR_VERSION 3)
|
set(LAPACK_MAJOR_VERSION 3)
|
||||||
set(LAPACK_MINOR_VERSION 8)
|
set(LAPACK_MINOR_VERSION 9)
|
||||||
set(LAPACK_PATCH_VERSION 0)
|
set(LAPACK_PATCH_VERSION 0)
|
||||||
set(
|
set(
|
||||||
LAPACK_VERSION
|
LAPACK_VERSION
|
||||||
|
@ -13,6 +13,9 @@ set(
|
||||||
# Add the CMake directory for custon CMake modules
|
# Add the CMake directory for custon CMake modules
|
||||||
set(CMAKE_MODULE_PATH "${LAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH})
|
set(CMAKE_MODULE_PATH "${LAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH})
|
||||||
|
|
||||||
|
# Export all symbols on Windows when building shared libraries
|
||||||
|
SET(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
|
||||||
|
|
||||||
# Set a default build type if none was specified
|
# Set a default build type if none was specified
|
||||||
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||||
message(STATUS "Setting build type to 'Release' as none was specified.")
|
message(STATUS "Setting build type to 'Release' as none was specified.")
|
||||||
|
@ -21,8 +24,19 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
||||||
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "Coverage")
|
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "Coverage")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER)
|
# Coverage
|
||||||
if(${CMAKE_BUILD_TYPE_UPPER} STREQUAL "COVERAGE")
|
set(_is_coverage_build 0)
|
||||||
|
set(_msg "Checking if build type is 'Coverage'")
|
||||||
|
message(STATUS "${_msg}")
|
||||||
|
if(NOT CMAKE_CONFIGURATION_TYPES)
|
||||||
|
string(TOLOWER ${CMAKE_BUILD_TYPE} _build_type_lc)
|
||||||
|
if(${_build_type_lc} STREQUAL "coverage")
|
||||||
|
set(_is_coverage_build 1)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
message(STATUS "${_msg}: ${_is_coverage_build}")
|
||||||
|
|
||||||
|
if(_is_coverage_build)
|
||||||
message(STATUS "Adding coverage")
|
message(STATUS "Adding coverage")
|
||||||
find_package(codecov)
|
find_package(codecov)
|
||||||
endif()
|
endif()
|
||||||
|
@ -58,18 +72,18 @@ include(PreventInSourceBuilds)
|
||||||
include(PreventInBuildInstalls)
|
include(PreventInBuildInstalls)
|
||||||
|
|
||||||
if(UNIX)
|
if(UNIX)
|
||||||
if("${CMAKE_Fortran_COMPILER}" MATCHES "ifort")
|
if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel)
|
||||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict")
|
list(APPEND CMAKE_Fortran_FLAGS "-fp-model strict")
|
||||||
endif()
|
endif()
|
||||||
if("${CMAKE_Fortran_COMPILER}" MATCHES "xlf")
|
if(CMAKE_Fortran_COMPILER_ID STREQUAL XL)
|
||||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none")
|
list(APPEND CMAKE_Fortran_FLAGS "-qnosave -qstrict=none")
|
||||||
endif()
|
endif()
|
||||||
# Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler.
|
# Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler.
|
||||||
# This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin
|
# This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin
|
||||||
string(REPLACE \;mtsk\; \; CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES "${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}")
|
string(REPLACE \;mtsk\; \; CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES "${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(CMAKE_Fortran_COMPILER_ID STREQUAL "Compaq")
|
if(CMAKE_Fortran_COMPILER_ID STREQUAL Compaq)
|
||||||
if(WIN32)
|
if(WIN32)
|
||||||
if(CMAKE_GENERATOR STREQUAL "NMake Makefiles")
|
if(CMAKE_GENERATOR STREQUAL "NMake Makefiles")
|
||||||
get_filename_component(CMAKE_Fortran_COMPILER_CMDNAM ${CMAKE_Fortran_COMPILER} NAME_WE)
|
get_filename_component(CMAKE_Fortran_COMPILER_CMDNAM ${CMAKE_Fortran_COMPILER} NAME_WE)
|
||||||
|
@ -96,24 +110,16 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL "Compaq")
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# Get Python
|
|
||||||
message(STATUS "Looking for Python greater than 2.6 - ${PYTHONINTERP_FOUND}")
|
|
||||||
find_package(PythonInterp 2.7) # lapack_testing.py uses features from python 2.7 and greater
|
|
||||||
if(PYTHONINTERP_FOUND)
|
|
||||||
message(STATUS "Using Python version ${PYTHON_VERSION_STRING}")
|
|
||||||
else()
|
|
||||||
message(STATUS "No suitable Python version found, so skipping summary tests.")
|
|
||||||
endif()
|
|
||||||
# --------------------------------------------------
|
|
||||||
|
|
||||||
|
# --------------------------------------------------
|
||||||
set(LAPACK_INSTALL_EXPORT_NAME lapack-targets)
|
set(LAPACK_INSTALL_EXPORT_NAME lapack-targets)
|
||||||
|
|
||||||
macro(lapack_install_library lib)
|
macro(lapack_install_library lib)
|
||||||
install(TARGETS ${lib}
|
install(TARGETS ${lib}
|
||||||
EXPORT ${LAPACK_INSTALL_EXPORT_NAME}
|
EXPORT ${LAPACK_INSTALL_EXPORT_NAME}
|
||||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT Development
|
||||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT RuntimeLibraries
|
||||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT RuntimeLibraries
|
||||||
)
|
)
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
|
@ -121,12 +127,22 @@ set(PKG_CONFIG_DIR ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
||||||
|
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
# Testing
|
# Testing
|
||||||
option(BUILD_TESTING "Build tests" OFF)
|
option(BUILD_TESTING "Build tests" ${_is_coverage_build})
|
||||||
enable_testing()
|
|
||||||
include(CTest)
|
include(CTest)
|
||||||
enable_testing()
|
|
||||||
message(STATUS "Build tests: ${BUILD_TESTING}")
|
message(STATUS "Build tests: ${BUILD_TESTING}")
|
||||||
|
|
||||||
|
# lapack_testing.py uses features from python 2.7 and greater
|
||||||
|
if(BUILD_TESTING)
|
||||||
|
set(_msg "Looking for Python >= 2.7 needed for summary tests")
|
||||||
|
message(STATUS "${_msg}")
|
||||||
|
find_package(PythonInterp 2.7 QUIET)
|
||||||
|
if(PYTHONINTERP_FOUND)
|
||||||
|
message(STATUS "${_msg} - found (${PYTHON_VERSION_STRING})")
|
||||||
|
else()
|
||||||
|
message(STATUS "${_msg} - not found (skipping summary tests)")
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
# Organize output files. On Windows this also keeps .dll files next
|
# Organize output files. On Windows this also keeps .dll files next
|
||||||
# to the .exe files that need them, making tests easy to run.
|
# to the .exe files that need them, making tests easy to run.
|
||||||
|
@ -299,16 +315,40 @@ if(LAPACKE)
|
||||||
add_subdirectory(LAPACKE)
|
add_subdirectory(LAPACKE)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
#-------------------------------------
|
||||||
|
# BLAS++ / LAPACK++
|
||||||
|
option(BLAS++ "Build BLAS++" OFF)
|
||||||
|
option(LAPACK++ "Build LAPACK++" OFF)
|
||||||
|
|
||||||
|
|
||||||
|
function(_display_cpp_implementation_msg name)
|
||||||
|
string(TOLOWER ${name} name_lc)
|
||||||
|
message(STATUS "${name}++ enable")
|
||||||
|
message(STATUS "----------------")
|
||||||
|
message(STATUS "Thank you for your interest in ${name}++, a newly developed C++ API for ${name} library")
|
||||||
|
message(STATUS "The objective of ${name}++ is to provide a convenient, performance oriented API for development in the C++ language, that, for the most part, preserves established conventions, while, at the same time, takes advantages of modern C++ features, such as: namespaces, templates, exceptions, etc.")
|
||||||
|
message(STATUS "We are still working on integrating ${name}++ in our library. For the moment, you can download directly ${name_lc}++ from https://bitbucket.org/icl/${name_lc}pp")
|
||||||
|
message(STATUS "For support ${name}++ related question, please email: slate-user@icl.utk.edu")
|
||||||
|
message(STATUS "----------------")
|
||||||
|
endfunction()
|
||||||
|
if(BLAS++)
|
||||||
|
_display_cpp_implementation_msg("BLAS")
|
||||||
|
endif()
|
||||||
|
if(LAPACK++)
|
||||||
|
_display_cpp_implementation_msg("LAPACK")
|
||||||
|
endif()
|
||||||
|
|
||||||
# --------------------------------------------------
|
# --------------------------------------------------
|
||||||
# CPACK Packaging
|
# CPACK Packaging
|
||||||
|
|
||||||
set(CPACK_PACKAGE_NAME "LAPACK")
|
set(CPACK_PACKAGE_NAME "LAPACK")
|
||||||
set(CPACK_PACKAGE_VENDOR "University of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd")
|
set(CPACK_PACKAGE_VENDOR "University of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd")
|
||||||
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "LAPACK- Linear Algebra Package")
|
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "LAPACK- Linear Algebra Package")
|
||||||
set(CPACK_PACKAGE_VERSION_MAJOR 3)
|
set(CPACK_PACKAGE_VERSION_MAJOR ${LAPACK_MAJOR_VERSION})
|
||||||
set(CPACK_PACKAGE_VERSION_MINOR 5)
|
set(CPACK_PACKAGE_VERSION_MINOR ${LAPACK_MINOR_VERSION})
|
||||||
set(CPACK_PACKAGE_VERSION_PATCH 0)
|
set(CPACK_PACKAGE_VERSION_PATCH ${LAPACK_PATCH_VERSION})
|
||||||
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
||||||
|
set(CPACK_MONOLITHIC_INSTALL ON)
|
||||||
set(CPACK_PACKAGE_INSTALL_DIRECTORY "LAPACK")
|
set(CPACK_PACKAGE_INSTALL_DIRECTORY "LAPACK")
|
||||||
if(WIN32 AND NOT UNIX)
|
if(WIN32 AND NOT UNIX)
|
||||||
# There is a bug in NSI that does not handle full unix paths properly. Make
|
# There is a bug in NSI that does not handle full unix paths properly. Make
|
||||||
|
@ -347,7 +387,9 @@ endif()
|
||||||
set(_lapack_config_install_guard_target "")
|
set(_lapack_config_install_guard_target "")
|
||||||
if(ALL_TARGETS)
|
if(ALL_TARGETS)
|
||||||
install(EXPORT lapack-targets
|
install(EXPORT lapack-targets
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION})
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}
|
||||||
|
COMPONENT Development
|
||||||
|
)
|
||||||
|
|
||||||
# Choose one of the lapack targets to use as a guard for
|
# Choose one of the lapack targets to use as a guard for
|
||||||
# lapack-config.cmake to load targets from the install tree.
|
# lapack-config.cmake to load targets from the install tree.
|
||||||
|
@ -382,6 +424,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_D
|
||||||
install(FILES
|
install(FILES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/lapack.pc
|
${CMAKE_CURRENT_BINARY_DIR}/lapack.pc
|
||||||
DESTINATION ${PKG_CONFIG_DIR}
|
DESTINATION ${PKG_CONFIG_DIR}
|
||||||
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
||||||
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in
|
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in
|
||||||
|
@ -398,4 +441,6 @@ install(FILES
|
||||||
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake
|
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake
|
||||||
${LAPACK_BINARY_DIR}/lapack-config-version.cmake
|
${LAPACK_BINARY_DIR}/lapack-config-version.cmake
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}
|
||||||
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK
|
||||||
# could be handy for archiving the generated documentation or if some version
|
# could be handy for archiving the generated documentation or if some version
|
||||||
# control system is used.
|
# control system is used.
|
||||||
|
|
||||||
PROJECT_NUMBER = 3.8.0
|
PROJECT_NUMBER = 3.9.0
|
||||||
|
|
||||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||||
# for a project that appears at the top of each page and should give viewer a
|
# for a project that appears at the top of each page and should give viewer a
|
||||||
|
|
|
@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK
|
||||||
# could be handy for archiving the generated documentation or if some version
|
# could be handy for archiving the generated documentation or if some version
|
||||||
# control system is used.
|
# control system is used.
|
||||||
|
|
||||||
PROJECT_NUMBER = 3.8.0
|
PROJECT_NUMBER = 3.9.0
|
||||||
|
|
||||||
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
# Using the PROJECT_BRIEF tag one can provide an optional one line description
|
||||||
# for a project that appears at the top of each page and should give viewer a
|
# for a project that appears at the top of each page and should give viewer a
|
||||||
|
|
|
@ -439,39 +439,39 @@ SHELL = /bin/sh
|
||||||
\end{quote}
|
\end{quote}
|
||||||
and it will need to be modified to \texttt{SHELL = /sbin/sh} if you are
|
and it will need to be modified to \texttt{SHELL = /sbin/sh} if you are
|
||||||
installing LAPACK on an SGI architecture.
|
installing LAPACK on an SGI architecture.
|
||||||
Second, you will
|
Next, you will need to modify \texttt{FC}, \texttt{FFLAGS},
|
||||||
need to modify the \texttt{PLAT} definition, which is appended to all
|
\texttt{FFLAGS\_DRV}, \texttt{FFLAGS\_NOOPT}, and \texttt{LDFLAGS} to specify
|
||||||
library names, to specify the architecture to which you are installing
|
|
||||||
LAPACK. This features avoids confusion in library names when you are
|
|
||||||
installing LAPACK on more than one architecture. Next, you will need
|
|
||||||
to modify \texttt{FORTRAN}, \texttt{OPTS}, \texttt{DRVOPTS}, \texttt{NOOPT}, \texttt{LOADER},
|
|
||||||
and \texttt{LOADOPTS} to specify
|
|
||||||
the compiler, compiler options, compiler options for the testing and
|
the compiler, compiler options, compiler options for the testing and
|
||||||
timing\footnotemark[\value{footnote}] main programs, loader, loader options.
|
timing\footnotemark[\value{footnote}] main programs, and linker options.
|
||||||
Next you will have to choose which function you will use to time in the \texttt{SECOND} and \texttt{DSECND} routines.
|
Next you will have to choose which function you will use to time in the
|
||||||
|
\texttt{SECOND} and \texttt{DSECND} routines.
|
||||||
\begin{verbatim}
|
\begin{verbatim}
|
||||||
#The Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME
|
# Default: SECOND and DSECND will use a call to the
|
||||||
TIMER = EXT_ETIME
|
# EXTERNAL FUNCTION ETIME
|
||||||
# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_
|
#TIMER = EXT_ETIME
|
||||||
# TIMER = EXT_ETIME_
|
# For RS6K: SECOND and DSECND will use a call to the
|
||||||
# For gfortran compiler: SECOND and DSECND will use the INTERNAL FUNCTION ETIME
|
# EXTERNAL FUNCTION ETIME_
|
||||||
# TIMER = INT_ETIME
|
#TIMER = EXT_ETIME_
|
||||||
# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...)
|
# For gfortran compiler: SECOND and DSECND will use a call to the
|
||||||
# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME
|
# INTERNAL FUNCTION ETIME
|
||||||
# TIMER = INT_CPU_TIME
|
TIMER = INT_ETIME
|
||||||
# If neither of this works...you can use the NONE value...
|
# If your Fortran compiler does not provide etime (like Nag Fortran
|
||||||
# In that case, SECOND and DSECND will always return 0
|
# Compiler, etc...) SECOND and DSECND will use a call to the
|
||||||
# TIMER = NONE
|
# INTERNAL FUNCTION CPU_TIME
|
||||||
|
#TIMER = INT_CPU_TIME
|
||||||
|
# If none of these work, you can use the NONE value.
|
||||||
|
# In that case, SECOND and DSECND will always return 0.
|
||||||
|
#TIMER = NONE
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
Refer to the section~\ref{second} to get more information.
|
Refer to the section~\ref{second} to get more information.
|
||||||
|
|
||||||
|
|
||||||
Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify archiver,
|
Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify archiver,
|
||||||
archiver options, and ranlib for your machine. If your architecture
|
archiver options, and ranlib for your machine. If your architecture
|
||||||
does not require \texttt{ranlib} to be run after each archive command (as
|
does not require \texttt{ranlib} to be run after each archive command (as
|
||||||
is the case with CRAY computers running UNICOS, Hewlett Packard
|
is the case with CRAY computers running UNICOS, Hewlett Packard
|
||||||
computers running HP-UX, or SUN SPARCstations running Solaris), set
|
computers running HP-UX, or SUN SPARCstations running Solaris), set
|
||||||
\texttt{ranlib=echo}. And finally, you must
|
\texttt{RANLIB = echo}. And finally, you must
|
||||||
modify the \texttt{BLASLIB} definition to specify the BLAS library to which
|
modify the \texttt{BLASLIB} definition to specify the BLAS library to which
|
||||||
you will be linking. If an optimized version of the BLAS is available
|
you will be linking. If an optimized version of the BLAS is available
|
||||||
on your machine, you are highly recommended to link to that library.
|
on your machine, you are highly recommended to link to that library.
|
||||||
|
@ -721,24 +721,24 @@ The version that will be used depends on the value of the TIMER variable in the
|
||||||
|
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item If ETIME is available as an external function, set the value of the TIMER variable in your
|
\item If ETIME is available as an external function, set the value of the TIMER variable in your
|
||||||
make.inc to \texttt{EXT\_ETIME}:\texttt{second\_EXT\_ETIME.f} and \texttt{dsecnd\_EXT\_ETIME.f} will be used.
|
make.inc to \texttt{EXT\_ETIME}: \texttt{second\_EXT\_ETIME.f} and \texttt{dsecnd\_EXT\_ETIME.f} will be used.
|
||||||
Usually on HPPA architectures,
|
Usually on HPPA architectures,
|
||||||
the compiler and loader flag \texttt{+U77} should be included to access
|
the compiler and linker flag \texttt{+U77} should be included to access
|
||||||
the function \texttt{ETIME}.
|
the function \texttt{ETIME}.
|
||||||
|
|
||||||
\item If ETIME\_ is available as an external function, set the value of the TIMER variable in your make.inc
|
\item If ETIME\_ is available as an external function, set the value of the TIMER variable in your make.inc
|
||||||
to \texttt{EXT\_ETIME\_}:\texttt{second\_EXT\_ETIME\_.f} and \texttt{dsecnd\_EXT\_ETIME\_.f} will be used.
|
to \texttt{EXT\_ETIME\_}: \texttt{second\_EXT\_ETIME\_.f} and \texttt{dsecnd\_EXT\_ETIME\_.f} will be used.
|
||||||
It is the case on some IBM architectures such as IBM RS/6000s.
|
It is the case on some IBM architectures such as IBM RS/6000s.
|
||||||
|
|
||||||
\item If ETIME is available as an internal function, set the value of the TIMER variable in your make.inc
|
\item If ETIME is available as an internal function, set the value of the TIMER variable in your make.inc
|
||||||
to \texttt{INT\_ETIME}:\texttt{second\_INT\_ETIME.f} and \texttt{dsecnd\_INT\_ETIME.f} will be used.
|
to \texttt{INT\_ETIME}: \texttt{second\_INT\_ETIME.f} and \texttt{dsecnd\_INT\_ETIME.f} will be used.
|
||||||
This is the case with gfortran.
|
This is the case with gfortran.
|
||||||
|
|
||||||
\item If CPU\_TIME is available as an internal function, set the value of the TIMER variable in your make.inc
|
\item If CPU\_TIME is available as an internal function, set the value of the TIMER variable in your make.inc
|
||||||
to \texttt{INT\_CPU\_TIME}:\texttt{second\_INT\_CPU\_TIME.f} and \texttt{dsecnd\_INT\_CPU\_TIME.f} will be used.
|
to \texttt{INT\_CPU\_TIME}: \texttt{second\_INT\_CPU\_TIME.f} and \texttt{dsecnd\_INT\_CPU\_TIME.f} will be used.
|
||||||
|
|
||||||
\item If none of these functions is available, set the value of the TIMER variable in your make.inc
|
\item If none of these functions is available, set the value of the TIMER variable in your make.inc
|
||||||
to \texttt{NONE:}\texttt{second\_NONE.f} and \texttt{dsecnd\_NONE.f} will be used.
|
to \texttt{NONE}: \texttt{second\_NONE.f} and \texttt{dsecnd\_NONE.f} will be used.
|
||||||
These routines will always return zero.
|
These routines will always return zero.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
|
@ -829,8 +829,8 @@ data type to the library if necessary.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\noindent
|
\noindent
|
||||||
The BLAS library is created in \texttt{LAPACK/blas\_PLAT.a}, where
|
The BLAS library is created in \texttt{LAPACK/librefblas.a},
|
||||||
\texttt{PLAT} is the user-defined architecture suffix specified in the file
|
or in the user-defined location specified by \texttt{BLASLIB} in the file
|
||||||
\texttt{LAPACK/make.inc}.
|
\texttt{LAPACK/make.inc}.
|
||||||
|
|
||||||
\subsection{Run the BLAS Test Programs}\label{testblas}
|
\subsection{Run the BLAS Test Programs}\label{testblas}
|
||||||
|
@ -882,8 +882,8 @@ data type to the library if necessary.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\noindent
|
\noindent
|
||||||
The LAPACK library is created in \texttt{LAPACK/lapack\_PLAT.a}, where
|
The LAPACK library is created in \texttt{LAPACK/liblapack.a},
|
||||||
\texttt{PLAT} is the user-defined architecture suffix specified in the file
|
or in the user-defined location specified by \texttt{LAPACKLIB} in the file
|
||||||
\texttt{LAPACK/make.inc}.
|
\texttt{LAPACK/make.inc}.
|
||||||
|
|
||||||
\subsection{Create the Test Matrix Generator Library}
|
\subsection{Create the Test Matrix Generator Library}
|
||||||
|
@ -902,9 +902,9 @@ data type to the library if necessary.
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
|
|
||||||
\noindent
|
\noindent
|
||||||
The test matrix generator library is created in \texttt{LAPACK/tmglib\_PLAT.a},
|
The test matrix generator library is created in \texttt{LAPACK/libtmglib.a},
|
||||||
where \texttt{PLAT} is the user-defined architecture suffix specified in the
|
or in the user-defined location specified by \texttt{TMGLIB} in the file
|
||||||
file \texttt{LAPACK/make.inc}.
|
\texttt{LAPACK/make.inc}.
|
||||||
|
|
||||||
\subsection{Run the LAPACK Test Programs}
|
\subsection{Run the LAPACK Test Programs}
|
||||||
|
|
||||||
|
@ -1114,9 +1114,7 @@ To make a library of the instrumented LAPACK routines, first
|
||||||
go to \texttt{LAPACK/TIMING/LIN/LINSRC} and type \texttt{make} followed
|
go to \texttt{LAPACK/TIMING/LIN/LINSRC} and type \texttt{make} followed
|
||||||
by the data types desired, as in the examples of Section~\ref{toplevelmakefile}.
|
by the data types desired, as in the examples of Section~\ref{toplevelmakefile}.
|
||||||
The library of instrumented code is created in
|
The library of instrumented code is created in
|
||||||
\texttt{LAPACK/TIMING/LIN/linsrc\_PLAT.a},
|
\texttt{LAPACK/TIMING/LIN/linsrc.a}.
|
||||||
where \texttt{PLAT} is the user-defined architecture suffix specified in the
|
|
||||||
file \texttt{LAPACK/make.inc}.
|
|
||||||
\end{sloppypar}
|
\end{sloppypar}
|
||||||
|
|
||||||
\item[b)]
|
\item[b)]
|
||||||
|
@ -1251,9 +1249,7 @@ To make a library of the instrumented LAPACK routines, first
|
||||||
go to \texttt{LAPACK/TIMING/EIG/EIGSRC} and type \texttt{make} followed
|
go to \texttt{LAPACK/TIMING/EIG/EIGSRC} and type \texttt{make} followed
|
||||||
by the data types desired, as in the examples of Section~\ref{toplevelmakefile}.
|
by the data types desired, as in the examples of Section~\ref{toplevelmakefile}.
|
||||||
The library of instrumented code is created in
|
The library of instrumented code is created in
|
||||||
\texttt{LAPACK/TIMING/EIG/eigsrc\_PLAT.a},
|
\texttt{LAPACK/TIMING/EIG/eigsrc.a}.
|
||||||
where \texttt{PLAT} is the user-defined architecture suffix specified in the
|
|
||||||
file \texttt{LAPACK/make.inc}.
|
|
||||||
\end{sloppypar}
|
\end{sloppypar}
|
||||||
|
|
||||||
\item[b)]
|
\item[b)]
|
||||||
|
@ -1389,7 +1385,7 @@ installing LAPACK on an SGI architecture.
|
||||||
\section{ETIME}
|
\section{ETIME}
|
||||||
|
|
||||||
On HPPA architectures,
|
On HPPA architectures,
|
||||||
the compiler and loader flag \texttt{+U77} should be included to access
|
the compiler and linker flag \texttt{+U77} should be included to access
|
||||||
the function \texttt{ETIME}.
|
the function \texttt{ETIME}.
|
||||||
|
|
||||||
\section{ILAENV and IEEE-754 compliance}
|
\section{ILAENV and IEEE-754 compliance}
|
||||||
|
@ -1494,13 +1490,13 @@ has two options: increase your stack size, or force all local variables
|
||||||
to be allocated statically.
|
to be allocated statically.
|
||||||
|
|
||||||
On HPPA architectures, the
|
On HPPA architectures, the
|
||||||
compiler and loader flag \texttt{-K} should be used when compiling these testing
|
compiler and linker flag \texttt{-K} should be used when compiling these testing
|
||||||
and timing main programs to avoid such a stack overflow. I.e., set
|
and timing main programs to avoid such a stack overflow. I.e., set
|
||||||
\texttt{DRVOPTS = -K} in the \texttt{LAPACK/make.inc} file.
|
\texttt{FFLAGS\_DRV = -K} in the \texttt{LAPACK/make.inc} file.
|
||||||
|
|
||||||
For similar reasons,
|
For similar reasons,
|
||||||
on SGI architectures, the compiler and loader flag \texttt{-static} should be
|
on SGI architectures, the compiler and linker flag \texttt{-static} should be
|
||||||
used. I.e., set \texttt{DRVOPTS = -static} in the \texttt{LAPACK/make.inc} file.
|
used. I.e., set \texttt{FFLAGS\_DRV = -static} in the \texttt{LAPACK/make.inc} file.
|
||||||
|
|
||||||
\section{IEEE arithmetic}
|
\section{IEEE arithmetic}
|
||||||
|
|
||||||
|
|
|
@ -1,30 +1,33 @@
|
||||||
include ../make.inc
|
TOPSRCDIR = ..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
|
.PHONY: all testlsame testslamch testdlamch testsecond testdsecnd testieee testversion
|
||||||
all: testlsame testslamch testdlamch testsecond testdsecnd testieee testversion
|
all: testlsame testslamch testdlamch testsecond testdsecnd testieee testversion
|
||||||
|
|
||||||
testlsame: lsame.o lsametst.o
|
testlsame: lsame.o lsametst.o
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
testslamch: slamch.o lsame.o slamchtst.o
|
testslamch: slamch.o lsame.o slamchtst.o
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
testdlamch: dlamch.o lsame.o dlamchtst.o
|
testdlamch: dlamch.o lsame.o dlamchtst.o
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
testsecond: second_$(TIMER).o secondtst.o
|
testsecond: second_$(TIMER).o secondtst.o
|
||||||
@echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)"
|
@echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)"
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
testdsecnd: dsecnd_$(TIMER).o dsecndtst.o
|
testdsecnd: dsecnd_$(TIMER).o dsecndtst.o
|
||||||
@echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)"
|
@echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)"
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
testieee: tstiee.o
|
testieee: tstiee.o
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
testversion: ilaver.o LAPACK_version.o
|
testversion: ilaver.o LAPACK_version.o
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
|
|
||||||
|
.PHONY: run
|
||||||
run: all
|
run: all
|
||||||
./testlsame
|
./testlsame
|
||||||
./testslamch
|
./testslamch
|
||||||
|
@ -34,6 +37,7 @@ run: all
|
||||||
./testieee
|
./testieee
|
||||||
./testversion
|
./testversion
|
||||||
|
|
||||||
|
.PHONY: clean cleanobj cleanexe cleantest
|
||||||
clean: cleanobj cleanexe cleantest
|
clean: cleanobj cleanexe cleantest
|
||||||
cleanobj:
|
cleanobj:
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
|
@ -42,9 +46,5 @@ cleanexe:
|
||||||
cleantest:
|
cleantest:
|
||||||
rm -f core
|
rm -f core
|
||||||
|
|
||||||
.SUFFIXES: .o .f
|
slamch.o: slamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
|
||||||
.f.o:
|
dlamch.o: dlamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
|
||||||
$(FORTRAN) $(OPTS) -c -o $@ $<
|
|
||||||
|
|
||||||
slamch.o: slamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $<
|
|
||||||
dlamch.o: dlamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $<
|
|
||||||
|
|
|
@ -10,6 +10,10 @@
|
||||||
*
|
*
|
||||||
* DOUBLE PRECISION FUNCTION DLAMCH( CMACH )
|
* DOUBLE PRECISION FUNCTION DLAMCH( CMACH )
|
||||||
*
|
*
|
||||||
|
* .. Scalar Arguments ..
|
||||||
|
* CHARACTER CMACH
|
||||||
|
* ..
|
||||||
|
*
|
||||||
*
|
*
|
||||||
*> \par Purpose:
|
*> \par Purpose:
|
||||||
* =============
|
* =============
|
||||||
|
@ -24,6 +28,7 @@
|
||||||
*
|
*
|
||||||
*> \param[in] CMACH
|
*> \param[in] CMACH
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
|
*> CMACH is CHARACTER*1
|
||||||
*> Specifies the value to be returned by DLAMCH:
|
*> Specifies the value to be returned by DLAMCH:
|
||||||
*> = 'E' or 'e', DLAMCH := eps
|
*> = 'E' or 'e', DLAMCH := eps
|
||||||
*> = 'S' or 's', DLAMCH := sfmin
|
*> = 'S' or 's', DLAMCH := sfmin
|
||||||
|
|
|
@ -10,6 +10,10 @@
|
||||||
*
|
*
|
||||||
* DOUBLE PRECISION FUNCTION DLAMCH( CMACH )
|
* DOUBLE PRECISION FUNCTION DLAMCH( CMACH )
|
||||||
*
|
*
|
||||||
|
* .. Scalar Arguments ..
|
||||||
|
* CHARACTER CMACH
|
||||||
|
* ..
|
||||||
|
*
|
||||||
*
|
*
|
||||||
*> \par Purpose:
|
*> \par Purpose:
|
||||||
* =============
|
* =============
|
||||||
|
|
|
@ -25,12 +25,15 @@
|
||||||
* ==========
|
* ==========
|
||||||
*
|
*
|
||||||
*> \param[out] VERS_MAJOR
|
*> \param[out] VERS_MAJOR
|
||||||
|
*> VERS_MAJOR is INTEGER
|
||||||
*> return the lapack major version
|
*> return the lapack major version
|
||||||
*>
|
*>
|
||||||
*> \param[out] VERS_MINOR
|
*> \param[out] VERS_MINOR
|
||||||
|
*> VERS_MINOR is INTEGER
|
||||||
*> return the lapack minor version from the major version
|
*> return the lapack minor version from the major version
|
||||||
*>
|
*>
|
||||||
*> \param[out] VERS_PATCH
|
*> \param[out] VERS_PATCH
|
||||||
|
*> VERS_PATCH is INTEGER
|
||||||
*> return the lapack patch version from the minor version
|
*> return the lapack patch version from the minor version
|
||||||
*
|
*
|
||||||
* Authors:
|
* Authors:
|
||||||
|
@ -41,24 +44,23 @@
|
||||||
*> \author Univ. of Colorado Denver
|
*> \author Univ. of Colorado Denver
|
||||||
*> \author NAG Ltd.
|
*> \author NAG Ltd.
|
||||||
*
|
*
|
||||||
*> \date June 2017
|
*> \date November 2019
|
||||||
*
|
*
|
||||||
*> \ingroup auxOTHERauxiliary
|
*> \ingroup auxOTHERauxiliary
|
||||||
*
|
*
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH )
|
SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH )
|
||||||
*
|
*
|
||||||
* -- LAPACK computational routine (version 3.7.1) --
|
* -- LAPACK computational routine --
|
||||||
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
* -- LAPACK is a software package provided by Univ. of Tennessee, --
|
||||||
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
|
||||||
* June 2017
|
|
||||||
*
|
*
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
*
|
*
|
||||||
INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH
|
INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
VERS_MAJOR = 3
|
VERS_MAJOR = 3
|
||||||
VERS_MINOR = 8
|
VERS_MINOR = 9
|
||||||
VERS_PATCH = 0
|
VERS_PATCH = 0
|
||||||
* =====================================================================
|
* =====================================================================
|
||||||
*
|
*
|
||||||
|
|
|
@ -11,26 +11,24 @@ SHELL = /bin/sh
|
||||||
CC = cc
|
CC = cc
|
||||||
CFLAGS = -O4
|
CFLAGS = -O4
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = f77
|
FC = f77
|
||||||
OPTS = -O4 -fpe1
|
FFLAGS = -O4 -fpe1
|
||||||
DRVOPTS = $(OPTS)
|
FFLAGS_DRV = $(FFLAGS)
|
||||||
NOOPT =
|
FFLAGS_NOOPT =
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = f77
|
LDFLAGS =
|
||||||
LOADOPTS =
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -74,9 +72,9 @@ TIMER = EXT_ETIME
|
||||||
# machine-specific, optimized BLAS library should be used whenever
|
# machine-specific, optimized BLAS library should be used whenever
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
#BLASLIB = ../../librefblas.a
|
#BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
BLASLIB = -ldxml
|
BLASLIB = -ldxml
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,26 +11,24 @@ SHELL = /bin/sh
|
||||||
CC = cc
|
CC = cc
|
||||||
CFLAGS =
|
CFLAGS =
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = f77
|
FC = f77
|
||||||
OPTS = +O4 +U77
|
FFLAGS = +O4 +U77
|
||||||
DRVOPTS = $(OPTS) -K
|
FFLAGS_DRV = $(FFLAGS) -K
|
||||||
NOOPT = +U77
|
FFLAGS_NOOPT = +U77
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = f77
|
LDFLAGS =
|
||||||
LOADOPTS = -Aa +U77
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -74,9 +72,9 @@ TIMER = EXT_ETIME
|
||||||
# machine-specific, optimized BLAS library should be used whenever
|
# machine-specific, optimized BLAS library should be used whenever
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
#BLASLIB = ../../librefblas.a
|
#BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
BLASLIB = -lblas
|
BLASLIB = -lblas
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,29 +11,26 @@ SHELL = /sbin/sh
|
||||||
CC = cc
|
CC = cc
|
||||||
CFLAGS = -O3
|
CFLAGS = -O3
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = f77
|
FC = f77
|
||||||
OPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON
|
FFLAGS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON
|
||||||
#OPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON
|
#FFLAGS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON
|
||||||
DRVOPTS = $(OPTS) -static
|
FFLAGS_DRV = $(FFLAGS) -static
|
||||||
NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON
|
FFLAGS_NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON
|
||||||
#NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON
|
#FFLAGS_NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = f77
|
LDFLAGS =
|
||||||
LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON
|
|
||||||
#LOADOPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -78,8 +75,8 @@ TIMER = EXT_ETIME
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
#BLASLIB = -lblas
|
#BLASLIB = -lblas
|
||||||
BLASLIB = ../../librefblas.a
|
BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,29 +11,26 @@ SHELL = /sbin/sh
|
||||||
CC = cc
|
CC = cc
|
||||||
CFLAGS = -O3
|
CFLAGS = -O3
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = f77
|
FC = f77
|
||||||
OPTS = -O3 -64 -mips4 -r10000
|
FFLAGS = -O3 -64 -mips4 -r10000
|
||||||
#OPTS = -O3 -64 -mips4 -r10000 -mp
|
#FFLAGS = -O3 -64 -mips4 -r10000 -mp
|
||||||
DRVOPTS = $(OPTS) -static
|
FFLAGS_DRV = $(FFLAGS) -static
|
||||||
NOOPT = -64 -mips4 -r10000
|
FFLAGS_NOOPT = -64 -mips4 -r10000
|
||||||
#NOOPT = -64 -mips4 -r10000 -mp
|
#FFLAGS_NOOPT = -64 -mips4 -r10000 -mp
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = f77
|
LDFLAGS =
|
||||||
LOADOPTS = -O3 -64 -mips4 -r10000
|
|
||||||
#LOADOPTS = -O3 -64 -mips4 -r10000 -mp
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -79,8 +76,8 @@ TIMER = EXT_ETIME
|
||||||
#
|
#
|
||||||
BLASLIB = -lblas
|
BLASLIB = -lblas
|
||||||
#BLASLIB = -lblas_mp
|
#BLASLIB = -lblas_mp
|
||||||
#BLASLIB = ../../librefblas.a
|
#BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,26 +11,24 @@ SHELL = /sbin/sh
|
||||||
CC = cc
|
CC = cc
|
||||||
CFLAGS = -O4
|
CFLAGS = -O4
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = f77
|
FC = f77
|
||||||
OPTS = -O4
|
FFLAGS = -O4
|
||||||
DRVOPTS = $(OPTS) -static
|
FFLAGS_DRV = $(FFLAGS) -static
|
||||||
NOOPT =
|
FFLAGS_NOOPT =
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = f77
|
LDFLAGS =
|
||||||
LOADOPTS =
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -75,8 +73,8 @@ TIMER = EXT_ETIME
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
#BLASLIB = -lblas
|
#BLASLIB = -lblas
|
||||||
BLASLIB = ../../librefblas.a
|
BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,26 +11,24 @@ SHELL = /bin/sh
|
||||||
CC = cc
|
CC = cc
|
||||||
CFLAGS = -O3
|
CFLAGS = -O3
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = f77
|
FC = f77
|
||||||
OPTS = -dalign -O4 -fast
|
FFLAGS = -dalign -O4 -fast
|
||||||
DRVOPTS = $(OPTS)
|
FFLAGS_DRV = $(FFLAGS)
|
||||||
NOOPT =
|
FFLAGS_NOOPT =
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = f77
|
LDFLAGS =
|
||||||
LOADOPTS = -dalign -O4 -fast
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -75,8 +73,8 @@ TIMER = EXT_ETIME
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
#BLASLIB = -lblas
|
#BLASLIB = -lblas
|
||||||
BLASLIB = ../../librefblas.a
|
BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,30 +11,27 @@ SHELL = /bin/sh
|
||||||
CC = cc
|
CC = cc
|
||||||
CFLAGS = -O3
|
CFLAGS = -O3
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = f77
|
FC = f77
|
||||||
#OPTS = -O4 -u -f -mt
|
#FFLAGS = -O4 -u -f -mt
|
||||||
#OPTS = -u -f -dalign -native -xO5 -xarch=v8plusa
|
#FFLAGS = -u -f -dalign -native -xO5 -xarch=v8plusa
|
||||||
OPTS = -u -f -dalign -native -xO2 -xarch=v8plusa
|
FFLAGS = -u -f -dalign -native -xO2 -xarch=v8plusa
|
||||||
DRVOPTS = $(OPTS)
|
FFLAGS_DRV = $(FFLAGS)
|
||||||
NOOPT = -u -f
|
FFLAGS_NOOPT = -u -f
|
||||||
#NOOPT = -u -f -mt
|
#FFLAGS_NOOPT = -u -f -mt
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = f77
|
LDFLAGS =
|
||||||
#LOADOPTS = -mt
|
|
||||||
LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -78,10 +75,10 @@ TIMER = EXT_ETIME
|
||||||
# machine-specific, optimized BLAS library should be used whenever
|
# machine-specific, optimized BLAS library should be used whenever
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
#BLASLIB = ../../librefblas.a
|
#BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
#BLASLIB = -xlic_lib=sunperf_mt
|
#BLASLIB = -xlic_lib=sunperf_mt
|
||||||
BLASLIB = -xlic_lib=sunperf
|
BLASLIB = -xlic_lib=sunperf
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,27 +11,25 @@ SHELL = /bin/sh
|
||||||
CC = xlc
|
CC = xlc
|
||||||
CFLAGS = -O3 -qnosave
|
CFLAGS = -O3 -qnosave
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = xlf
|
FC = xlf
|
||||||
OPTS = -O3 -qfixed -qnosave
|
FFLAGS = -O3 -qfixed -qnosave
|
||||||
# For -O2, add -qstrict=none
|
# For -O2, add -qstrict=none
|
||||||
DRVOPTS = $(OPTS)
|
FFLAGS_DRV = $(FFLAGS)
|
||||||
NOOPT = -O0 -qfixed -qnosave
|
FFLAGS_NOOPT = -O0 -qfixed -qnosave
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = xlf
|
LDFLAGS =
|
||||||
LOADOPTS = -qnosave
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -75,9 +73,9 @@ TIMER = EXT_ETIME_
|
||||||
# machine-specific, optimized BLAS library should be used whenever
|
# machine-specific, optimized BLAS library should be used whenever
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
#BLASLIB = ../../librefblas.a
|
#BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
BLASLIB = -lessl
|
BLASLIB = -lessl
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,7 +11,7 @@ SHELL = /bin/sh
|
||||||
CC = gcc
|
CC = gcc
|
||||||
CFLAGS = -O3
|
CFLAGS = -O3
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
|
@ -19,22 +19,20 @@ CFLAGS = -O3
|
||||||
# and handle these quantities appropriately. As a consequence, one
|
# and handle these quantities appropriately. As a consequence, one
|
||||||
# should not compile LAPACK with flags such as -ffpe-trap=overflow.
|
# should not compile LAPACK with flags such as -ffpe-trap=overflow.
|
||||||
#
|
#
|
||||||
FORTRAN = gfortran
|
FC = gfortran
|
||||||
OPTS = -O2 -frecursive
|
FFLAGS = -O2 -frecursive
|
||||||
DRVOPTS = $(OPTS)
|
FFLAGS_DRV = $(FFLAGS)
|
||||||
NOOPT = -O0 -frecursive
|
FFLAGS_NOOPT = -O0 -frecursive
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = gfortran
|
LDFLAGS =
|
||||||
LOADOPTS =
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -78,8 +76,8 @@ TIMER = INT_ETIME
|
||||||
# machine-specific, optimized BLAS library should be used whenever
|
# machine-specific, optimized BLAS library should be used whenever
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
BLASLIB = ../../librefblas.a
|
BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,7 +11,7 @@ SHELL = /bin/sh
|
||||||
CC = gcc
|
CC = gcc
|
||||||
CFLAGS = -g
|
CFLAGS = -g
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
|
@ -19,22 +19,20 @@ CFLAGS = -g
|
||||||
# and handle these quantities appropriately. As a consequence, one
|
# and handle these quantities appropriately. As a consequence, one
|
||||||
# should not compile LAPACK with flags such as -ffpe-trap=overflow.
|
# should not compile LAPACK with flags such as -ffpe-trap=overflow.
|
||||||
#
|
#
|
||||||
FORTRAN = gfortran -fimplicit-none -g -frecursive
|
FC = gfortran
|
||||||
OPTS =
|
FFLAGS = -fimplicit-none -g -frecursive
|
||||||
DRVOPTS = $(OPTS)
|
FFLAGS_DRV = $(FFLAGS)
|
||||||
NOOPT = -g -O0 -frecursive
|
FFLAGS_NOOPT = $(FFLAGS) -O0
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = gfortran -g
|
LDFLAGS =
|
||||||
LOADOPTS =
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -78,8 +76,8 @@ TIMER = INT_CPU_TIME
|
||||||
# machine-specific, optimized BLAS library should be used whenever
|
# machine-specific, optimized BLAS library should be used whenever
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
BLASLIB = ../../librefblas.a
|
BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,26 +11,24 @@ SHELL = /bin/sh
|
||||||
CC = icc
|
CC = icc
|
||||||
CFLAGS = -O3
|
CFLAGS = -O3
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = ifort
|
FC = ifort
|
||||||
OPTS = -O3 -fp-model strict -assume protect_parens
|
FFLAGS = -O3 -fp-model strict -assume protect_parens
|
||||||
DRVOPTS = $(OPTS)
|
FFLAGS_DRV = $(FFLAGS)
|
||||||
NOOPT = -O0 -fp-model strict -assume protect_parens
|
FFLAGS_NOOPT = -O0 -fp-model strict -assume protect_parens
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = ifort
|
LDFLAGS =
|
||||||
LOADOPTS =
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = ranlib
|
RANLIB = ranlib
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -74,8 +72,8 @@ TIMER = EXT_ETIME
|
||||||
# machine-specific, optimized BLAS library should be used whenever
|
# machine-specific, optimized BLAS library should be used whenever
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
BLASLIB = ../../librefblas.a
|
BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,26 +11,24 @@ SHELL = /bin/sh
|
||||||
CC = pgcc
|
CC = pgcc
|
||||||
CFLAGS =
|
CFLAGS =
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = pgf95
|
FC = pgf95
|
||||||
OPTS = -O3
|
FFLAGS = -O3
|
||||||
DRVOPTS = $(OPTS)
|
FFLAGS_DRV = $(FFLAGS)
|
||||||
NOOPT = -O0
|
FFLAGS_NOOPT = -O0
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = $(FORTRAN)
|
LDFLAGS =
|
||||||
LOADOPTS =
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -74,8 +72,8 @@ TIMER = INT_CPU_TIME
|
||||||
# machine-specific, optimized BLAS library should be used whenever
|
# machine-specific, optimized BLAS library should be used whenever
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
BLASLIB = ../../librefblas.a
|
BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -11,26 +11,24 @@ SHELL = /bin/sh
|
||||||
CC = pghpc
|
CC = pghpc
|
||||||
CFLAGS =
|
CFLAGS =
|
||||||
|
|
||||||
# Modify the FORTRAN and OPTS definitions to refer to the compiler
|
# Modify the FC and FFLAGS definitions to the desired compiler
|
||||||
# and desired compiler options for your machine. NOOPT refers to
|
# and desired compiler options for your machine. NOOPT refers to
|
||||||
# the compiler options desired when NO OPTIMIZATION is selected.
|
# the compiler options desired when NO OPTIMIZATION is selected.
|
||||||
#
|
#
|
||||||
FORTRAN = pghpf
|
FC = pghpf
|
||||||
OPTS = -O4 -Mnohpfc -Mdclchk
|
FFLAGS = -O4 -Mnohpfc -Mdclchk
|
||||||
DRVOPTS = $(OPTS)
|
FFLAGS_DRV = $(FFLAGS)
|
||||||
NOOPT = -Mnohpfc -Mdclchk
|
FFLAGS_NOOPT = -Mnohpfc -Mdclchk
|
||||||
|
|
||||||
# Define LOADER and LOADOPTS to refer to the loader and desired
|
# Define LDFLAGS to the desired linker options for your machine.
|
||||||
# load options for your machine.
|
|
||||||
#
|
#
|
||||||
LOADER = pghpf
|
LDFLAGS =
|
||||||
LOADOPTS =
|
|
||||||
|
|
||||||
# The archiver and the flag(s) to use when building an archive
|
# The archiver and the flag(s) to use when building an archive
|
||||||
# (library). If your system has no ranlib, set RANLIB = echo.
|
# (library). If your system has no ranlib, set RANLIB = echo.
|
||||||
#
|
#
|
||||||
ARCH = ar
|
AR = ar
|
||||||
ARCHFLAGS = cr
|
ARFLAGS = cr
|
||||||
RANLIB = echo
|
RANLIB = echo
|
||||||
|
|
||||||
# Timer for the SECOND and DSECND routines
|
# Timer for the SECOND and DSECND routines
|
||||||
|
@ -75,8 +73,8 @@ TIMER = EXT_ETIME
|
||||||
# possible.)
|
# possible.)
|
||||||
#
|
#
|
||||||
#BLASLIB = -lessl
|
#BLASLIB = -lessl
|
||||||
BLASLIB = ../../librefblas.a
|
BLASLIB = $(TOPSRCDIR)/librefblas.a
|
||||||
CBLASLIB = ../../libcblas.a
|
CBLASLIB = $(TOPSRCDIR)/libcblas.a
|
||||||
LAPACKLIB = liblapack.a
|
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
|
||||||
TMGLIB = libtmglib.a
|
TMGLIB = $(TOPSRCDIR)/libtmglib.a
|
||||||
LAPACKELIB = liblapacke.a
|
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
*
|
*
|
||||||
*> \param[in] CMACH
|
*> \param[in] CMACH
|
||||||
*> \verbatim
|
*> \verbatim
|
||||||
|
*> CMACH is CHARACTER*1
|
||||||
*> Specifies the value to be returned by SLAMCH:
|
*> Specifies the value to be returned by SLAMCH:
|
||||||
*> = 'E' or 'e', SLAMCH := eps
|
*> = 'E' or 'e', SLAMCH := eps
|
||||||
*> = 'S' or 's , SLAMCH := sfmin
|
*> = 'S' or 's , SLAMCH := sfmin
|
||||||
|
|
|
@ -16,18 +16,16 @@ if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND)
|
||||||
${LAPACK_BINARY_DIR}/include/lapacke_mangling.h)
|
${LAPACK_BINARY_DIR}/include/lapacke_mangling.h)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(WIN32 AND NOT UNIX)
|
|
||||||
add_definitions(-DHAVE_LAPACK_CONFIG_H -DLAPACK_COMPLEX_STRUCTURE)
|
|
||||||
message(STATUS "Windows BUILD")
|
|
||||||
endif()
|
|
||||||
|
|
||||||
get_directory_property(DirDefs COMPILE_DEFINITIONS)
|
|
||||||
|
|
||||||
include_directories(include ${LAPACK_BINARY_DIR}/include)
|
include_directories(include ${LAPACK_BINARY_DIR}/include)
|
||||||
add_subdirectory(include)
|
add_subdirectory(include)
|
||||||
add_subdirectory(src)
|
add_subdirectory(src)
|
||||||
add_subdirectory(utils)
|
add_subdirectory(utils)
|
||||||
|
|
||||||
|
option(LAPACKE_BUILD_SINGLE "Build LAPACKE single precision real" ON)
|
||||||
|
option(LAPACKE_BUILD_DOUBLE "Build LAPACKE double precision real" ON)
|
||||||
|
option(LAPACKE_BUILD_COMPLEX "Build LAPACKE single precision complex" ON)
|
||||||
|
option(LAPACKE_BUILD_COMPLEX16 "Build LAPACKE double precision complex" ON)
|
||||||
|
|
||||||
macro(append_subdir_files variable dirname)
|
macro(append_subdir_files variable dirname)
|
||||||
get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable})
|
get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable})
|
||||||
foreach(depfile ${holder})
|
foreach(depfile ${holder})
|
||||||
|
@ -35,8 +33,29 @@ macro(append_subdir_files variable dirname)
|
||||||
endforeach()
|
endforeach()
|
||||||
endmacro()
|
endmacro()
|
||||||
|
|
||||||
|
message(STATUS "Build LAPACKE single precision real: ${LAPACKE_BUILD_SINGLE}")
|
||||||
|
message(STATUS "Build LAPACKE double precision real: ${LAPACKE_BUILD_DOUBLE}")
|
||||||
|
message(STATUS "Build LAPACKE single precision complex: ${LAPACKE_BUILD_COMPLEX}")
|
||||||
|
message(STATUS "Build LAPACKE double precision complex: ${LAPACKE_BUILD_COMPLEX16}")
|
||||||
|
|
||||||
append_subdir_files(LAPACKE_INCLUDE "include")
|
append_subdir_files(LAPACKE_INCLUDE "include")
|
||||||
append_subdir_files(SOURCES "src")
|
append_subdir_files(SOURCES "src")
|
||||||
|
if (LAPACKE_BUILD_SINGLE)
|
||||||
|
append_subdir_files(SOURCES_SINGLE "src")
|
||||||
|
list(APPEND SOURCES ${SOURCES_SINGLE})
|
||||||
|
endif()
|
||||||
|
if (LAPACKE_BUILD_DOUBLE)
|
||||||
|
append_subdir_files(SOURCES_DOUBLE "src")
|
||||||
|
list(APPEND SOURCES ${SOURCES_DOUBLE})
|
||||||
|
endif()
|
||||||
|
if (LAPACKE_BUILD_COMPLEX)
|
||||||
|
append_subdir_files(SOURCES_COMPLEX "src")
|
||||||
|
list(APPEND SOURCES ${SOURCES_COMPLEX})
|
||||||
|
endif()
|
||||||
|
if (LAPACKE_BUILD_COMPLEX16)
|
||||||
|
append_subdir_files(SOURCES_COMPLEX16 "src")
|
||||||
|
list(APPEND SOURCES ${SOURCES_COMPLEX16})
|
||||||
|
endif()
|
||||||
append_subdir_files(DEPRECATED "src")
|
append_subdir_files(DEPRECATED "src")
|
||||||
append_subdir_files(EXTENDED "src")
|
append_subdir_files(EXTENDED "src")
|
||||||
append_subdir_files(MATGEN "src")
|
append_subdir_files(MATGEN "src")
|
||||||
|
@ -61,9 +80,13 @@ set_target_properties(
|
||||||
SOVERSION ${LAPACK_MAJOR_VERSION}
|
SOVERSION ${LAPACK_MAJOR_VERSION}
|
||||||
)
|
)
|
||||||
target_include_directories(lapacke PUBLIC
|
target_include_directories(lapacke PUBLIC
|
||||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
|
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
||||||
$<INSTALL_INTERFACE:include>
|
$<INSTALL_INTERFACE:include>
|
||||||
)
|
)
|
||||||
|
if(WIN32 AND NOT UNIX)
|
||||||
|
target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE)
|
||||||
|
message(STATUS "Windows BUILD")
|
||||||
|
endif()
|
||||||
|
|
||||||
if(LAPACKE_WITH_TMG)
|
if(LAPACKE_WITH_TMG)
|
||||||
target_link_libraries(lapacke PRIVATE tmglib)
|
target_link_libraries(lapacke PRIVATE tmglib)
|
||||||
|
@ -71,7 +94,11 @@ endif()
|
||||||
target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES})
|
target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES})
|
||||||
|
|
||||||
lapack_install_library(lapacke)
|
lapack_install_library(lapacke)
|
||||||
install(FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
install(
|
||||||
|
FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h
|
||||||
|
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||||
|
COMPONENT Development
|
||||||
|
)
|
||||||
|
|
||||||
if(BUILD_TESTING)
|
if(BUILD_TESTING)
|
||||||
add_subdirectory(example)
|
add_subdirectory(example)
|
||||||
|
@ -82,6 +109,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_
|
||||||
install(FILES
|
install(FILES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc
|
${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc
|
||||||
DESTINATION ${PKG_CONFIG_DIR}
|
DESTINATION ${PKG_CONFIG_DIR}
|
||||||
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
||||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in
|
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in
|
||||||
|
@ -95,7 +123,10 @@ install(FILES
|
||||||
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake
|
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake
|
||||||
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake
|
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}
|
||||||
|
COMPONENT Development
|
||||||
)
|
)
|
||||||
|
|
||||||
install(EXPORT lapacke-targets
|
install(EXPORT lapacke-targets
|
||||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION})
|
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}
|
||||||
|
COMPONENT Development
|
||||||
|
)
|
||||||
|
|
|
@ -40,22 +40,26 @@
|
||||||
# To clean everything including lapacke library type
|
# To clean everything including lapacke library type
|
||||||
# 'make cleanall'
|
# 'make cleanall'
|
||||||
#
|
#
|
||||||
include ../make.inc
|
TOPSRCDIR = ..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
all: lapacke
|
all: lapacke
|
||||||
|
|
||||||
|
.PHONY: lapacke
|
||||||
lapacke: include/lapacke_mangling.h
|
lapacke: include/lapacke_mangling.h
|
||||||
$(MAKE) -C src
|
$(MAKE) -C src
|
||||||
$(MAKE) -C utils
|
$(MAKE) -C utils
|
||||||
|
|
||||||
include/lapacke_mangling.h: include/lapacke_mangling_with_flags.h.in
|
include/lapacke_mangling.h: include/lapacke_mangling_with_flags.h.in
|
||||||
cp $< $@
|
cp include/lapacke_mangling_with_flags.h.in $@
|
||||||
|
|
||||||
|
.PHONY: lapacke_example
|
||||||
lapacke_example: lapacke
|
lapacke_example: lapacke
|
||||||
$(MAKE) -C example
|
$(MAKE) -C example
|
||||||
|
|
||||||
#clean: cleanlib
|
.PHONY: clean cleanobj cleanlib cleanexe
|
||||||
clean: cleanobj
|
clean:
|
||||||
$(MAKE) -C src clean
|
$(MAKE) -C src clean
|
||||||
$(MAKE) -C utils clean
|
$(MAKE) -C utils clean
|
||||||
$(MAKE) -C example clean
|
$(MAKE) -C example clean
|
||||||
|
@ -64,6 +68,6 @@ cleanobj:
|
||||||
$(MAKE) -C utils cleanobj
|
$(MAKE) -C utils cleanobj
|
||||||
$(MAKE) -C example cleanobj
|
$(MAKE) -C example cleanobj
|
||||||
cleanlib:
|
cleanlib:
|
||||||
rm -f ../$(LAPACKELIB)
|
$(MAKE) -C src cleanlib
|
||||||
cleanexe:
|
cleanexe:
|
||||||
$(MAKE) -C example cleanexe
|
$(MAKE) -C example cleanexe
|
||||||
|
|
|
@ -7,8 +7,11 @@ if(NOT TARGET lapacke)
|
||||||
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
|
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Hint for project building against lapack
|
||||||
|
set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID})
|
||||||
|
|
||||||
# Report lapacke header search locations from build tree.
|
# Report lapacke header search locations from build tree.
|
||||||
set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
|
set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
|
||||||
|
|
||||||
# Report lapacke libraries.
|
# Report lapacke libraries.
|
||||||
set(LAPACKE_LIBRARIES lapacke)
|
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES})
|
||||||
|
|
|
@ -13,11 +13,14 @@ if(NOT TARGET lapacke)
|
||||||
include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake)
|
include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# Hint for project building against lapack
|
||||||
|
set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID})
|
||||||
|
|
||||||
# Report lapacke header search locations.
|
# Report lapacke header search locations.
|
||||||
set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include)
|
set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include)
|
||||||
|
|
||||||
# Report lapacke libraries.
|
# Report lapacke libraries.
|
||||||
set(LAPACKE_LIBRARIES lapacke)
|
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES})
|
||||||
|
|
||||||
unset(_LAPACKE_PREFIX)
|
unset(_LAPACKE_PREFIX)
|
||||||
unset(_LAPACKE_SELF_DIR)
|
unset(_LAPACKE_SELF_DIR)
|
||||||
|
|
|
@ -1,34 +1,38 @@
|
||||||
include ../../make.inc
|
TOPSRCDIR = ../..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
|
.SUFFIXES: .c .o
|
||||||
|
.c.o:
|
||||||
|
$(CC) $(CFLAGS) -I. -I../include -c -o $@ $<
|
||||||
|
|
||||||
|
.PHONY: all
|
||||||
all: xexample_DGESV_rowmajor \
|
all: xexample_DGESV_rowmajor \
|
||||||
xexample_DGESV_colmajor \
|
xexample_DGESV_colmajor \
|
||||||
xexample_DGELS_rowmajor \
|
xexample_DGELS_rowmajor \
|
||||||
xexample_DGELS_colmajor
|
xexample_DGELS_colmajor
|
||||||
|
|
||||||
LIBRARIES = ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB)
|
LIBRARIES = $(LAPACKELIB) $(LAPACKLIB) $(BLASLIB)
|
||||||
|
|
||||||
# Double Precision Examples
|
# Double Precision Examples
|
||||||
xexample_DGESV_rowmajor: example_DGESV_rowmajor.o lapacke_example_aux.o $(LIBRARIES)
|
xexample_DGESV_rowmajor: example_DGESV_rowmajor.o lapacke_example_aux.o $(LIBRARIES)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
./$@
|
./$@
|
||||||
|
|
||||||
xexample_DGESV_colmajor: example_DGESV_colmajor.o lapacke_example_aux.o $(LIBRARIES)
|
xexample_DGESV_colmajor: example_DGESV_colmajor.o lapacke_example_aux.o $(LIBRARIES)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
./$@
|
./$@
|
||||||
|
|
||||||
xexample_DGELS_rowmajor: example_DGELS_rowmajor.o lapacke_example_aux.o $(LIBRARIES)
|
xexample_DGELS_rowmajor: example_DGELS_rowmajor.o lapacke_example_aux.o $(LIBRARIES)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
./$@
|
./$@
|
||||||
|
|
||||||
xexample_DGELS_colmajor: example_DGELS_colmajor.o lapacke_example_aux.o $(LIBRARIES)
|
xexample_DGELS_colmajor: example_DGELS_colmajor.o lapacke_example_aux.o $(LIBRARIES)
|
||||||
$(LOADER) $(LOADOPTS) -o $@ $^
|
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
|
||||||
./$@
|
./$@
|
||||||
|
|
||||||
|
.PHONY: clean cleanobj cleanexe
|
||||||
clean: cleanobj cleanexe
|
clean: cleanobj cleanexe
|
||||||
cleanobj:
|
cleanobj:
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
cleanexe:
|
cleanexe:
|
||||||
rm -f x*
|
rm -f x*
|
||||||
|
|
||||||
.c.o:
|
|
||||||
$(CC) $(CFLAGS) -I. -I../include -c -o $@ $<
|
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
set(LAPACKE_INCLUDE lapacke.h lapacke_config.h lapacke_utils.h)
|
set(LAPACKE_INCLUDE lapacke.h lapack.h lapacke_config.h lapacke_utils.h)
|
||||||
|
|
||||||
file(COPY ${LAPACKE_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include)
|
file(COPY ${LAPACKE_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include)
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,4 +1,4 @@
|
||||||
set(SOURCES
|
set(SOURCES_COMPLEX
|
||||||
lapacke_cbbcsd.c
|
lapacke_cbbcsd.c
|
||||||
lapacke_cbbcsd_work.c
|
lapacke_cbbcsd_work.c
|
||||||
lapacke_cbdsqr.c
|
lapacke_cbdsqr.c
|
||||||
|
@ -78,11 +78,11 @@ lapacke_cgeqrf_work.c
|
||||||
lapacke_cgeqrfp.c
|
lapacke_cgeqrfp.c
|
||||||
lapacke_cgeqrfp_work.c
|
lapacke_cgeqrfp_work.c
|
||||||
lapacke_cgeqrt.c
|
lapacke_cgeqrt.c
|
||||||
|
lapacke_cgeqrt_work.c
|
||||||
lapacke_cgeqrt2.c
|
lapacke_cgeqrt2.c
|
||||||
lapacke_cgeqrt2_work.c
|
lapacke_cgeqrt2_work.c
|
||||||
lapacke_cgeqrt3.c
|
lapacke_cgeqrt3.c
|
||||||
lapacke_cgeqrt3_work.c
|
lapacke_cgeqrt3_work.c
|
||||||
lapacke_cgeqrt_work.c
|
|
||||||
lapacke_cgerfs.c
|
lapacke_cgerfs.c
|
||||||
lapacke_cgerfs_work.c
|
lapacke_cgerfs_work.c
|
||||||
lapacke_cgerqf.c
|
lapacke_cgerqf.c
|
||||||
|
@ -93,6 +93,8 @@ lapacke_cgesv.c
|
||||||
lapacke_cgesv_work.c
|
lapacke_cgesv_work.c
|
||||||
lapacke_cgesvd.c
|
lapacke_cgesvd.c
|
||||||
lapacke_cgesvd_work.c
|
lapacke_cgesvd_work.c
|
||||||
|
lapacke_cgesvdq.c
|
||||||
|
lapacke_cgesvdq_work.c
|
||||||
lapacke_cgesvdx.c
|
lapacke_cgesvdx.c
|
||||||
lapacke_cgesvdx_work.c
|
lapacke_cgesvdx_work.c
|
||||||
lapacke_cgesvj.c
|
lapacke_cgesvj.c
|
||||||
|
@ -129,10 +131,10 @@ lapacke_cggevx.c
|
||||||
lapacke_cggevx_work.c
|
lapacke_cggevx_work.c
|
||||||
lapacke_cggglm.c
|
lapacke_cggglm.c
|
||||||
lapacke_cggglm_work.c
|
lapacke_cggglm_work.c
|
||||||
lapacke_cgghrd.c
|
|
||||||
lapacke_cgghrd_work.c
|
|
||||||
lapacke_cgghd3.c
|
lapacke_cgghd3.c
|
||||||
lapacke_cgghd3_work.c
|
lapacke_cgghd3_work.c
|
||||||
|
lapacke_cgghrd.c
|
||||||
|
lapacke_cgghrd_work.c
|
||||||
lapacke_cgglse.c
|
lapacke_cgglse.c
|
||||||
lapacke_cgglse_work.c
|
lapacke_cgglse_work.c
|
||||||
lapacke_cggqrf.c
|
lapacke_cggqrf.c
|
||||||
|
@ -157,14 +159,14 @@ lapacke_cgttrs.c
|
||||||
lapacke_cgttrs_work.c
|
lapacke_cgttrs_work.c
|
||||||
lapacke_chbev.c
|
lapacke_chbev.c
|
||||||
lapacke_chbev_work.c
|
lapacke_chbev_work.c
|
||||||
lapacke_chbevd.c
|
|
||||||
lapacke_chbevd_work.c
|
|
||||||
lapacke_chbevx.c
|
|
||||||
lapacke_chbevx_work.c
|
|
||||||
lapacke_chbev_2stage.c
|
lapacke_chbev_2stage.c
|
||||||
lapacke_chbev_2stage_work.c
|
lapacke_chbev_2stage_work.c
|
||||||
|
lapacke_chbevd.c
|
||||||
|
lapacke_chbevd_work.c
|
||||||
lapacke_chbevd_2stage.c
|
lapacke_chbevd_2stage.c
|
||||||
lapacke_chbevd_2stage_work.c
|
lapacke_chbevd_2stage_work.c
|
||||||
|
lapacke_chbevx.c
|
||||||
|
lapacke_chbevx_work.c
|
||||||
lapacke_chbevx_2stage.c
|
lapacke_chbevx_2stage.c
|
||||||
lapacke_chbevx_2stage_work.c
|
lapacke_chbevx_2stage_work.c
|
||||||
lapacke_chbgst.c
|
lapacke_chbgst.c
|
||||||
|
@ -185,18 +187,18 @@ lapacke_cheequb.c
|
||||||
lapacke_cheequb_work.c
|
lapacke_cheequb_work.c
|
||||||
lapacke_cheev.c
|
lapacke_cheev.c
|
||||||
lapacke_cheev_work.c
|
lapacke_cheev_work.c
|
||||||
lapacke_cheevd.c
|
|
||||||
lapacke_cheevd_work.c
|
|
||||||
lapacke_cheevr.c
|
|
||||||
lapacke_cheevr_work.c
|
|
||||||
lapacke_cheevx.c
|
|
||||||
lapacke_cheevx_work.c
|
|
||||||
lapacke_cheev_2stage.c
|
lapacke_cheev_2stage.c
|
||||||
lapacke_cheev_2stage_work.c
|
lapacke_cheev_2stage_work.c
|
||||||
|
lapacke_cheevd.c
|
||||||
|
lapacke_cheevd_work.c
|
||||||
lapacke_cheevd_2stage.c
|
lapacke_cheevd_2stage.c
|
||||||
lapacke_cheevd_2stage_work.c
|
lapacke_cheevd_2stage_work.c
|
||||||
|
lapacke_cheevr.c
|
||||||
|
lapacke_cheevr_work.c
|
||||||
lapacke_cheevr_2stage.c
|
lapacke_cheevr_2stage.c
|
||||||
lapacke_cheevr_2stage_work.c
|
lapacke_cheevr_2stage_work.c
|
||||||
|
lapacke_cheevx.c
|
||||||
|
lapacke_cheevx_work.c
|
||||||
lapacke_cheevx_2stage.c
|
lapacke_cheevx_2stage.c
|
||||||
lapacke_cheevx_2stage_work.c
|
lapacke_cheevx_2stage_work.c
|
||||||
lapacke_chegst.c
|
lapacke_chegst.c
|
||||||
|
@ -214,8 +216,8 @@ lapacke_cherfs_work.c
|
||||||
lapacke_chesv.c
|
lapacke_chesv.c
|
||||||
lapacke_chesv_work.c
|
lapacke_chesv_work.c
|
||||||
lapacke_chesv_aa.c
|
lapacke_chesv_aa.c
|
||||||
lapacke_chesv_aa_2stage.c
|
|
||||||
lapacke_chesv_aa_work.c
|
lapacke_chesv_aa_work.c
|
||||||
|
lapacke_chesv_aa_2stage.c
|
||||||
lapacke_chesv_aa_2stage_work.c
|
lapacke_chesv_aa_2stage_work.c
|
||||||
lapacke_chesv_rk.c
|
lapacke_chesv_rk.c
|
||||||
lapacke_chesv_rk_work.c
|
lapacke_chesv_rk_work.c
|
||||||
|
@ -226,35 +228,35 @@ lapacke_cheswapr_work.c
|
||||||
lapacke_chetrd.c
|
lapacke_chetrd.c
|
||||||
lapacke_chetrd_work.c
|
lapacke_chetrd_work.c
|
||||||
lapacke_chetrf.c
|
lapacke_chetrf.c
|
||||||
lapacke_chetrf_rook.c
|
|
||||||
lapacke_chetrf_work.c
|
lapacke_chetrf_work.c
|
||||||
lapacke_chetrf_rook_work.c
|
|
||||||
lapacke_chetrf_aa.c
|
lapacke_chetrf_aa.c
|
||||||
lapacke_chetrf_aa_2stage.c
|
|
||||||
lapacke_chetrf_aa_work.c
|
lapacke_chetrf_aa_work.c
|
||||||
|
lapacke_chetrf_aa_2stage.c
|
||||||
lapacke_chetrf_aa_2stage_work.c
|
lapacke_chetrf_aa_2stage_work.c
|
||||||
lapacke_chetrf_rk.c
|
lapacke_chetrf_rk.c
|
||||||
lapacke_chetrf_rk_work.c
|
lapacke_chetrf_rk_work.c
|
||||||
|
lapacke_chetrf_rook.c
|
||||||
|
lapacke_chetrf_rook_work.c
|
||||||
lapacke_chetri.c
|
lapacke_chetri.c
|
||||||
|
lapacke_chetri_work.c
|
||||||
lapacke_chetri2.c
|
lapacke_chetri2.c
|
||||||
lapacke_chetri2_work.c
|
lapacke_chetri2_work.c
|
||||||
lapacke_chetri_3.c
|
|
||||||
lapacke_chetri_3_work.c
|
|
||||||
lapacke_chetri2x.c
|
lapacke_chetri2x.c
|
||||||
lapacke_chetri2x_work.c
|
lapacke_chetri2x_work.c
|
||||||
lapacke_chetri_work.c
|
lapacke_chetri_3.c
|
||||||
|
lapacke_chetri_3_work.c
|
||||||
lapacke_chetrs.c
|
lapacke_chetrs.c
|
||||||
lapacke_chetrs_rook.c
|
lapacke_chetrs_work.c
|
||||||
lapacke_chetrs2.c
|
lapacke_chetrs2.c
|
||||||
lapacke_chetrs2_work.c
|
lapacke_chetrs2_work.c
|
||||||
lapacke_chetrs_work.c
|
|
||||||
lapacke_chetrs_rook_work.c
|
|
||||||
lapacke_chetrs_aa.c
|
|
||||||
lapacke_chetrs_aa_2stage.c
|
|
||||||
lapacke_chetrs_aa_work.c
|
|
||||||
lapacke_chetrs_aa_2stage_work.c
|
|
||||||
lapacke_chetrs_3.c
|
lapacke_chetrs_3.c
|
||||||
lapacke_chetrs_3_work.c
|
lapacke_chetrs_3_work.c
|
||||||
|
lapacke_chetrs_aa.c
|
||||||
|
lapacke_chetrs_aa_work.c
|
||||||
|
lapacke_chetrs_aa_2stage.c
|
||||||
|
lapacke_chetrs_aa_2stage_work.c
|
||||||
|
lapacke_chetrs_rook.c
|
||||||
|
lapacke_chetrs_rook_work.c
|
||||||
lapacke_chfrk.c
|
lapacke_chfrk.c
|
||||||
lapacke_chfrk_work.c
|
lapacke_chfrk_work.c
|
||||||
lapacke_chgeqz.c
|
lapacke_chgeqz.c
|
||||||
|
@ -445,52 +447,54 @@ lapacke_csyconv.c
|
||||||
lapacke_csyconv_work.c
|
lapacke_csyconv_work.c
|
||||||
lapacke_csyequb.c
|
lapacke_csyequb.c
|
||||||
lapacke_csyequb_work.c
|
lapacke_csyequb_work.c
|
||||||
|
lapacke_csyr.c
|
||||||
|
lapacke_csyr_work.c
|
||||||
lapacke_csyrfs.c
|
lapacke_csyrfs.c
|
||||||
lapacke_csyrfs_work.c
|
lapacke_csyrfs_work.c
|
||||||
lapacke_csysv.c
|
lapacke_csysv.c
|
||||||
lapacke_csysv_rook.c
|
|
||||||
lapacke_csysv_rook_work.c
|
|
||||||
lapacke_csysv_work.c
|
lapacke_csysv_work.c
|
||||||
lapacke_csysv_aa.c
|
lapacke_csysv_aa.c
|
||||||
lapacke_csysv_aa_2stage.c
|
|
||||||
lapacke_csysv_aa_work.c
|
lapacke_csysv_aa_work.c
|
||||||
|
lapacke_csysv_aa_2stage.c
|
||||||
lapacke_csysv_aa_2stage_work.c
|
lapacke_csysv_aa_2stage_work.c
|
||||||
lapacke_csysv_rk.c
|
lapacke_csysv_rk.c
|
||||||
lapacke_csysv_rk_work.c
|
lapacke_csysv_rk_work.c
|
||||||
|
lapacke_csysv_rook.c
|
||||||
|
lapacke_csysv_rook_work.c
|
||||||
lapacke_csysvx.c
|
lapacke_csysvx.c
|
||||||
lapacke_csysvx_work.c
|
lapacke_csysvx_work.c
|
||||||
lapacke_csyswapr.c
|
lapacke_csyswapr.c
|
||||||
lapacke_csyswapr_work.c
|
lapacke_csyswapr_work.c
|
||||||
lapacke_csytrf.c
|
lapacke_csytrf.c
|
||||||
lapacke_csytrf_work.c
|
lapacke_csytrf_work.c
|
||||||
lapacke_csytrf_rook.c
|
|
||||||
lapacke_csytrf_rook_work.c
|
|
||||||
lapacke_csytrf_aa.c
|
lapacke_csytrf_aa.c
|
||||||
lapacke_csytrf_aa_2stage.c
|
|
||||||
lapacke_csytrf_aa_work.c
|
lapacke_csytrf_aa_work.c
|
||||||
|
lapacke_csytrf_aa_2stage.c
|
||||||
lapacke_csytrf_aa_2stage_work.c
|
lapacke_csytrf_aa_2stage_work.c
|
||||||
lapacke_csytrf_rk.c
|
lapacke_csytrf_rk.c
|
||||||
lapacke_csytrf_rk_work.c
|
lapacke_csytrf_rk_work.c
|
||||||
|
lapacke_csytrf_rook.c
|
||||||
|
lapacke_csytrf_rook_work.c
|
||||||
lapacke_csytri.c
|
lapacke_csytri.c
|
||||||
|
lapacke_csytri_work.c
|
||||||
lapacke_csytri2.c
|
lapacke_csytri2.c
|
||||||
lapacke_csytri2_work.c
|
lapacke_csytri2_work.c
|
||||||
lapacke_csytri_3.c
|
|
||||||
lapacke_csytri_3_work.c
|
|
||||||
lapacke_csytri2x.c
|
lapacke_csytri2x.c
|
||||||
lapacke_csytri2x_work.c
|
lapacke_csytri2x_work.c
|
||||||
lapacke_csytri_work.c
|
lapacke_csytri_3.c
|
||||||
|
lapacke_csytri_3_work.c
|
||||||
lapacke_csytrs.c
|
lapacke_csytrs.c
|
||||||
lapacke_csytrs_rook.c
|
lapacke_csytrs_work.c
|
||||||
lapacke_csytrs2.c
|
lapacke_csytrs2.c
|
||||||
lapacke_csytrs2_work.c
|
lapacke_csytrs2_work.c
|
||||||
lapacke_csytrs_work.c
|
|
||||||
lapacke_csytrs_rook_work.c
|
|
||||||
lapacke_csytrs_aa.c
|
|
||||||
lapacke_csytrs_aa_2stage.c
|
|
||||||
lapacke_csytrs_aa_work.c
|
|
||||||
lapacke_csytrs_aa_2stage_work.c
|
|
||||||
lapacke_csytrs_3.c
|
lapacke_csytrs_3.c
|
||||||
lapacke_csytrs_3_work.c
|
lapacke_csytrs_3_work.c
|
||||||
|
lapacke_csytrs_aa.c
|
||||||
|
lapacke_csytrs_aa_work.c
|
||||||
|
lapacke_csytrs_aa_2stage.c
|
||||||
|
lapacke_csytrs_aa_2stage_work.c
|
||||||
|
lapacke_csytrs_rook.c
|
||||||
|
lapacke_csytrs_rook_work.c
|
||||||
lapacke_ctbcon.c
|
lapacke_ctbcon.c
|
||||||
lapacke_ctbcon_work.c
|
lapacke_ctbcon_work.c
|
||||||
lapacke_ctbrfs.c
|
lapacke_ctbrfs.c
|
||||||
|
@ -522,9 +526,9 @@ lapacke_ctpcon_work.c
|
||||||
lapacke_ctpmqrt.c
|
lapacke_ctpmqrt.c
|
||||||
lapacke_ctpmqrt_work.c
|
lapacke_ctpmqrt_work.c
|
||||||
lapacke_ctpqrt.c
|
lapacke_ctpqrt.c
|
||||||
|
lapacke_ctpqrt_work.c
|
||||||
lapacke_ctpqrt2.c
|
lapacke_ctpqrt2.c
|
||||||
lapacke_ctpqrt2_work.c
|
lapacke_ctpqrt2_work.c
|
||||||
lapacke_ctpqrt_work.c
|
|
||||||
lapacke_ctprfb.c
|
lapacke_ctprfb.c
|
||||||
lapacke_ctprfb_work.c
|
lapacke_ctprfb_work.c
|
||||||
lapacke_ctprfs.c
|
lapacke_ctprfs.c
|
||||||
|
@ -601,14 +605,16 @@ lapacke_cupgtr.c
|
||||||
lapacke_cupgtr_work.c
|
lapacke_cupgtr_work.c
|
||||||
lapacke_cupmtr.c
|
lapacke_cupmtr.c
|
||||||
lapacke_cupmtr_work.c
|
lapacke_cupmtr_work.c
|
||||||
|
)
|
||||||
|
set(SOURCES_DOUBLE
|
||||||
lapacke_dbbcsd.c
|
lapacke_dbbcsd.c
|
||||||
lapacke_dbbcsd_work.c
|
lapacke_dbbcsd_work.c
|
||||||
lapacke_dbdsdc.c
|
lapacke_dbdsdc.c
|
||||||
lapacke_dbdsdc_work.c
|
lapacke_dbdsdc_work.c
|
||||||
lapacke_dbdsvdx.c
|
|
||||||
lapacke_dbdsvdx_work.c
|
|
||||||
lapacke_dbdsqr.c
|
lapacke_dbdsqr.c
|
||||||
lapacke_dbdsqr_work.c
|
lapacke_dbdsqr_work.c
|
||||||
|
lapacke_dbdsvdx.c
|
||||||
|
lapacke_dbdsvdx_work.c
|
||||||
lapacke_ddisna.c
|
lapacke_ddisna.c
|
||||||
lapacke_ddisna_work.c
|
lapacke_ddisna_work.c
|
||||||
lapacke_dgbbrd.c
|
lapacke_dgbbrd.c
|
||||||
|
@ -686,11 +692,11 @@ lapacke_dgeqrf_work.c
|
||||||
lapacke_dgeqrfp.c
|
lapacke_dgeqrfp.c
|
||||||
lapacke_dgeqrfp_work.c
|
lapacke_dgeqrfp_work.c
|
||||||
lapacke_dgeqrt.c
|
lapacke_dgeqrt.c
|
||||||
|
lapacke_dgeqrt_work.c
|
||||||
lapacke_dgeqrt2.c
|
lapacke_dgeqrt2.c
|
||||||
lapacke_dgeqrt2_work.c
|
lapacke_dgeqrt2_work.c
|
||||||
lapacke_dgeqrt3.c
|
lapacke_dgeqrt3.c
|
||||||
lapacke_dgeqrt3_work.c
|
lapacke_dgeqrt3_work.c
|
||||||
lapacke_dgeqrt_work.c
|
|
||||||
lapacke_dgerfs.c
|
lapacke_dgerfs.c
|
||||||
lapacke_dgerfs_work.c
|
lapacke_dgerfs_work.c
|
||||||
lapacke_dgerqf.c
|
lapacke_dgerqf.c
|
||||||
|
@ -701,6 +707,8 @@ lapacke_dgesv.c
|
||||||
lapacke_dgesv_work.c
|
lapacke_dgesv_work.c
|
||||||
lapacke_dgesvd.c
|
lapacke_dgesvd.c
|
||||||
lapacke_dgesvd_work.c
|
lapacke_dgesvd_work.c
|
||||||
|
lapacke_dgesvdq.c
|
||||||
|
lapacke_dgesvdq_work.c
|
||||||
lapacke_dgesvdx.c
|
lapacke_dgesvdx.c
|
||||||
lapacke_dgesvdx_work.c
|
lapacke_dgesvdx_work.c
|
||||||
lapacke_dgesvj.c
|
lapacke_dgesvj.c
|
||||||
|
@ -737,10 +745,10 @@ lapacke_dggevx.c
|
||||||
lapacke_dggevx_work.c
|
lapacke_dggevx_work.c
|
||||||
lapacke_dggglm.c
|
lapacke_dggglm.c
|
||||||
lapacke_dggglm_work.c
|
lapacke_dggglm_work.c
|
||||||
lapacke_dgghrd.c
|
|
||||||
lapacke_dgghrd_work.c
|
|
||||||
lapacke_dgghd3.c
|
lapacke_dgghd3.c
|
||||||
lapacke_dgghd3_work.c
|
lapacke_dgghd3_work.c
|
||||||
|
lapacke_dgghrd.c
|
||||||
|
lapacke_dgghrd_work.c
|
||||||
lapacke_dgglse.c
|
lapacke_dgglse.c
|
||||||
lapacke_dgglse_work.c
|
lapacke_dgglse_work.c
|
||||||
lapacke_dggqrf.c
|
lapacke_dggqrf.c
|
||||||
|
@ -823,10 +831,10 @@ lapacke_dopmtr.c
|
||||||
lapacke_dopmtr_work.c
|
lapacke_dopmtr_work.c
|
||||||
lapacke_dorbdb.c
|
lapacke_dorbdb.c
|
||||||
lapacke_dorbdb_work.c
|
lapacke_dorbdb_work.c
|
||||||
lapacke_dorcsd2by1.c
|
|
||||||
lapacke_dorcsd2by1_work.c
|
|
||||||
lapacke_dorcsd.c
|
lapacke_dorcsd.c
|
||||||
lapacke_dorcsd_work.c
|
lapacke_dorcsd_work.c
|
||||||
|
lapacke_dorcsd2by1.c
|
||||||
|
lapacke_dorcsd2by1_work.c
|
||||||
lapacke_dorgbr.c
|
lapacke_dorgbr.c
|
||||||
lapacke_dorgbr_work.c
|
lapacke_dorgbr_work.c
|
||||||
lapacke_dorghr.c
|
lapacke_dorghr.c
|
||||||
|
@ -933,14 +941,14 @@ lapacke_dpttrs.c
|
||||||
lapacke_dpttrs_work.c
|
lapacke_dpttrs_work.c
|
||||||
lapacke_dsbev.c
|
lapacke_dsbev.c
|
||||||
lapacke_dsbev_work.c
|
lapacke_dsbev_work.c
|
||||||
lapacke_dsbevd.c
|
|
||||||
lapacke_dsbevd_work.c
|
|
||||||
lapacke_dsbevx.c
|
|
||||||
lapacke_dsbevx_work.c
|
|
||||||
lapacke_dsbev_2stage.c
|
lapacke_dsbev_2stage.c
|
||||||
lapacke_dsbev_2stage_work.c
|
lapacke_dsbev_2stage_work.c
|
||||||
|
lapacke_dsbevd.c
|
||||||
|
lapacke_dsbevd_work.c
|
||||||
lapacke_dsbevd_2stage.c
|
lapacke_dsbevd_2stage.c
|
||||||
lapacke_dsbevd_2stage_work.c
|
lapacke_dsbevd_2stage_work.c
|
||||||
|
lapacke_dsbevx.c
|
||||||
|
lapacke_dsbevx_work.c
|
||||||
lapacke_dsbevx_2stage.c
|
lapacke_dsbevx_2stage.c
|
||||||
lapacke_dsbevx_2stage_work.c
|
lapacke_dsbevx_2stage_work.c
|
||||||
lapacke_dsbgst.c
|
lapacke_dsbgst.c
|
||||||
|
@ -1021,18 +1029,18 @@ lapacke_dsyequb.c
|
||||||
lapacke_dsyequb_work.c
|
lapacke_dsyequb_work.c
|
||||||
lapacke_dsyev.c
|
lapacke_dsyev.c
|
||||||
lapacke_dsyev_work.c
|
lapacke_dsyev_work.c
|
||||||
lapacke_dsyevd.c
|
|
||||||
lapacke_dsyevd_work.c
|
|
||||||
lapacke_dsyevr.c
|
|
||||||
lapacke_dsyevr_work.c
|
|
||||||
lapacke_dsyevx.c
|
|
||||||
lapacke_dsyevx_work.c
|
|
||||||
lapacke_dsyev_2stage.c
|
lapacke_dsyev_2stage.c
|
||||||
lapacke_dsyev_2stage_work.c
|
lapacke_dsyev_2stage_work.c
|
||||||
|
lapacke_dsyevd.c
|
||||||
|
lapacke_dsyevd_work.c
|
||||||
lapacke_dsyevd_2stage.c
|
lapacke_dsyevd_2stage.c
|
||||||
lapacke_dsyevd_2stage_work.c
|
lapacke_dsyevd_2stage_work.c
|
||||||
|
lapacke_dsyevr.c
|
||||||
|
lapacke_dsyevr_work.c
|
||||||
lapacke_dsyevr_2stage.c
|
lapacke_dsyevr_2stage.c
|
||||||
lapacke_dsyevr_2stage_work.c
|
lapacke_dsyevr_2stage_work.c
|
||||||
|
lapacke_dsyevx.c
|
||||||
|
lapacke_dsyevx_work.c
|
||||||
lapacke_dsyevx_2stage.c
|
lapacke_dsyevx_2stage.c
|
||||||
lapacke_dsyevx_2stage_work.c
|
lapacke_dsyevx_2stage_work.c
|
||||||
lapacke_dsygst.c
|
lapacke_dsygst.c
|
||||||
|
@ -1048,15 +1056,15 @@ lapacke_dsygvx_work.c
|
||||||
lapacke_dsyrfs.c
|
lapacke_dsyrfs.c
|
||||||
lapacke_dsyrfs_work.c
|
lapacke_dsyrfs_work.c
|
||||||
lapacke_dsysv.c
|
lapacke_dsysv.c
|
||||||
lapacke_dsysv_rook.c
|
|
||||||
lapacke_dsysv_rook_work.c
|
|
||||||
lapacke_dsysv_work.c
|
lapacke_dsysv_work.c
|
||||||
lapacke_dsysv_aa.c
|
lapacke_dsysv_aa.c
|
||||||
lapacke_dsysv_aa_2stage.c
|
|
||||||
lapacke_dsysv_aa_work.c
|
lapacke_dsysv_aa_work.c
|
||||||
|
lapacke_dsysv_aa_2stage.c
|
||||||
lapacke_dsysv_aa_2stage_work.c
|
lapacke_dsysv_aa_2stage_work.c
|
||||||
lapacke_dsysv_rk.c
|
lapacke_dsysv_rk.c
|
||||||
lapacke_dsysv_rk_work.c
|
lapacke_dsysv_rk_work.c
|
||||||
|
lapacke_dsysv_rook.c
|
||||||
|
lapacke_dsysv_rook_work.c
|
||||||
lapacke_dsysvx.c
|
lapacke_dsysvx.c
|
||||||
lapacke_dsysvx_work.c
|
lapacke_dsysvx_work.c
|
||||||
lapacke_dsyswapr.c
|
lapacke_dsyswapr.c
|
||||||
|
@ -1065,33 +1073,33 @@ lapacke_dsytrd.c
|
||||||
lapacke_dsytrd_work.c
|
lapacke_dsytrd_work.c
|
||||||
lapacke_dsytrf.c
|
lapacke_dsytrf.c
|
||||||
lapacke_dsytrf_work.c
|
lapacke_dsytrf_work.c
|
||||||
lapacke_dsytrf_rook.c
|
|
||||||
lapacke_dsytrf_rook_work.c
|
|
||||||
lapacke_dsytrf_aa.c
|
lapacke_dsytrf_aa.c
|
||||||
lapacke_dsytrf_aa_2stage.c
|
|
||||||
lapacke_dsytrf_aa_work.c
|
lapacke_dsytrf_aa_work.c
|
||||||
|
lapacke_dsytrf_aa_2stage.c
|
||||||
lapacke_dsytrf_aa_2stage_work.c
|
lapacke_dsytrf_aa_2stage_work.c
|
||||||
lapacke_dsytrf_rk.c
|
lapacke_dsytrf_rk.c
|
||||||
lapacke_dsytrf_rk_work.c
|
lapacke_dsytrf_rk_work.c
|
||||||
|
lapacke_dsytrf_rook.c
|
||||||
|
lapacke_dsytrf_rook_work.c
|
||||||
lapacke_dsytri.c
|
lapacke_dsytri.c
|
||||||
|
lapacke_dsytri_work.c
|
||||||
lapacke_dsytri2.c
|
lapacke_dsytri2.c
|
||||||
lapacke_dsytri2_work.c
|
lapacke_dsytri2_work.c
|
||||||
lapacke_dsytri_3.c
|
|
||||||
lapacke_dsytri_3_work.c
|
|
||||||
lapacke_dsytri2x.c
|
lapacke_dsytri2x.c
|
||||||
lapacke_dsytri2x_work.c
|
lapacke_dsytri2x_work.c
|
||||||
lapacke_dsytri_work.c
|
lapacke_dsytri_3.c
|
||||||
|
lapacke_dsytri_3_work.c
|
||||||
lapacke_dsytrs.c
|
lapacke_dsytrs.c
|
||||||
lapacke_dsytrs_rook.c
|
lapacke_dsytrs_work.c
|
||||||
lapacke_dsytrs2.c
|
lapacke_dsytrs2.c
|
||||||
lapacke_dsytrs2_work.c
|
lapacke_dsytrs2_work.c
|
||||||
lapacke_dsytrs_aa.c
|
|
||||||
lapacke_dsytrs_aa_2stage.c
|
|
||||||
lapacke_dsytrs_aa_work.c
|
|
||||||
lapacke_dsytrs_aa_2stage_work.c
|
|
||||||
lapacke_dsytrs_3.c
|
lapacke_dsytrs_3.c
|
||||||
lapacke_dsytrs_3_work.c
|
lapacke_dsytrs_3_work.c
|
||||||
lapacke_dsytrs_work.c
|
lapacke_dsytrs_aa.c
|
||||||
|
lapacke_dsytrs_aa_work.c
|
||||||
|
lapacke_dsytrs_aa_2stage.c
|
||||||
|
lapacke_dsytrs_aa_2stage_work.c
|
||||||
|
lapacke_dsytrs_rook.c
|
||||||
lapacke_dsytrs_rook_work.c
|
lapacke_dsytrs_rook_work.c
|
||||||
lapacke_dtbcon.c
|
lapacke_dtbcon.c
|
||||||
lapacke_dtbcon_work.c
|
lapacke_dtbcon_work.c
|
||||||
|
@ -1124,9 +1132,9 @@ lapacke_dtpcon_work.c
|
||||||
lapacke_dtpmqrt.c
|
lapacke_dtpmqrt.c
|
||||||
lapacke_dtpmqrt_work.c
|
lapacke_dtpmqrt_work.c
|
||||||
lapacke_dtpqrt.c
|
lapacke_dtpqrt.c
|
||||||
|
lapacke_dtpqrt_work.c
|
||||||
lapacke_dtpqrt2.c
|
lapacke_dtpqrt2.c
|
||||||
lapacke_dtpqrt2_work.c
|
lapacke_dtpqrt2_work.c
|
||||||
lapacke_dtpqrt_work.c
|
|
||||||
lapacke_dtprfb.c
|
lapacke_dtprfb.c
|
||||||
lapacke_dtprfb_work.c
|
lapacke_dtprfb_work.c
|
||||||
lapacke_dtprfs.c
|
lapacke_dtprfs.c
|
||||||
|
@ -1163,15 +1171,21 @@ lapacke_dtrttp.c
|
||||||
lapacke_dtrttp_work.c
|
lapacke_dtrttp_work.c
|
||||||
lapacke_dtzrzf.c
|
lapacke_dtzrzf.c
|
||||||
lapacke_dtzrzf_work.c
|
lapacke_dtzrzf_work.c
|
||||||
|
)
|
||||||
|
|
||||||
|
set(SOURCES
|
||||||
lapacke_nancheck.c
|
lapacke_nancheck.c
|
||||||
|
lapacke_ilaver.c
|
||||||
|
)
|
||||||
|
set(SOURCES_SINGLE
|
||||||
lapacke_sbbcsd.c
|
lapacke_sbbcsd.c
|
||||||
lapacke_sbbcsd_work.c
|
lapacke_sbbcsd_work.c
|
||||||
lapacke_sbdsdc.c
|
lapacke_sbdsdc.c
|
||||||
lapacke_sbdsdc_work.c
|
lapacke_sbdsdc_work.c
|
||||||
lapacke_sbdsvdx.c
|
|
||||||
lapacke_sbdsvdx_work.c
|
|
||||||
lapacke_sbdsqr.c
|
lapacke_sbdsqr.c
|
||||||
lapacke_sbdsqr_work.c
|
lapacke_sbdsqr_work.c
|
||||||
|
lapacke_sbdsvdx.c
|
||||||
|
lapacke_sbdsvdx_work.c
|
||||||
lapacke_sdisna.c
|
lapacke_sdisna.c
|
||||||
lapacke_sdisna_work.c
|
lapacke_sdisna_work.c
|
||||||
lapacke_sgbbrd.c
|
lapacke_sgbbrd.c
|
||||||
|
@ -1249,11 +1263,11 @@ lapacke_sgeqrf_work.c
|
||||||
lapacke_sgeqrfp.c
|
lapacke_sgeqrfp.c
|
||||||
lapacke_sgeqrfp_work.c
|
lapacke_sgeqrfp_work.c
|
||||||
lapacke_sgeqrt.c
|
lapacke_sgeqrt.c
|
||||||
|
lapacke_sgeqrt_work.c
|
||||||
lapacke_sgeqrt2.c
|
lapacke_sgeqrt2.c
|
||||||
lapacke_sgeqrt2_work.c
|
lapacke_sgeqrt2_work.c
|
||||||
lapacke_sgeqrt3.c
|
lapacke_sgeqrt3.c
|
||||||
lapacke_sgeqrt3_work.c
|
lapacke_sgeqrt3_work.c
|
||||||
lapacke_sgeqrt_work.c
|
|
||||||
lapacke_sgerfs.c
|
lapacke_sgerfs.c
|
||||||
lapacke_sgerfs_work.c
|
lapacke_sgerfs_work.c
|
||||||
lapacke_sgerqf.c
|
lapacke_sgerqf.c
|
||||||
|
@ -1264,6 +1278,8 @@ lapacke_sgesv.c
|
||||||
lapacke_sgesv_work.c
|
lapacke_sgesv_work.c
|
||||||
lapacke_sgesvd.c
|
lapacke_sgesvd.c
|
||||||
lapacke_sgesvd_work.c
|
lapacke_sgesvd_work.c
|
||||||
|
lapacke_sgesvdq.c
|
||||||
|
lapacke_sgesvdq_work.c
|
||||||
lapacke_sgesvdx.c
|
lapacke_sgesvdx.c
|
||||||
lapacke_sgesvdx_work.c
|
lapacke_sgesvdx_work.c
|
||||||
lapacke_sgesvj.c
|
lapacke_sgesvj.c
|
||||||
|
@ -1300,10 +1316,10 @@ lapacke_sggevx.c
|
||||||
lapacke_sggevx_work.c
|
lapacke_sggevx_work.c
|
||||||
lapacke_sggglm.c
|
lapacke_sggglm.c
|
||||||
lapacke_sggglm_work.c
|
lapacke_sggglm_work.c
|
||||||
lapacke_sgghrd.c
|
|
||||||
lapacke_sgghrd_work.c
|
|
||||||
lapacke_sgghd3.c
|
lapacke_sgghd3.c
|
||||||
lapacke_sgghd3_work.c
|
lapacke_sgghd3_work.c
|
||||||
|
lapacke_sgghrd.c
|
||||||
|
lapacke_sgghrd_work.c
|
||||||
lapacke_sgglse.c
|
lapacke_sgglse.c
|
||||||
lapacke_sgglse_work.c
|
lapacke_sgglse_work.c
|
||||||
lapacke_sggqrf.c
|
lapacke_sggqrf.c
|
||||||
|
@ -1496,14 +1512,14 @@ lapacke_spttrs.c
|
||||||
lapacke_spttrs_work.c
|
lapacke_spttrs_work.c
|
||||||
lapacke_ssbev.c
|
lapacke_ssbev.c
|
||||||
lapacke_ssbev_work.c
|
lapacke_ssbev_work.c
|
||||||
lapacke_ssbevd.c
|
|
||||||
lapacke_ssbevd_work.c
|
|
||||||
lapacke_ssbevx.c
|
|
||||||
lapacke_ssbevx_work.c
|
|
||||||
lapacke_ssbev_2stage.c
|
lapacke_ssbev_2stage.c
|
||||||
lapacke_ssbev_2stage_work.c
|
lapacke_ssbev_2stage_work.c
|
||||||
|
lapacke_ssbevd.c
|
||||||
|
lapacke_ssbevd_work.c
|
||||||
lapacke_ssbevd_2stage.c
|
lapacke_ssbevd_2stage.c
|
||||||
lapacke_ssbevd_2stage_work.c
|
lapacke_ssbevd_2stage_work.c
|
||||||
|
lapacke_ssbevx.c
|
||||||
|
lapacke_ssbevx_work.c
|
||||||
lapacke_ssbevx_2stage.c
|
lapacke_ssbevx_2stage.c
|
||||||
lapacke_ssbevx_2stage_work.c
|
lapacke_ssbevx_2stage_work.c
|
||||||
lapacke_ssbgst.c
|
lapacke_ssbgst.c
|
||||||
|
@ -1580,18 +1596,18 @@ lapacke_ssyequb.c
|
||||||
lapacke_ssyequb_work.c
|
lapacke_ssyequb_work.c
|
||||||
lapacke_ssyev.c
|
lapacke_ssyev.c
|
||||||
lapacke_ssyev_work.c
|
lapacke_ssyev_work.c
|
||||||
lapacke_ssyevd.c
|
|
||||||
lapacke_ssyevd_work.c
|
|
||||||
lapacke_ssyevr.c
|
|
||||||
lapacke_ssyevr_work.c
|
|
||||||
lapacke_ssyevx.c
|
|
||||||
lapacke_ssyevx_work.c
|
|
||||||
lapacke_ssyev_2stage.c
|
lapacke_ssyev_2stage.c
|
||||||
lapacke_ssyev_2stage_work.c
|
lapacke_ssyev_2stage_work.c
|
||||||
|
lapacke_ssyevd.c
|
||||||
|
lapacke_ssyevd_work.c
|
||||||
lapacke_ssyevd_2stage.c
|
lapacke_ssyevd_2stage.c
|
||||||
lapacke_ssyevd_2stage_work.c
|
lapacke_ssyevd_2stage_work.c
|
||||||
|
lapacke_ssyevr.c
|
||||||
|
lapacke_ssyevr_work.c
|
||||||
lapacke_ssyevr_2stage.c
|
lapacke_ssyevr_2stage.c
|
||||||
lapacke_ssyevr_2stage_work.c
|
lapacke_ssyevr_2stage_work.c
|
||||||
|
lapacke_ssyevx.c
|
||||||
|
lapacke_ssyevx_work.c
|
||||||
lapacke_ssyevx_2stage.c
|
lapacke_ssyevx_2stage.c
|
||||||
lapacke_ssyevx_2stage_work.c
|
lapacke_ssyevx_2stage_work.c
|
||||||
lapacke_ssygst.c
|
lapacke_ssygst.c
|
||||||
|
@ -1607,8 +1623,6 @@ lapacke_ssygvx_work.c
|
||||||
lapacke_ssyrfs.c
|
lapacke_ssyrfs.c
|
||||||
lapacke_ssyrfs_work.c
|
lapacke_ssyrfs_work.c
|
||||||
lapacke_ssysv.c
|
lapacke_ssysv.c
|
||||||
lapacke_ssysv_rook.c
|
|
||||||
lapacke_ssysv_rook_work.c
|
|
||||||
lapacke_ssysv_work.c
|
lapacke_ssysv_work.c
|
||||||
lapacke_ssysv_aa.c
|
lapacke_ssysv_aa.c
|
||||||
lapacke_ssysv_aa_work.c
|
lapacke_ssysv_aa_work.c
|
||||||
|
@ -1616,6 +1630,8 @@ lapacke_ssysv_aa_2stage.c
|
||||||
lapacke_ssysv_aa_2stage_work.c
|
lapacke_ssysv_aa_2stage_work.c
|
||||||
lapacke_ssysv_rk.c
|
lapacke_ssysv_rk.c
|
||||||
lapacke_ssysv_rk_work.c
|
lapacke_ssysv_rk_work.c
|
||||||
|
lapacke_ssysv_rook.c
|
||||||
|
lapacke_ssysv_rook_work.c
|
||||||
lapacke_ssysvx.c
|
lapacke_ssysvx.c
|
||||||
lapacke_ssysvx_work.c
|
lapacke_ssysvx_work.c
|
||||||
lapacke_ssyswapr.c
|
lapacke_ssyswapr.c
|
||||||
|
@ -1624,33 +1640,33 @@ lapacke_ssytrd.c
|
||||||
lapacke_ssytrd_work.c
|
lapacke_ssytrd_work.c
|
||||||
lapacke_ssytrf.c
|
lapacke_ssytrf.c
|
||||||
lapacke_ssytrf_work.c
|
lapacke_ssytrf_work.c
|
||||||
lapacke_ssytrf_rook.c
|
|
||||||
lapacke_ssytrf_rook_work.c
|
|
||||||
lapacke_ssytrf_aa.c
|
lapacke_ssytrf_aa.c
|
||||||
lapacke_ssytrf_aa_2stage.c
|
|
||||||
lapacke_ssytrf_aa_work.c
|
lapacke_ssytrf_aa_work.c
|
||||||
|
lapacke_ssytrf_aa_2stage.c
|
||||||
lapacke_ssytrf_aa_2stage_work.c
|
lapacke_ssytrf_aa_2stage_work.c
|
||||||
lapacke_ssytrf_rk.c
|
lapacke_ssytrf_rk.c
|
||||||
lapacke_ssytrf_rk_work.c
|
lapacke_ssytrf_rk_work.c
|
||||||
|
lapacke_ssytrf_rook.c
|
||||||
|
lapacke_ssytrf_rook_work.c
|
||||||
lapacke_ssytri.c
|
lapacke_ssytri.c
|
||||||
|
lapacke_ssytri_work.c
|
||||||
lapacke_ssytri2.c
|
lapacke_ssytri2.c
|
||||||
lapacke_ssytri2_work.c
|
lapacke_ssytri2_work.c
|
||||||
lapacke_ssytri_3.c
|
|
||||||
lapacke_ssytri_3_work.c
|
|
||||||
lapacke_ssytri2x.c
|
lapacke_ssytri2x.c
|
||||||
lapacke_ssytri2x_work.c
|
lapacke_ssytri2x_work.c
|
||||||
lapacke_ssytri_work.c
|
lapacke_ssytri_3.c
|
||||||
|
lapacke_ssytri_3_work.c
|
||||||
lapacke_ssytrs.c
|
lapacke_ssytrs.c
|
||||||
lapacke_ssytrs_rook.c
|
lapacke_ssytrs_work.c
|
||||||
lapacke_ssytrs2.c
|
lapacke_ssytrs2.c
|
||||||
lapacke_ssytrs2_work.c
|
lapacke_ssytrs2_work.c
|
||||||
lapacke_ssytrs_aa.c
|
|
||||||
lapacke_ssytrs_aa_2stage.c
|
|
||||||
lapacke_ssytrs_aa_work.c
|
|
||||||
lapacke_ssytrs_aa_2stage_work.c
|
|
||||||
lapacke_ssytrs_3.c
|
lapacke_ssytrs_3.c
|
||||||
lapacke_ssytrs_3_work.c
|
lapacke_ssytrs_3_work.c
|
||||||
lapacke_ssytrs_work.c
|
lapacke_ssytrs_aa.c
|
||||||
|
lapacke_ssytrs_aa_work.c
|
||||||
|
lapacke_ssytrs_aa_2stage.c
|
||||||
|
lapacke_ssytrs_aa_2stage_work.c
|
||||||
|
lapacke_ssytrs_rook.c
|
||||||
lapacke_ssytrs_rook_work.c
|
lapacke_ssytrs_rook_work.c
|
||||||
lapacke_stbcon.c
|
lapacke_stbcon.c
|
||||||
lapacke_stbcon_work.c
|
lapacke_stbcon_work.c
|
||||||
|
@ -1722,6 +1738,8 @@ lapacke_strttp.c
|
||||||
lapacke_strttp_work.c
|
lapacke_strttp_work.c
|
||||||
lapacke_stzrzf.c
|
lapacke_stzrzf.c
|
||||||
lapacke_stzrzf_work.c
|
lapacke_stzrzf_work.c
|
||||||
|
)
|
||||||
|
set(SOURCES_COMPLEX16
|
||||||
lapacke_zbbcsd.c
|
lapacke_zbbcsd.c
|
||||||
lapacke_zbbcsd_work.c
|
lapacke_zbbcsd_work.c
|
||||||
lapacke_zbdsqr.c
|
lapacke_zbdsqr.c
|
||||||
|
@ -1805,11 +1823,11 @@ lapacke_zgeqrf_work.c
|
||||||
lapacke_zgeqrfp.c
|
lapacke_zgeqrfp.c
|
||||||
lapacke_zgeqrfp_work.c
|
lapacke_zgeqrfp_work.c
|
||||||
lapacke_zgeqrt.c
|
lapacke_zgeqrt.c
|
||||||
|
lapacke_zgeqrt_work.c
|
||||||
lapacke_zgeqrt2.c
|
lapacke_zgeqrt2.c
|
||||||
lapacke_zgeqrt2_work.c
|
lapacke_zgeqrt2_work.c
|
||||||
lapacke_zgeqrt3.c
|
lapacke_zgeqrt3.c
|
||||||
lapacke_zgeqrt3_work.c
|
lapacke_zgeqrt3_work.c
|
||||||
lapacke_zgeqrt_work.c
|
|
||||||
lapacke_zgerfs.c
|
lapacke_zgerfs.c
|
||||||
lapacke_zgerfs_work.c
|
lapacke_zgerfs_work.c
|
||||||
lapacke_zgerqf.c
|
lapacke_zgerqf.c
|
||||||
|
@ -1820,6 +1838,8 @@ lapacke_zgesv.c
|
||||||
lapacke_zgesv_work.c
|
lapacke_zgesv_work.c
|
||||||
lapacke_zgesvd.c
|
lapacke_zgesvd.c
|
||||||
lapacke_zgesvd_work.c
|
lapacke_zgesvd_work.c
|
||||||
|
lapacke_zgesvdq.c
|
||||||
|
lapacke_zgesvdq_work.c
|
||||||
lapacke_zgesvdx.c
|
lapacke_zgesvdx.c
|
||||||
lapacke_zgesvdx_work.c
|
lapacke_zgesvdx_work.c
|
||||||
lapacke_zgesvj.c
|
lapacke_zgesvj.c
|
||||||
|
@ -1856,10 +1876,10 @@ lapacke_zggevx.c
|
||||||
lapacke_zggevx_work.c
|
lapacke_zggevx_work.c
|
||||||
lapacke_zggglm.c
|
lapacke_zggglm.c
|
||||||
lapacke_zggglm_work.c
|
lapacke_zggglm_work.c
|
||||||
lapacke_zgghrd.c
|
|
||||||
lapacke_zgghrd_work.c
|
|
||||||
lapacke_zgghd3.c
|
lapacke_zgghd3.c
|
||||||
lapacke_zgghd3_work.c
|
lapacke_zgghd3_work.c
|
||||||
|
lapacke_zgghrd.c
|
||||||
|
lapacke_zgghrd_work.c
|
||||||
lapacke_zgglse.c
|
lapacke_zgglse.c
|
||||||
lapacke_zgglse_work.c
|
lapacke_zgglse_work.c
|
||||||
lapacke_zggqrf.c
|
lapacke_zggqrf.c
|
||||||
|
@ -1884,14 +1904,14 @@ lapacke_zgttrs.c
|
||||||
lapacke_zgttrs_work.c
|
lapacke_zgttrs_work.c
|
||||||
lapacke_zhbev.c
|
lapacke_zhbev.c
|
||||||
lapacke_zhbev_work.c
|
lapacke_zhbev_work.c
|
||||||
lapacke_zhbevd.c
|
|
||||||
lapacke_zhbevd_work.c
|
|
||||||
lapacke_zhbevx.c
|
|
||||||
lapacke_zhbevx_work.c
|
|
||||||
lapacke_zhbev_2stage.c
|
lapacke_zhbev_2stage.c
|
||||||
lapacke_zhbev_2stage_work.c
|
lapacke_zhbev_2stage_work.c
|
||||||
|
lapacke_zhbevd.c
|
||||||
|
lapacke_zhbevd_work.c
|
||||||
lapacke_zhbevd_2stage.c
|
lapacke_zhbevd_2stage.c
|
||||||
lapacke_zhbevd_2stage_work.c
|
lapacke_zhbevd_2stage_work.c
|
||||||
|
lapacke_zhbevx.c
|
||||||
|
lapacke_zhbevx_work.c
|
||||||
lapacke_zhbevx_2stage.c
|
lapacke_zhbevx_2stage.c
|
||||||
lapacke_zhbevx_2stage_work.c
|
lapacke_zhbevx_2stage_work.c
|
||||||
lapacke_zhbgst.c
|
lapacke_zhbgst.c
|
||||||
|
@ -1912,18 +1932,18 @@ lapacke_zheequb.c
|
||||||
lapacke_zheequb_work.c
|
lapacke_zheequb_work.c
|
||||||
lapacke_zheev.c
|
lapacke_zheev.c
|
||||||
lapacke_zheev_work.c
|
lapacke_zheev_work.c
|
||||||
lapacke_zheevd.c
|
|
||||||
lapacke_zheevd_work.c
|
|
||||||
lapacke_zheevr.c
|
|
||||||
lapacke_zheevr_work.c
|
|
||||||
lapacke_zheevx.c
|
|
||||||
lapacke_zheevx_work.c
|
|
||||||
lapacke_zheev_2stage.c
|
lapacke_zheev_2stage.c
|
||||||
lapacke_zheev_2stage_work.c
|
lapacke_zheev_2stage_work.c
|
||||||
|
lapacke_zheevd.c
|
||||||
|
lapacke_zheevd_work.c
|
||||||
lapacke_zheevd_2stage.c
|
lapacke_zheevd_2stage.c
|
||||||
lapacke_zheevd_2stage_work.c
|
lapacke_zheevd_2stage_work.c
|
||||||
|
lapacke_zheevr.c
|
||||||
|
lapacke_zheevr_work.c
|
||||||
lapacke_zheevr_2stage.c
|
lapacke_zheevr_2stage.c
|
||||||
lapacke_zheevr_2stage_work.c
|
lapacke_zheevr_2stage_work.c
|
||||||
|
lapacke_zheevx.c
|
||||||
|
lapacke_zheevx_work.c
|
||||||
lapacke_zheevx_2stage.c
|
lapacke_zheevx_2stage.c
|
||||||
lapacke_zheevx_2stage_work.c
|
lapacke_zheevx_2stage_work.c
|
||||||
lapacke_zhegst.c
|
lapacke_zhegst.c
|
||||||
|
@ -1941,8 +1961,8 @@ lapacke_zherfs_work.c
|
||||||
lapacke_zhesv.c
|
lapacke_zhesv.c
|
||||||
lapacke_zhesv_work.c
|
lapacke_zhesv_work.c
|
||||||
lapacke_zhesv_aa.c
|
lapacke_zhesv_aa.c
|
||||||
lapacke_zhesv_aa_2stage.c
|
|
||||||
lapacke_zhesv_aa_work.c
|
lapacke_zhesv_aa_work.c
|
||||||
|
lapacke_zhesv_aa_2stage.c
|
||||||
lapacke_zhesv_aa_2stage_work.c
|
lapacke_zhesv_aa_2stage_work.c
|
||||||
lapacke_zhesv_rk.c
|
lapacke_zhesv_rk.c
|
||||||
lapacke_zhesv_rk_work.c
|
lapacke_zhesv_rk_work.c
|
||||||
|
@ -1953,34 +1973,34 @@ lapacke_zheswapr_work.c
|
||||||
lapacke_zhetrd.c
|
lapacke_zhetrd.c
|
||||||
lapacke_zhetrd_work.c
|
lapacke_zhetrd_work.c
|
||||||
lapacke_zhetrf.c
|
lapacke_zhetrf.c
|
||||||
lapacke_zhetrf_rook.c
|
|
||||||
lapacke_zhetrf_work.c
|
lapacke_zhetrf_work.c
|
||||||
lapacke_zhetrf_rook_work.c
|
|
||||||
lapacke_zhetrf_aa.c
|
lapacke_zhetrf_aa.c
|
||||||
lapacke_zhetrf_aa_2stage.c
|
|
||||||
lapacke_zhetrf_aa_work.c
|
lapacke_zhetrf_aa_work.c
|
||||||
|
lapacke_zhetrf_aa_2stage.c
|
||||||
lapacke_zhetrf_aa_2stage_work.c
|
lapacke_zhetrf_aa_2stage_work.c
|
||||||
lapacke_zhetrf_rk.c
|
lapacke_zhetrf_rk.c
|
||||||
lapacke_zhetrf_rk_work.c
|
lapacke_zhetrf_rk_work.c
|
||||||
|
lapacke_zhetrf_rook.c
|
||||||
|
lapacke_zhetrf_rook_work.c
|
||||||
lapacke_zhetri.c
|
lapacke_zhetri.c
|
||||||
|
lapacke_zhetri_work.c
|
||||||
lapacke_zhetri2.c
|
lapacke_zhetri2.c
|
||||||
lapacke_zhetri2_work.c
|
lapacke_zhetri2_work.c
|
||||||
lapacke_zhetri_3.c
|
|
||||||
lapacke_zhetri_3_work.c
|
|
||||||
lapacke_zhetri2x.c
|
lapacke_zhetri2x.c
|
||||||
lapacke_zhetri2x_work.c
|
lapacke_zhetri2x_work.c
|
||||||
lapacke_zhetri_work.c
|
lapacke_zhetri_3.c
|
||||||
|
lapacke_zhetri_3_work.c
|
||||||
lapacke_zhetrs.c
|
lapacke_zhetrs.c
|
||||||
lapacke_zhetrs_rook.c
|
lapacke_zhetrs_work.c
|
||||||
lapacke_zhetrs2.c
|
lapacke_zhetrs2.c
|
||||||
lapacke_zhetrs2_work.c
|
lapacke_zhetrs2_work.c
|
||||||
lapacke_zhetrs_work.c
|
|
||||||
lapacke_zhetrs_aa.c
|
|
||||||
lapacke_zhetrs_aa_2stage.c
|
|
||||||
lapacke_zhetrs_aa_work.c
|
|
||||||
lapacke_zhetrs_aa_2stage_work.c
|
|
||||||
lapacke_zhetrs_3.c
|
lapacke_zhetrs_3.c
|
||||||
lapacke_zhetrs_3_work.c
|
lapacke_zhetrs_3_work.c
|
||||||
|
lapacke_zhetrs_aa.c
|
||||||
|
lapacke_zhetrs_aa_work.c
|
||||||
|
lapacke_zhetrs_aa_2stage.c
|
||||||
|
lapacke_zhetrs_aa_2stage_work.c
|
||||||
|
lapacke_zhetrs_rook.c
|
||||||
lapacke_zhetrs_rook_work.c
|
lapacke_zhetrs_rook_work.c
|
||||||
lapacke_zhfrk.c
|
lapacke_zhfrk.c
|
||||||
lapacke_zhfrk_work.c
|
lapacke_zhfrk_work.c
|
||||||
|
@ -2172,52 +2192,54 @@ lapacke_zsyconv.c
|
||||||
lapacke_zsyconv_work.c
|
lapacke_zsyconv_work.c
|
||||||
lapacke_zsyequb.c
|
lapacke_zsyequb.c
|
||||||
lapacke_zsyequb_work.c
|
lapacke_zsyequb_work.c
|
||||||
|
lapacke_zsyr.c
|
||||||
|
lapacke_zsyr_work.c
|
||||||
lapacke_zsyrfs.c
|
lapacke_zsyrfs.c
|
||||||
lapacke_zsyrfs_work.c
|
lapacke_zsyrfs_work.c
|
||||||
lapacke_zsysv.c
|
lapacke_zsysv.c
|
||||||
lapacke_zsysv_rook.c
|
|
||||||
lapacke_zsysv_rook_work.c
|
|
||||||
lapacke_zsysv_work.c
|
lapacke_zsysv_work.c
|
||||||
lapacke_zsysv_aa.c
|
lapacke_zsysv_aa.c
|
||||||
lapacke_zsysv_aa_2stage.c
|
|
||||||
lapacke_zsysv_aa_work.c
|
lapacke_zsysv_aa_work.c
|
||||||
|
lapacke_zsysv_aa_2stage.c
|
||||||
lapacke_zsysv_aa_2stage_work.c
|
lapacke_zsysv_aa_2stage_work.c
|
||||||
lapacke_zsysv_rk.c
|
lapacke_zsysv_rk.c
|
||||||
lapacke_zsysv_rk_work.c
|
lapacke_zsysv_rk_work.c
|
||||||
|
lapacke_zsysv_rook.c
|
||||||
|
lapacke_zsysv_rook_work.c
|
||||||
lapacke_zsysvx.c
|
lapacke_zsysvx.c
|
||||||
lapacke_zsysvx_work.c
|
lapacke_zsysvx_work.c
|
||||||
lapacke_zsyswapr.c
|
lapacke_zsyswapr.c
|
||||||
lapacke_zsyswapr_work.c
|
lapacke_zsyswapr_work.c
|
||||||
lapacke_zsytrf.c
|
lapacke_zsytrf.c
|
||||||
lapacke_zsytrf_work.c
|
lapacke_zsytrf_work.c
|
||||||
lapacke_zsytrf_rook.c
|
|
||||||
lapacke_zsytrf_rook_work.c
|
|
||||||
lapacke_zsytrf_aa.c
|
lapacke_zsytrf_aa.c
|
||||||
lapacke_zsytrf_aa_2stage.c
|
|
||||||
lapacke_zsytrf_aa_work.c
|
lapacke_zsytrf_aa_work.c
|
||||||
|
lapacke_zsytrf_aa_2stage.c
|
||||||
lapacke_zsytrf_aa_2stage_work.c
|
lapacke_zsytrf_aa_2stage_work.c
|
||||||
lapacke_zsytrf_rk.c
|
lapacke_zsytrf_rk.c
|
||||||
lapacke_zsytrf_rk_work.c
|
lapacke_zsytrf_rk_work.c
|
||||||
|
lapacke_zsytrf_rook.c
|
||||||
|
lapacke_zsytrf_rook_work.c
|
||||||
lapacke_zsytri.c
|
lapacke_zsytri.c
|
||||||
|
lapacke_zsytri_work.c
|
||||||
lapacke_zsytri2.c
|
lapacke_zsytri2.c
|
||||||
lapacke_zsytri2_work.c
|
lapacke_zsytri2_work.c
|
||||||
lapacke_zsytri_3.c
|
|
||||||
lapacke_zsytri_3_work.c
|
|
||||||
lapacke_zsytri2x.c
|
lapacke_zsytri2x.c
|
||||||
lapacke_zsytri2x_work.c
|
lapacke_zsytri2x_work.c
|
||||||
lapacke_zsytri_work.c
|
lapacke_zsytri_3.c
|
||||||
|
lapacke_zsytri_3_work.c
|
||||||
lapacke_zsytrs.c
|
lapacke_zsytrs.c
|
||||||
lapacke_zsytrs_rook.c
|
lapacke_zsytrs_work.c
|
||||||
lapacke_zsytrs2.c
|
lapacke_zsytrs2.c
|
||||||
lapacke_zsytrs2_work.c
|
lapacke_zsytrs2_work.c
|
||||||
lapacke_zsytrs_work.c
|
|
||||||
lapacke_zsytrs_rook_work.c
|
|
||||||
lapacke_zsytrs_aa.c
|
|
||||||
lapacke_zsytrs_aa_2stage.c
|
|
||||||
lapacke_zsytrs_aa_work.c
|
|
||||||
lapacke_zsytrs_aa_2stage_work.c
|
|
||||||
lapacke_zsytrs_3.c
|
lapacke_zsytrs_3.c
|
||||||
lapacke_zsytrs_3_work.c
|
lapacke_zsytrs_3_work.c
|
||||||
|
lapacke_zsytrs_aa.c
|
||||||
|
lapacke_zsytrs_aa_work.c
|
||||||
|
lapacke_zsytrs_aa_2stage.c
|
||||||
|
lapacke_zsytrs_aa_2stage_work.c
|
||||||
|
lapacke_zsytrs_rook.c
|
||||||
|
lapacke_zsytrs_rook_work.c
|
||||||
lapacke_ztbcon.c
|
lapacke_ztbcon.c
|
||||||
lapacke_ztbcon_work.c
|
lapacke_ztbcon_work.c
|
||||||
lapacke_ztbrfs.c
|
lapacke_ztbrfs.c
|
||||||
|
@ -2249,9 +2271,9 @@ lapacke_ztpcon_work.c
|
||||||
lapacke_ztpmqrt.c
|
lapacke_ztpmqrt.c
|
||||||
lapacke_ztpmqrt_work.c
|
lapacke_ztpmqrt_work.c
|
||||||
lapacke_ztpqrt.c
|
lapacke_ztpqrt.c
|
||||||
|
lapacke_ztpqrt_work.c
|
||||||
lapacke_ztpqrt2.c
|
lapacke_ztpqrt2.c
|
||||||
lapacke_ztpqrt2_work.c
|
lapacke_ztpqrt2_work.c
|
||||||
lapacke_ztpqrt_work.c
|
|
||||||
lapacke_ztprfb.c
|
lapacke_ztprfb.c
|
||||||
lapacke_ztprfb_work.c
|
lapacke_ztprfb_work.c
|
||||||
lapacke_ztprfs.c
|
lapacke_ztprfs.c
|
||||||
|
@ -2328,11 +2350,6 @@ lapacke_zupgtr.c
|
||||||
lapacke_zupgtr_work.c
|
lapacke_zupgtr_work.c
|
||||||
lapacke_zupmtr.c
|
lapacke_zupmtr.c
|
||||||
lapacke_zupmtr_work.c
|
lapacke_zupmtr_work.c
|
||||||
lapacke_zsyr.c
|
|
||||||
lapacke_csyr.c
|
|
||||||
lapacke_zsyr_work.c
|
|
||||||
lapacke_csyr_work.c
|
|
||||||
lapacke_ilaver.c
|
|
||||||
)
|
)
|
||||||
|
|
||||||
set(DEPRECATED
|
set(DEPRECATED
|
||||||
|
|
|
@ -32,12 +32,21 @@
|
||||||
##############################################################################
|
##############################################################################
|
||||||
# makefile for LAPACKE, used to build lapacke binary.
|
# makefile for LAPACKE, used to build lapacke binary.
|
||||||
#
|
#
|
||||||
# Note: we use multiple OBJ_A, OBJ_B, etc, instead of a single OBJ
|
# Note: we use multiple OBJ_S, OBJ_C, etc, instead of a single OBJ
|
||||||
# to allow build with mingw (argument list too long for the msys ar)
|
# to allow build with mingw (argument list too long for the msys ar)
|
||||||
#
|
#
|
||||||
include ../../make.inc
|
TOPSRCDIR = ../..
|
||||||
|
include $(TOPSRCDIR)/make.inc
|
||||||
|
|
||||||
OBJ_A = \
|
.SUFFIXES: .c .o
|
||||||
|
.c.o:
|
||||||
|
$(CC) $(CFLAGS) -I../include -c -o $@ $<
|
||||||
|
|
||||||
|
OBJ = \
|
||||||
|
lapacke_ilaver.o \
|
||||||
|
lapacke_nancheck.o
|
||||||
|
|
||||||
|
OBJ_C = \
|
||||||
lapacke_cbbcsd.o \
|
lapacke_cbbcsd.o \
|
||||||
lapacke_cbbcsd_work.o \
|
lapacke_cbbcsd_work.o \
|
||||||
lapacke_cbdsqr.o \
|
lapacke_cbdsqr.o \
|
||||||
|
@ -82,12 +91,12 @@ lapacke_cgeevx.o \
|
||||||
lapacke_cgeevx_work.o \
|
lapacke_cgeevx_work.o \
|
||||||
lapacke_cgehrd.o \
|
lapacke_cgehrd.o \
|
||||||
lapacke_cgehrd_work.o \
|
lapacke_cgehrd_work.o \
|
||||||
|
lapacke_cgejsv.o \
|
||||||
|
lapacke_cgejsv_work.o \
|
||||||
lapacke_cgelq.o \
|
lapacke_cgelq.o \
|
||||||
lapacke_cgelq_work.o \
|
lapacke_cgelq_work.o \
|
||||||
lapacke_cgelq2.o \
|
lapacke_cgelq2.o \
|
||||||
lapacke_cgelq2_work.o \
|
lapacke_cgelq2_work.o \
|
||||||
lapacke_cgejsv.o \
|
|
||||||
lapacke_cgejsv_work.o \
|
|
||||||
lapacke_cgelqf.o \
|
lapacke_cgelqf.o \
|
||||||
lapacke_cgelqf_work.o \
|
lapacke_cgelqf_work.o \
|
||||||
lapacke_cgels.o \
|
lapacke_cgels.o \
|
||||||
|
@ -117,11 +126,11 @@ lapacke_cgeqrf_work.o \
|
||||||
lapacke_cgeqrfp.o \
|
lapacke_cgeqrfp.o \
|
||||||
lapacke_cgeqrfp_work.o \
|
lapacke_cgeqrfp_work.o \
|
||||||
lapacke_cgeqrt.o \
|
lapacke_cgeqrt.o \
|
||||||
|
lapacke_cgeqrt_work.o \
|
||||||
lapacke_cgeqrt2.o \
|
lapacke_cgeqrt2.o \
|
||||||
lapacke_cgeqrt2_work.o \
|
lapacke_cgeqrt2_work.o \
|
||||||
lapacke_cgeqrt3.o \
|
lapacke_cgeqrt3.o \
|
||||||
lapacke_cgeqrt3_work.o \
|
lapacke_cgeqrt3_work.o \
|
||||||
lapacke_cgeqrt_work.o \
|
|
||||||
lapacke_cgerfs.o \
|
lapacke_cgerfs.o \
|
||||||
lapacke_cgerfs_work.o \
|
lapacke_cgerfs_work.o \
|
||||||
lapacke_cgerqf.o \
|
lapacke_cgerqf.o \
|
||||||
|
@ -132,6 +141,8 @@ lapacke_cgesv.o \
|
||||||
lapacke_cgesv_work.o \
|
lapacke_cgesv_work.o \
|
||||||
lapacke_cgesvd.o \
|
lapacke_cgesvd.o \
|
||||||
lapacke_cgesvd_work.o \
|
lapacke_cgesvd_work.o \
|
||||||
|
lapacke_cgesvdq.o \
|
||||||
|
lapacke_cgesvdq_work.o \
|
||||||
lapacke_cgesvdx.o \
|
lapacke_cgesvdx.o \
|
||||||
lapacke_cgesvdx_work.o \
|
lapacke_cgesvdx_work.o \
|
||||||
lapacke_cgesvj.o \
|
lapacke_cgesvj.o \
|
||||||
|
@ -168,10 +179,10 @@ lapacke_cggevx.o \
|
||||||
lapacke_cggevx_work.o \
|
lapacke_cggevx_work.o \
|
||||||
lapacke_cggglm.o \
|
lapacke_cggglm.o \
|
||||||
lapacke_cggglm_work.o \
|
lapacke_cggglm_work.o \
|
||||||
lapacke_cgghrd.o \
|
|
||||||
lapacke_cgghrd_work.o \
|
|
||||||
lapacke_cgghd3.o \
|
lapacke_cgghd3.o \
|
||||||
lapacke_cgghd3_work.o \
|
lapacke_cgghd3_work.o \
|
||||||
|
lapacke_cgghrd.o \
|
||||||
|
lapacke_cgghrd_work.o \
|
||||||
lapacke_cgglse.o \
|
lapacke_cgglse.o \
|
||||||
lapacke_cgglse_work.o \
|
lapacke_cgglse_work.o \
|
||||||
lapacke_cggqrf.o \
|
lapacke_cggqrf.o \
|
||||||
|
@ -196,14 +207,14 @@ lapacke_cgttrs.o \
|
||||||
lapacke_cgttrs_work.o \
|
lapacke_cgttrs_work.o \
|
||||||
lapacke_chbev.o \
|
lapacke_chbev.o \
|
||||||
lapacke_chbev_work.o \
|
lapacke_chbev_work.o \
|
||||||
lapacke_chbevd.o \
|
|
||||||
lapacke_chbevd_work.o \
|
|
||||||
lapacke_chbevx.o \
|
|
||||||
lapacke_chbevx_work.o \
|
|
||||||
lapacke_chbev_2stage.o \
|
lapacke_chbev_2stage.o \
|
||||||
lapacke_chbev_2stage_work.o \
|
lapacke_chbev_2stage_work.o \
|
||||||
|
lapacke_chbevd.o \
|
||||||
|
lapacke_chbevd_work.o \
|
||||||
lapacke_chbevd_2stage.o \
|
lapacke_chbevd_2stage.o \
|
||||||
lapacke_chbevd_2stage_work.o \
|
lapacke_chbevd_2stage_work.o \
|
||||||
|
lapacke_chbevx.o \
|
||||||
|
lapacke_chbevx_work.o \
|
||||||
lapacke_chbevx_2stage.o \
|
lapacke_chbevx_2stage.o \
|
||||||
lapacke_chbevx_2stage_work.o \
|
lapacke_chbevx_2stage_work.o \
|
||||||
lapacke_chbgst.o \
|
lapacke_chbgst.o \
|
||||||
|
@ -224,18 +235,18 @@ lapacke_cheequb.o \
|
||||||
lapacke_cheequb_work.o \
|
lapacke_cheequb_work.o \
|
||||||
lapacke_cheev.o \
|
lapacke_cheev.o \
|
||||||
lapacke_cheev_work.o \
|
lapacke_cheev_work.o \
|
||||||
lapacke_cheevd.o \
|
|
||||||
lapacke_cheevd_work.o \
|
|
||||||
lapacke_cheevr.o \
|
|
||||||
lapacke_cheevr_work.o \
|
|
||||||
lapacke_cheevx.o \
|
|
||||||
lapacke_cheevx_work.o \
|
|
||||||
lapacke_cheev_2stage.o \
|
lapacke_cheev_2stage.o \
|
||||||
lapacke_cheev_2stage_work.o \
|
lapacke_cheev_2stage_work.o \
|
||||||
|
lapacke_cheevd.o \
|
||||||
|
lapacke_cheevd_work.o \
|
||||||
lapacke_cheevd_2stage.o \
|
lapacke_cheevd_2stage.o \
|
||||||
lapacke_cheevd_2stage_work.o \
|
lapacke_cheevd_2stage_work.o \
|
||||||
|
lapacke_cheevr.o \
|
||||||
|
lapacke_cheevr_work.o \
|
||||||
lapacke_cheevr_2stage.o \
|
lapacke_cheevr_2stage.o \
|
||||||
lapacke_cheevr_2stage_work.o \
|
lapacke_cheevr_2stage_work.o \
|
||||||
|
lapacke_cheevx.o \
|
||||||
|
lapacke_cheevx_work.o \
|
||||||
lapacke_cheevx_2stage.o \
|
lapacke_cheevx_2stage.o \
|
||||||
lapacke_cheevx_2stage_work.o \
|
lapacke_cheevx_2stage_work.o \
|
||||||
lapacke_chegst.o \
|
lapacke_chegst.o \
|
||||||
|
@ -265,35 +276,35 @@ lapacke_cheswapr_work.o \
|
||||||
lapacke_chetrd.o \
|
lapacke_chetrd.o \
|
||||||
lapacke_chetrd_work.o \
|
lapacke_chetrd_work.o \
|
||||||
lapacke_chetrf.o \
|
lapacke_chetrf.o \
|
||||||
lapacke_chetrf_rook.o \
|
|
||||||
lapacke_chetrf_work.o \
|
lapacke_chetrf_work.o \
|
||||||
lapacke_chetrf_rook_work.o \
|
|
||||||
lapacke_chetrf_aa.o \
|
lapacke_chetrf_aa.o \
|
||||||
lapacke_chetrf_aa_2stage.o \
|
|
||||||
lapacke_chetrf_aa_work.o \
|
lapacke_chetrf_aa_work.o \
|
||||||
|
lapacke_chetrf_aa_2stage.o \
|
||||||
lapacke_chetrf_aa_2stage_work.o \
|
lapacke_chetrf_aa_2stage_work.o \
|
||||||
lapacke_chetrf_rk.o \
|
lapacke_chetrf_rk.o \
|
||||||
lapacke_chetrf_rk_work.o \
|
lapacke_chetrf_rk_work.o \
|
||||||
|
lapacke_chetrf_rook.o \
|
||||||
|
lapacke_chetrf_rook_work.o \
|
||||||
lapacke_chetri.o \
|
lapacke_chetri.o \
|
||||||
|
lapacke_chetri_work.o \
|
||||||
lapacke_chetri2.o \
|
lapacke_chetri2.o \
|
||||||
lapacke_chetri2_work.o \
|
lapacke_chetri2_work.o \
|
||||||
lapacke_chetri_3.o \
|
|
||||||
lapacke_chetri_3_work.o \
|
|
||||||
lapacke_chetri2x.o \
|
lapacke_chetri2x.o \
|
||||||
lapacke_chetri2x_work.o \
|
lapacke_chetri2x_work.o \
|
||||||
lapacke_chetri_work.o \
|
lapacke_chetri_3.o \
|
||||||
|
lapacke_chetri_3_work.o \
|
||||||
lapacke_chetrs.o \
|
lapacke_chetrs.o \
|
||||||
lapacke_chetrs_rook.o \
|
lapacke_chetrs_work.o \
|
||||||
lapacke_chetrs2.o \
|
lapacke_chetrs2.o \
|
||||||
lapacke_chetrs2_work.o \
|
lapacke_chetrs2_work.o \
|
||||||
lapacke_chetrs_work.o \
|
|
||||||
lapacke_chetrs_rook_work.o \
|
|
||||||
lapacke_chetrs_aa.o \
|
|
||||||
lapacke_chetrs_aa_2stage.o \
|
|
||||||
lapacke_chetrs_aa_work.o \
|
|
||||||
lapacke_chetrs_aa_2stage_work.o \
|
|
||||||
lapacke_chetrs_3.o \
|
lapacke_chetrs_3.o \
|
||||||
lapacke_chetrs_3_work.o \
|
lapacke_chetrs_3_work.o \
|
||||||
|
lapacke_chetrs_aa.o \
|
||||||
|
lapacke_chetrs_aa_work.o \
|
||||||
|
lapacke_chetrs_aa_2stage.o \
|
||||||
|
lapacke_chetrs_aa_2stage_work.o \
|
||||||
|
lapacke_chetrs_rook.o \
|
||||||
|
lapacke_chetrs_rook_work.o \
|
||||||
lapacke_chfrk.o \
|
lapacke_chfrk.o \
|
||||||
lapacke_chfrk_work.o \
|
lapacke_chfrk_work.o \
|
||||||
lapacke_chgeqz.o \
|
lapacke_chgeqz.o \
|
||||||
|
@ -484,11 +495,11 @@ lapacke_csyconv.o \
|
||||||
lapacke_csyconv_work.o \
|
lapacke_csyconv_work.o \
|
||||||
lapacke_csyequb.o \
|
lapacke_csyequb.o \
|
||||||
lapacke_csyequb_work.o \
|
lapacke_csyequb_work.o \
|
||||||
|
lapacke_csyr.o \
|
||||||
|
lapacke_csyr_work.o \
|
||||||
lapacke_csyrfs.o \
|
lapacke_csyrfs.o \
|
||||||
lapacke_csyrfs_work.o \
|
lapacke_csyrfs_work.o \
|
||||||
lapacke_csysv.o \
|
lapacke_csysv.o \
|
||||||
lapacke_csysv_rook.o \
|
|
||||||
lapacke_csysv_rook_work.o \
|
|
||||||
lapacke_csysv_work.o \
|
lapacke_csysv_work.o \
|
||||||
lapacke_csysv_aa.o \
|
lapacke_csysv_aa.o \
|
||||||
lapacke_csysv_aa_work.o \
|
lapacke_csysv_aa_work.o \
|
||||||
|
@ -496,40 +507,42 @@ lapacke_csysv_aa_2stage.o \
|
||||||
lapacke_csysv_aa_2stage_work.o \
|
lapacke_csysv_aa_2stage_work.o \
|
||||||
lapacke_csysv_rk.o \
|
lapacke_csysv_rk.o \
|
||||||
lapacke_csysv_rk_work.o \
|
lapacke_csysv_rk_work.o \
|
||||||
|
lapacke_csysv_rook.o \
|
||||||
|
lapacke_csysv_rook_work.o \
|
||||||
lapacke_csysvx.o \
|
lapacke_csysvx.o \
|
||||||
lapacke_csysvx_work.o \
|
lapacke_csysvx_work.o \
|
||||||
lapacke_csyswapr.o \
|
lapacke_csyswapr.o \
|
||||||
lapacke_csyswapr_work.o \
|
lapacke_csyswapr_work.o \
|
||||||
lapacke_csytrf.o \
|
lapacke_csytrf.o \
|
||||||
lapacke_csytrf_work.o \
|
lapacke_csytrf_work.o \
|
||||||
lapacke_csytrf_rook.o \
|
|
||||||
lapacke_csytrf_rook_work.o \
|
|
||||||
lapacke_csytrf_aa.o \
|
lapacke_csytrf_aa.o \
|
||||||
lapacke_csytrf_aa_2stage.o \
|
|
||||||
lapacke_csytrf_aa_work.o \
|
lapacke_csytrf_aa_work.o \
|
||||||
|
lapacke_csytrf_aa_2stage.o \
|
||||||
lapacke_csytrf_aa_2stage_work.o \
|
lapacke_csytrf_aa_2stage_work.o \
|
||||||
lapacke_csytrf_rk.o \
|
lapacke_csytrf_rk.o \
|
||||||
lapacke_csytrf_rk_work.o \
|
lapacke_csytrf_rk_work.o \
|
||||||
|
lapacke_csytrf_rook.o \
|
||||||
|
lapacke_csytrf_rook_work.o \
|
||||||
lapacke_csytri.o \
|
lapacke_csytri.o \
|
||||||
|
lapacke_csytri_work.o \
|
||||||
lapacke_csytri2.o \
|
lapacke_csytri2.o \
|
||||||
lapacke_csytri2_work.o \
|
lapacke_csytri2_work.o \
|
||||||
lapacke_csytri_3.o \
|
|
||||||
lapacke_csytri_3_work.o \
|
|
||||||
lapacke_csytri2x.o \
|
lapacke_csytri2x.o \
|
||||||
lapacke_csytri2x_work.o \
|
lapacke_csytri2x_work.o \
|
||||||
lapacke_csytri_work.o \
|
lapacke_csytri_3.o \
|
||||||
|
lapacke_csytri_3_work.o \
|
||||||
lapacke_csytrs.o \
|
lapacke_csytrs.o \
|
||||||
lapacke_csytrs_rook.o \
|
lapacke_csytrs_work.o \
|
||||||
lapacke_csytrs2.o \
|
lapacke_csytrs2.o \
|
||||||
lapacke_csytrs2_work.o \
|
lapacke_csytrs2_work.o \
|
||||||
lapacke_csytrs_work.o \
|
|
||||||
lapacke_csytrs_rook_work.o \
|
|
||||||
lapacke_csytrs_aa.o \
|
|
||||||
lapacke_csytrs_aa_2stage.o \
|
|
||||||
lapacke_csytrs_aa_work.o \
|
|
||||||
lapacke_csytrs_aa_2stage_work.o \
|
|
||||||
lapacke_csytrs_3.o \
|
lapacke_csytrs_3.o \
|
||||||
lapacke_csytrs_3_work.o \
|
lapacke_csytrs_3_work.o \
|
||||||
|
lapacke_csytrs_aa.o \
|
||||||
|
lapacke_csytrs_aa_work.o \
|
||||||
|
lapacke_csytrs_aa_2stage.o \
|
||||||
|
lapacke_csytrs_aa_2stage_work.o \
|
||||||
|
lapacke_csytrs_rook.o \
|
||||||
|
lapacke_csytrs_rook_work.o \
|
||||||
lapacke_ctbcon.o \
|
lapacke_ctbcon.o \
|
||||||
lapacke_ctbcon_work.o \
|
lapacke_ctbcon_work.o \
|
||||||
lapacke_ctbrfs.o \
|
lapacke_ctbrfs.o \
|
||||||
|
@ -561,9 +574,9 @@ lapacke_ctpcon_work.o \
|
||||||
lapacke_ctpmqrt.o \
|
lapacke_ctpmqrt.o \
|
||||||
lapacke_ctpmqrt_work.o \
|
lapacke_ctpmqrt_work.o \
|
||||||
lapacke_ctpqrt.o \
|
lapacke_ctpqrt.o \
|
||||||
|
lapacke_ctpqrt_work.o \
|
||||||
lapacke_ctpqrt2.o \
|
lapacke_ctpqrt2.o \
|
||||||
lapacke_ctpqrt2_work.o \
|
lapacke_ctpqrt2_work.o \
|
||||||
lapacke_ctpqrt_work.o \
|
|
||||||
lapacke_ctprfb.o \
|
lapacke_ctprfb.o \
|
||||||
lapacke_ctprfb_work.o \
|
lapacke_ctprfb_work.o \
|
||||||
lapacke_ctprfs.o \
|
lapacke_ctprfs.o \
|
||||||
|
@ -639,15 +652,17 @@ lapacke_cunmtr_work.o \
|
||||||
lapacke_cupgtr.o \
|
lapacke_cupgtr.o \
|
||||||
lapacke_cupgtr_work.o \
|
lapacke_cupgtr_work.o \
|
||||||
lapacke_cupmtr.o \
|
lapacke_cupmtr.o \
|
||||||
lapacke_cupmtr_work.o \
|
lapacke_cupmtr_work.o
|
||||||
|
|
||||||
|
OBJ_D = \
|
||||||
lapacke_dbbcsd.o \
|
lapacke_dbbcsd.o \
|
||||||
lapacke_dbbcsd_work.o \
|
lapacke_dbbcsd_work.o \
|
||||||
lapacke_dbdsdc.o \
|
lapacke_dbdsdc.o \
|
||||||
lapacke_dbdsdc_work.o \
|
lapacke_dbdsdc_work.o \
|
||||||
lapacke_dbdsvdx.o \
|
|
||||||
lapacke_dbdsvdx_work.o \
|
|
||||||
lapacke_dbdsqr.o \
|
lapacke_dbdsqr.o \
|
||||||
lapacke_dbdsqr_work.o \
|
lapacke_dbdsqr_work.o \
|
||||||
|
lapacke_dbdsvdx.o \
|
||||||
|
lapacke_dbdsvdx_work.o \
|
||||||
lapacke_ddisna.o \
|
lapacke_ddisna.o \
|
||||||
lapacke_ddisna_work.o \
|
lapacke_ddisna_work.o \
|
||||||
lapacke_dgbbrd.o \
|
lapacke_dgbbrd.o \
|
||||||
|
@ -725,11 +740,11 @@ lapacke_dgeqrf_work.o \
|
||||||
lapacke_dgeqrfp.o \
|
lapacke_dgeqrfp.o \
|
||||||
lapacke_dgeqrfp_work.o \
|
lapacke_dgeqrfp_work.o \
|
||||||
lapacke_dgeqrt.o \
|
lapacke_dgeqrt.o \
|
||||||
|
lapacke_dgeqrt_work.o \
|
||||||
lapacke_dgeqrt2.o \
|
lapacke_dgeqrt2.o \
|
||||||
lapacke_dgeqrt2_work.o \
|
lapacke_dgeqrt2_work.o \
|
||||||
lapacke_dgeqrt3.o \
|
lapacke_dgeqrt3.o \
|
||||||
lapacke_dgeqrt3_work.o \
|
lapacke_dgeqrt3_work.o \
|
||||||
lapacke_dgeqrt_work.o \
|
|
||||||
lapacke_dgerfs.o \
|
lapacke_dgerfs.o \
|
||||||
lapacke_dgerfs_work.o \
|
lapacke_dgerfs_work.o \
|
||||||
lapacke_dgerqf.o \
|
lapacke_dgerqf.o \
|
||||||
|
@ -740,6 +755,8 @@ lapacke_dgesv.o \
|
||||||
lapacke_dgesv_work.o \
|
lapacke_dgesv_work.o \
|
||||||
lapacke_dgesvd.o \
|
lapacke_dgesvd.o \
|
||||||
lapacke_dgesvd_work.o \
|
lapacke_dgesvd_work.o \
|
||||||
|
lapacke_dgesvdq.o \
|
||||||
|
lapacke_dgesvdq_work.o \
|
||||||
lapacke_dgesvdx.o \
|
lapacke_dgesvdx.o \
|
||||||
lapacke_dgesvdx_work.o \
|
lapacke_dgesvdx_work.o \
|
||||||
lapacke_dgesvj.o \
|
lapacke_dgesvj.o \
|
||||||
|
@ -776,10 +793,10 @@ lapacke_dggevx.o \
|
||||||
lapacke_dggevx_work.o \
|
lapacke_dggevx_work.o \
|
||||||
lapacke_dggglm.o \
|
lapacke_dggglm.o \
|
||||||
lapacke_dggglm_work.o \
|
lapacke_dggglm_work.o \
|
||||||
lapacke_dgghrd.o \
|
|
||||||
lapacke_dgghrd_work.o \
|
|
||||||
lapacke_dgghd3.o \
|
lapacke_dgghd3.o \
|
||||||
lapacke_dgghd3_work.o \
|
lapacke_dgghd3_work.o \
|
||||||
|
lapacke_dgghrd.o \
|
||||||
|
lapacke_dgghrd_work.o \
|
||||||
lapacke_dgglse.o \
|
lapacke_dgglse.o \
|
||||||
lapacke_dgglse_work.o \
|
lapacke_dgglse_work.o \
|
||||||
lapacke_dggqrf.o \
|
lapacke_dggqrf.o \
|
||||||
|
@ -972,14 +989,14 @@ lapacke_dpttrs.o \
|
||||||
lapacke_dpttrs_work.o \
|
lapacke_dpttrs_work.o \
|
||||||
lapacke_dsbev.o \
|
lapacke_dsbev.o \
|
||||||
lapacke_dsbev_work.o \
|
lapacke_dsbev_work.o \
|
||||||
lapacke_dsbevd.o \
|
|
||||||
lapacke_dsbevd_work.o \
|
|
||||||
lapacke_dsbevx.o \
|
|
||||||
lapacke_dsbevx_work.o \
|
|
||||||
lapacke_dsbev_2stage.o \
|
lapacke_dsbev_2stage.o \
|
||||||
lapacke_dsbev_2stage_work.o \
|
lapacke_dsbev_2stage_work.o \
|
||||||
|
lapacke_dsbevd.o \
|
||||||
|
lapacke_dsbevd_work.o \
|
||||||
lapacke_dsbevd_2stage.o \
|
lapacke_dsbevd_2stage.o \
|
||||||
lapacke_dsbevd_2stage_work.o \
|
lapacke_dsbevd_2stage_work.o \
|
||||||
|
lapacke_dsbevx.o \
|
||||||
|
lapacke_dsbevx_work.o \
|
||||||
lapacke_dsbevx_2stage.o \
|
lapacke_dsbevx_2stage.o \
|
||||||
lapacke_dsbevx_2stage_work.o \
|
lapacke_dsbevx_2stage_work.o \
|
||||||
lapacke_dsbgst.o \
|
lapacke_dsbgst.o \
|
||||||
|
@ -1060,18 +1077,18 @@ lapacke_dsyequb.o \
|
||||||
lapacke_dsyequb_work.o \
|
lapacke_dsyequb_work.o \
|
||||||
lapacke_dsyev.o \
|
lapacke_dsyev.o \
|
||||||
lapacke_dsyev_work.o \
|
lapacke_dsyev_work.o \
|
||||||
lapacke_dsyevd.o \
|
|
||||||
lapacke_dsyevd_work.o \
|
|
||||||
lapacke_dsyevr.o \
|
|
||||||
lapacke_dsyevr_work.o \
|
|
||||||
lapacke_dsyevx.o \
|
|
||||||
lapacke_dsyevx_work.o \
|
|
||||||
lapacke_dsyev_2stage.o \
|
lapacke_dsyev_2stage.o \
|
||||||
lapacke_dsyev_2stage_work.o \
|
lapacke_dsyev_2stage_work.o \
|
||||||
|
lapacke_dsyevd.o \
|
||||||
|
lapacke_dsyevd_work.o \
|
||||||
lapacke_dsyevd_2stage.o \
|
lapacke_dsyevd_2stage.o \
|
||||||
lapacke_dsyevd_2stage_work.o \
|
lapacke_dsyevd_2stage_work.o \
|
||||||
|
lapacke_dsyevr.o \
|
||||||
|
lapacke_dsyevr_work.o \
|
||||||
lapacke_dsyevr_2stage.o \
|
lapacke_dsyevr_2stage.o \
|
||||||
lapacke_dsyevr_2stage_work.o \
|
lapacke_dsyevr_2stage_work.o \
|
||||||
|
lapacke_dsyevx.o \
|
||||||
|
lapacke_dsyevx_work.o \
|
||||||
lapacke_dsyevx_2stage.o \
|
lapacke_dsyevx_2stage.o \
|
||||||
lapacke_dsyevx_2stage_work.o \
|
lapacke_dsyevx_2stage_work.o \
|
||||||
lapacke_dsygst.o \
|
lapacke_dsygst.o \
|
||||||
|
@ -1087,8 +1104,6 @@ lapacke_dsygvx_work.o \
|
||||||
lapacke_dsyrfs.o \
|
lapacke_dsyrfs.o \
|
||||||
lapacke_dsyrfs_work.o \
|
lapacke_dsyrfs_work.o \
|
||||||
lapacke_dsysv.o \
|
lapacke_dsysv.o \
|
||||||
lapacke_dsysv_rook.o \
|
|
||||||
lapacke_dsysv_rook_work.o \
|
|
||||||
lapacke_dsysv_work.o \
|
lapacke_dsysv_work.o \
|
||||||
lapacke_dsysv_aa.o \
|
lapacke_dsysv_aa.o \
|
||||||
lapacke_dsysv_aa_work.o \
|
lapacke_dsysv_aa_work.o \
|
||||||
|
@ -1096,6 +1111,8 @@ lapacke_dsysv_aa_2stage.o \
|
||||||
lapacke_dsysv_aa_2stage_work.o \
|
lapacke_dsysv_aa_2stage_work.o \
|
||||||
lapacke_dsysv_rk.o \
|
lapacke_dsysv_rk.o \
|
||||||
lapacke_dsysv_rk_work.o \
|
lapacke_dsysv_rk_work.o \
|
||||||
|
lapacke_dsysv_rook.o \
|
||||||
|
lapacke_dsysv_rook_work.o \
|
||||||
lapacke_dsysvx.o \
|
lapacke_dsysvx.o \
|
||||||
lapacke_dsysvx_work.o \
|
lapacke_dsysvx_work.o \
|
||||||
lapacke_dsyswapr.o \
|
lapacke_dsyswapr.o \
|
||||||
|
@ -1104,36 +1121,34 @@ lapacke_dsytrd.o \
|
||||||
lapacke_dsytrd_work.o \
|
lapacke_dsytrd_work.o \
|
||||||
lapacke_dsytrf.o \
|
lapacke_dsytrf.o \
|
||||||
lapacke_dsytrf_work.o \
|
lapacke_dsytrf_work.o \
|
||||||
lapacke_dsytrf_rook.o \
|
|
||||||
lapacke_dsytrf_rook_work.o \
|
|
||||||
lapacke_dsytrf_aa.o \
|
lapacke_dsytrf_aa.o \
|
||||||
lapacke_dsytrf_aa_work.o \
|
lapacke_dsytrf_aa_work.o \
|
||||||
lapacke_dsytrf_aa_2stage.o \
|
lapacke_dsytrf_aa_2stage.o \
|
||||||
lapacke_dsytrf_aa_2stage_work.o \
|
lapacke_dsytrf_aa_2stage_work.o \
|
||||||
lapacke_dsytrf_rk.o \
|
lapacke_dsytrf_rk.o \
|
||||||
lapacke_dsytrf_rk_work.o \
|
lapacke_dsytrf_rk_work.o \
|
||||||
|
lapacke_dsytrf_rook.o \
|
||||||
|
lapacke_dsytrf_rook_work.o \
|
||||||
lapacke_dsytri.o \
|
lapacke_dsytri.o \
|
||||||
|
lapacke_dsytri_work.o \
|
||||||
lapacke_dsytri2.o \
|
lapacke_dsytri2.o \
|
||||||
lapacke_dsytri2_work.o \
|
lapacke_dsytri2_work.o \
|
||||||
lapacke_dsytri_3.o \
|
|
||||||
lapacke_dsytri_3_work.o \
|
|
||||||
lapacke_dsytri2x.o \
|
lapacke_dsytri2x.o \
|
||||||
lapacke_dsytri2x_work.o \
|
lapacke_dsytri2x_work.o \
|
||||||
lapacke_dsytri_work.o
|
lapacke_dsytri_3.o \
|
||||||
|
lapacke_dsytri_3_work.o \
|
||||||
OBJ_B = \
|
|
||||||
lapacke_dsytrs.o \
|
lapacke_dsytrs.o \
|
||||||
lapacke_dsytrs_rook.o \
|
lapacke_dsytrs_work.o \
|
||||||
lapacke_dsytrs2.o \
|
lapacke_dsytrs2.o \
|
||||||
lapacke_dsytrs2_work.o \
|
lapacke_dsytrs2_work.o \
|
||||||
lapacke_dsytrs_work.o \
|
|
||||||
lapacke_dsytrs_rook_work.o \
|
|
||||||
lapacke_dsytrs_aa.o \
|
|
||||||
lapacke_dsytrs_aa_2stage.o \
|
|
||||||
lapacke_dsytrs_aa_work.o \
|
|
||||||
lapacke_dsytrs_aa_2stage_work.o \
|
|
||||||
lapacke_dsytrs_3.o \
|
lapacke_dsytrs_3.o \
|
||||||
lapacke_dsytrs_3_work.o \
|
lapacke_dsytrs_3_work.o \
|
||||||
|
lapacke_dsytrs_aa.o \
|
||||||
|
lapacke_dsytrs_aa_work.o \
|
||||||
|
lapacke_dsytrs_aa_2stage.o \
|
||||||
|
lapacke_dsytrs_aa_2stage_work.o \
|
||||||
|
lapacke_dsytrs_rook.o \
|
||||||
|
lapacke_dsytrs_rook_work.o \
|
||||||
lapacke_dtbcon.o \
|
lapacke_dtbcon.o \
|
||||||
lapacke_dtbcon_work.o \
|
lapacke_dtbcon_work.o \
|
||||||
lapacke_dtbrfs.o \
|
lapacke_dtbrfs.o \
|
||||||
|
@ -1165,9 +1180,9 @@ lapacke_dtpcon_work.o \
|
||||||
lapacke_dtpmqrt.o \
|
lapacke_dtpmqrt.o \
|
||||||
lapacke_dtpmqrt_work.o \
|
lapacke_dtpmqrt_work.o \
|
||||||
lapacke_dtpqrt.o \
|
lapacke_dtpqrt.o \
|
||||||
|
lapacke_dtpqrt_work.o \
|
||||||
lapacke_dtpqrt2.o \
|
lapacke_dtpqrt2.o \
|
||||||
lapacke_dtpqrt2_work.o \
|
lapacke_dtpqrt2_work.o \
|
||||||
lapacke_dtpqrt_work.o \
|
|
||||||
lapacke_dtprfb.o \
|
lapacke_dtprfb.o \
|
||||||
lapacke_dtprfb_work.o \
|
lapacke_dtprfb_work.o \
|
||||||
lapacke_dtprfs.o \
|
lapacke_dtprfs.o \
|
||||||
|
@ -1203,16 +1218,17 @@ lapacke_dtrttf_work.o \
|
||||||
lapacke_dtrttp.o \
|
lapacke_dtrttp.o \
|
||||||
lapacke_dtrttp_work.o \
|
lapacke_dtrttp_work.o \
|
||||||
lapacke_dtzrzf.o \
|
lapacke_dtzrzf.o \
|
||||||
lapacke_dtzrzf_work.o \
|
lapacke_dtzrzf_work.o
|
||||||
lapacke_nancheck.o \
|
|
||||||
|
OBJ_S = \
|
||||||
lapacke_sbbcsd.o \
|
lapacke_sbbcsd.o \
|
||||||
lapacke_sbbcsd_work.o \
|
lapacke_sbbcsd_work.o \
|
||||||
lapacke_sbdsdc.o \
|
lapacke_sbdsdc.o \
|
||||||
lapacke_sbdsdc_work.o \
|
lapacke_sbdsdc_work.o \
|
||||||
lapacke_sbdsvdx.o \
|
|
||||||
lapacke_sbdsvdx_work.o \
|
|
||||||
lapacke_sbdsqr.o \
|
lapacke_sbdsqr.o \
|
||||||
lapacke_sbdsqr_work.o \
|
lapacke_sbdsqr_work.o \
|
||||||
|
lapacke_sbdsvdx.o \
|
||||||
|
lapacke_sbdsvdx_work.o \
|
||||||
lapacke_sdisna.o \
|
lapacke_sdisna.o \
|
||||||
lapacke_sdisna_work.o \
|
lapacke_sdisna_work.o \
|
||||||
lapacke_sgbbrd.o \
|
lapacke_sgbbrd.o \
|
||||||
|
@ -1290,11 +1306,11 @@ lapacke_sgeqrf_work.o \
|
||||||
lapacke_sgeqrfp.o \
|
lapacke_sgeqrfp.o \
|
||||||
lapacke_sgeqrfp_work.o \
|
lapacke_sgeqrfp_work.o \
|
||||||
lapacke_sgeqrt.o \
|
lapacke_sgeqrt.o \
|
||||||
|
lapacke_sgeqrt_work.o \
|
||||||
lapacke_sgeqrt2.o \
|
lapacke_sgeqrt2.o \
|
||||||
lapacke_sgeqrt2_work.o \
|
lapacke_sgeqrt2_work.o \
|
||||||
lapacke_sgeqrt3.o \
|
lapacke_sgeqrt3.o \
|
||||||
lapacke_sgeqrt3_work.o \
|
lapacke_sgeqrt3_work.o \
|
||||||
lapacke_sgeqrt_work.o \
|
|
||||||
lapacke_sgerfs.o \
|
lapacke_sgerfs.o \
|
||||||
lapacke_sgerfs_work.o \
|
lapacke_sgerfs_work.o \
|
||||||
lapacke_sgerqf.o \
|
lapacke_sgerqf.o \
|
||||||
|
@ -1305,6 +1321,8 @@ lapacke_sgesv.o \
|
||||||
lapacke_sgesv_work.o \
|
lapacke_sgesv_work.o \
|
||||||
lapacke_sgesvd.o \
|
lapacke_sgesvd.o \
|
||||||
lapacke_sgesvd_work.o \
|
lapacke_sgesvd_work.o \
|
||||||
|
lapacke_sgesvdq.o \
|
||||||
|
lapacke_sgesvdq_work.o \
|
||||||
lapacke_sgesvdx.o \
|
lapacke_sgesvdx.o \
|
||||||
lapacke_sgesvdx_work.o \
|
lapacke_sgesvdx_work.o \
|
||||||
lapacke_sgesvj.o \
|
lapacke_sgesvj.o \
|
||||||
|
@ -1341,10 +1359,10 @@ lapacke_sggevx.o \
|
||||||
lapacke_sggevx_work.o \
|
lapacke_sggevx_work.o \
|
||||||
lapacke_sggglm.o \
|
lapacke_sggglm.o \
|
||||||
lapacke_sggglm_work.o \
|
lapacke_sggglm_work.o \
|
||||||
lapacke_sgghrd.o \
|
|
||||||
lapacke_sgghrd_work.o \
|
|
||||||
lapacke_sgghd3.o \
|
lapacke_sgghd3.o \
|
||||||
lapacke_sgghd3_work.o \
|
lapacke_sgghd3_work.o \
|
||||||
|
lapacke_sgghrd.o \
|
||||||
|
lapacke_sgghrd_work.o \
|
||||||
lapacke_sgglse.o \
|
lapacke_sgglse.o \
|
||||||
lapacke_sgglse_work.o \
|
lapacke_sgglse_work.o \
|
||||||
lapacke_sggqrf.o \
|
lapacke_sggqrf.o \
|
||||||
|
@ -1537,14 +1555,14 @@ lapacke_spttrs.o \
|
||||||
lapacke_spttrs_work.o \
|
lapacke_spttrs_work.o \
|
||||||
lapacke_ssbev.o \
|
lapacke_ssbev.o \
|
||||||
lapacke_ssbev_work.o \
|
lapacke_ssbev_work.o \
|
||||||
lapacke_ssbevd.o \
|
|
||||||
lapacke_ssbevd_work.o \
|
|
||||||
lapacke_ssbevx.o \
|
|
||||||
lapacke_ssbevx_work.o \
|
|
||||||
lapacke_ssbev_2stage.o \
|
lapacke_ssbev_2stage.o \
|
||||||
lapacke_ssbev_2stage_work.o \
|
lapacke_ssbev_2stage_work.o \
|
||||||
|
lapacke_ssbevd.o \
|
||||||
|
lapacke_ssbevd_work.o \
|
||||||
lapacke_ssbevd_2stage.o \
|
lapacke_ssbevd_2stage.o \
|
||||||
lapacke_ssbevd_2stage_work.o \
|
lapacke_ssbevd_2stage_work.o \
|
||||||
|
lapacke_ssbevx.o \
|
||||||
|
lapacke_ssbevx_work.o \
|
||||||
lapacke_ssbevx_2stage.o \
|
lapacke_ssbevx_2stage.o \
|
||||||
lapacke_ssbevx_2stage_work.o \
|
lapacke_ssbevx_2stage_work.o \
|
||||||
lapacke_ssbgst.o \
|
lapacke_ssbgst.o \
|
||||||
|
@ -1621,18 +1639,18 @@ lapacke_ssyequb.o \
|
||||||
lapacke_ssyequb_work.o \
|
lapacke_ssyequb_work.o \
|
||||||
lapacke_ssyev.o \
|
lapacke_ssyev.o \
|
||||||
lapacke_ssyev_work.o \
|
lapacke_ssyev_work.o \
|
||||||
lapacke_ssyevd.o \
|
|
||||||
lapacke_ssyevd_work.o \
|
|
||||||
lapacke_ssyevr.o \
|
|
||||||
lapacke_ssyevr_work.o \
|
|
||||||
lapacke_ssyevx.o \
|
|
||||||
lapacke_ssyevx_work.o \
|
|
||||||
lapacke_ssyev_2stage.o \
|
lapacke_ssyev_2stage.o \
|
||||||
lapacke_ssyev_2stage_work.o \
|
lapacke_ssyev_2stage_work.o \
|
||||||
|
lapacke_ssyevd.o \
|
||||||
|
lapacke_ssyevd_work.o \
|
||||||
lapacke_ssyevd_2stage.o \
|
lapacke_ssyevd_2stage.o \
|
||||||
lapacke_ssyevd_2stage_work.o \
|
lapacke_ssyevd_2stage_work.o \
|
||||||
|
lapacke_ssyevr.o \
|
||||||
|
lapacke_ssyevr_work.o \
|
||||||
lapacke_ssyevr_2stage.o \
|
lapacke_ssyevr_2stage.o \
|
||||||
lapacke_ssyevr_2stage_work.o \
|
lapacke_ssyevr_2stage_work.o \
|
||||||
|
lapacke_ssyevx.o \
|
||||||
|
lapacke_ssyevx_work.o \
|
||||||
lapacke_ssyevx_2stage.o \
|
lapacke_ssyevx_2stage.o \
|
||||||
lapacke_ssyevx_2stage_work.o \
|
lapacke_ssyevx_2stage_work.o \
|
||||||
lapacke_ssygst.o \
|
lapacke_ssygst.o \
|
||||||
|
@ -1648,8 +1666,6 @@ lapacke_ssygvx_work.o \
|
||||||
lapacke_ssyrfs.o \
|
lapacke_ssyrfs.o \
|
||||||
lapacke_ssyrfs_work.o \
|
lapacke_ssyrfs_work.o \
|
||||||
lapacke_ssysv.o \
|
lapacke_ssysv.o \
|
||||||
lapacke_ssysv_rook.o \
|
|
||||||
lapacke_ssysv_rook_work.o \
|
|
||||||
lapacke_ssysv_work.o \
|
lapacke_ssysv_work.o \
|
||||||
lapacke_ssysv_aa.o \
|
lapacke_ssysv_aa.o \
|
||||||
lapacke_ssysv_aa_work.o \
|
lapacke_ssysv_aa_work.o \
|
||||||
|
@ -1657,6 +1673,8 @@ lapacke_ssysv_aa_2stage.o \
|
||||||
lapacke_ssysv_aa_2stage_work.o \
|
lapacke_ssysv_aa_2stage_work.o \
|
||||||
lapacke_ssysv_rk.o \
|
lapacke_ssysv_rk.o \
|
||||||
lapacke_ssysv_rk_work.o \
|
lapacke_ssysv_rk_work.o \
|
||||||
|
lapacke_ssysv_rook.o \
|
||||||
|
lapacke_ssysv_rook_work.o \
|
||||||
lapacke_ssysvx.o \
|
lapacke_ssysvx.o \
|
||||||
lapacke_ssysvx_work.o \
|
lapacke_ssysvx_work.o \
|
||||||
lapacke_ssyswapr.o \
|
lapacke_ssyswapr.o \
|
||||||
|
@ -1665,34 +1683,34 @@ lapacke_ssytrd.o \
|
||||||
lapacke_ssytrd_work.o \
|
lapacke_ssytrd_work.o \
|
||||||
lapacke_ssytrf.o \
|
lapacke_ssytrf.o \
|
||||||
lapacke_ssytrf_work.o \
|
lapacke_ssytrf_work.o \
|
||||||
lapacke_ssytrf_rook.o \
|
|
||||||
lapacke_ssytrf_rook_work.o \
|
|
||||||
lapacke_ssytrf_aa.o \
|
lapacke_ssytrf_aa.o \
|
||||||
lapacke_ssytrf_aa_work.o \
|
lapacke_ssytrf_aa_work.o \
|
||||||
lapacke_ssytrf_aa_2stage.o \
|
lapacke_ssytrf_aa_2stage.o \
|
||||||
lapacke_ssytrf_aa_2stage_work.o \
|
lapacke_ssytrf_aa_2stage_work.o \
|
||||||
lapacke_ssytrf_rk.o \
|
lapacke_ssytrf_rk.o \
|
||||||
lapacke_ssytrf_rk_work.o \
|
lapacke_ssytrf_rk_work.o \
|
||||||
|
lapacke_ssytrf_rook.o \
|
||||||
|
lapacke_ssytrf_rook_work.o \
|
||||||
lapacke_ssytri.o \
|
lapacke_ssytri.o \
|
||||||
|
lapacke_ssytri_work.o \
|
||||||
lapacke_ssytri2.o \
|
lapacke_ssytri2.o \
|
||||||
lapacke_ssytri2_work.o \
|
lapacke_ssytri2_work.o \
|
||||||
lapacke_ssytri_3.o \
|
|
||||||
lapacke_ssytri_3_work.o \
|
|
||||||
lapacke_ssytri2x.o \
|
lapacke_ssytri2x.o \
|
||||||
lapacke_ssytri2x_work.o \
|
lapacke_ssytri2x_work.o \
|
||||||
lapacke_ssytri_work.o \
|
lapacke_ssytri_3.o \
|
||||||
|
lapacke_ssytri_3_work.o \
|
||||||
lapacke_ssytrs.o \
|
lapacke_ssytrs.o \
|
||||||
lapacke_ssytrs_rook.o \
|
lapacke_ssytrs_work.o \
|
||||||
lapacke_ssytrs2.o \
|
lapacke_ssytrs2.o \
|
||||||
lapacke_ssytrs2_work.o \
|
lapacke_ssytrs2_work.o \
|
||||||
lapacke_ssytrs_work.o \
|
|
||||||
lapacke_ssytrs_rook_work.o \
|
|
||||||
lapacke_ssytrs_aa.o \
|
|
||||||
lapacke_ssytrs_aa_2stage.o \
|
|
||||||
lapacke_ssytrs_aa_work.o \
|
|
||||||
lapacke_ssytrs_aa_2stage_work.o \
|
|
||||||
lapacke_ssytrs_3.o \
|
lapacke_ssytrs_3.o \
|
||||||
lapacke_ssytrs_3_work.o \
|
lapacke_ssytrs_3_work.o \
|
||||||
|
lapacke_ssytrs_aa.o \
|
||||||
|
lapacke_ssytrs_aa_work.o \
|
||||||
|
lapacke_ssytrs_aa_2stage.o \
|
||||||
|
lapacke_ssytrs_aa_2stage_work.o \
|
||||||
|
lapacke_ssytrs_rook.o \
|
||||||
|
lapacke_ssytrs_rook_work.o \
|
||||||
lapacke_stbcon.o \
|
lapacke_stbcon.o \
|
||||||
lapacke_stbcon_work.o \
|
lapacke_stbcon_work.o \
|
||||||
lapacke_stbrfs.o \
|
lapacke_stbrfs.o \
|
||||||
|
@ -1762,7 +1780,9 @@ lapacke_strttf_work.o \
|
||||||
lapacke_strttp.o \
|
lapacke_strttp.o \
|
||||||
lapacke_strttp_work.o \
|
lapacke_strttp_work.o \
|
||||||
lapacke_stzrzf.o \
|
lapacke_stzrzf.o \
|
||||||
lapacke_stzrzf_work.o \
|
lapacke_stzrzf_work.o
|
||||||
|
|
||||||
|
OBJ_Z = \
|
||||||
lapacke_zbbcsd.o \
|
lapacke_zbbcsd.o \
|
||||||
lapacke_zbbcsd_work.o \
|
lapacke_zbbcsd_work.o \
|
||||||
lapacke_zbdsqr.o \
|
lapacke_zbdsqr.o \
|
||||||
|
@ -1846,11 +1866,11 @@ lapacke_zgeqrf_work.o \
|
||||||
lapacke_zgeqrfp.o \
|
lapacke_zgeqrfp.o \
|
||||||
lapacke_zgeqrfp_work.o \
|
lapacke_zgeqrfp_work.o \
|
||||||
lapacke_zgeqrt.o \
|
lapacke_zgeqrt.o \
|
||||||
|
lapacke_zgeqrt_work.o \
|
||||||
lapacke_zgeqrt2.o \
|
lapacke_zgeqrt2.o \
|
||||||
lapacke_zgeqrt2_work.o \
|
lapacke_zgeqrt2_work.o \
|
||||||
lapacke_zgeqrt3.o \
|
lapacke_zgeqrt3.o \
|
||||||
lapacke_zgeqrt3_work.o \
|
lapacke_zgeqrt3_work.o \
|
||||||
lapacke_zgeqrt_work.o \
|
|
||||||
lapacke_zgerfs.o \
|
lapacke_zgerfs.o \
|
||||||
lapacke_zgerfs_work.o \
|
lapacke_zgerfs_work.o \
|
||||||
lapacke_zgerqf.o \
|
lapacke_zgerqf.o \
|
||||||
|
@ -1861,6 +1881,8 @@ lapacke_zgesv.o \
|
||||||
lapacke_zgesv_work.o \
|
lapacke_zgesv_work.o \
|
||||||
lapacke_zgesvd.o \
|
lapacke_zgesvd.o \
|
||||||
lapacke_zgesvd_work.o \
|
lapacke_zgesvd_work.o \
|
||||||
|
lapacke_zgesvdq.o \
|
||||||
|
lapacke_zgesvdq_work.o \
|
||||||
lapacke_zgesvdx.o \
|
lapacke_zgesvdx.o \
|
||||||
lapacke_zgesvdx_work.o \
|
lapacke_zgesvdx_work.o \
|
||||||
lapacke_zgesvj.o \
|
lapacke_zgesvj.o \
|
||||||
|
@ -1897,10 +1919,10 @@ lapacke_zggevx.o \
|
||||||
lapacke_zggevx_work.o \
|
lapacke_zggevx_work.o \
|
||||||
lapacke_zggglm.o \
|
lapacke_zggglm.o \
|
||||||
lapacke_zggglm_work.o \
|
lapacke_zggglm_work.o \
|
||||||
lapacke_zgghrd.o \
|
|
||||||
lapacke_zgghrd_work.o \
|
|
||||||
lapacke_zgghd3.o \
|
lapacke_zgghd3.o \
|
||||||
lapacke_zgghd3_work.o \
|
lapacke_zgghd3_work.o \
|
||||||
|
lapacke_zgghrd.o \
|
||||||
|
lapacke_zgghrd_work.o \
|
||||||
lapacke_zgglse.o \
|
lapacke_zgglse.o \
|
||||||
lapacke_zgglse_work.o \
|
lapacke_zgglse_work.o \
|
||||||
lapacke_zggqrf.o \
|
lapacke_zggqrf.o \
|
||||||
|
@ -1925,14 +1947,14 @@ lapacke_zgttrs.o \
|
||||||
lapacke_zgttrs_work.o \
|
lapacke_zgttrs_work.o \
|
||||||
lapacke_zhbev.o \
|
lapacke_zhbev.o \
|
||||||
lapacke_zhbev_work.o \
|
lapacke_zhbev_work.o \
|
||||||
lapacke_zhbevd.o \
|
|
||||||
lapacke_zhbevd_work.o \
|
|
||||||
lapacke_zhbevx.o \
|
|
||||||
lapacke_zhbevx_work.o \
|
|
||||||
lapacke_zhbev_2stage.o \
|
lapacke_zhbev_2stage.o \
|
||||||
lapacke_zhbev_2stage_work.o \
|
lapacke_zhbev_2stage_work.o \
|
||||||
|
lapacke_zhbevd.o \
|
||||||
|
lapacke_zhbevd_work.o \
|
||||||
lapacke_zhbevd_2stage.o \
|
lapacke_zhbevd_2stage.o \
|
||||||
lapacke_zhbevd_2stage_work.o \
|
lapacke_zhbevd_2stage_work.o \
|
||||||
|
lapacke_zhbevx.o \
|
||||||
|
lapacke_zhbevx_work.o \
|
||||||
lapacke_zhbevx_2stage.o \
|
lapacke_zhbevx_2stage.o \
|
||||||
lapacke_zhbevx_2stage_work.o \
|
lapacke_zhbevx_2stage_work.o \
|
||||||
lapacke_zhbgst.o \
|
lapacke_zhbgst.o \
|
||||||
|
@ -1953,18 +1975,18 @@ lapacke_zheequb.o \
|
||||||
lapacke_zheequb_work.o \
|
lapacke_zheequb_work.o \
|
||||||
lapacke_zheev.o \
|
lapacke_zheev.o \
|
||||||
lapacke_zheev_work.o \
|
lapacke_zheev_work.o \
|
||||||
lapacke_zheevd.o \
|
|
||||||
lapacke_zheevd_work.o \
|
|
||||||
lapacke_zheevr.o \
|
|
||||||
lapacke_zheevr_work.o \
|
|
||||||
lapacke_zheevx.o \
|
|
||||||
lapacke_zheevx_work.o \
|
|
||||||
lapacke_zheev_2stage.o \
|
lapacke_zheev_2stage.o \
|
||||||
lapacke_zheev_2stage_work.o \
|
lapacke_zheev_2stage_work.o \
|
||||||
|
lapacke_zheevd.o \
|
||||||
|
lapacke_zheevd_work.o \
|
||||||
lapacke_zheevd_2stage.o \
|
lapacke_zheevd_2stage.o \
|
||||||
lapacke_zheevd_2stage_work.o \
|
lapacke_zheevd_2stage_work.o \
|
||||||
|
lapacke_zheevr.o \
|
||||||
|
lapacke_zheevr_work.o \
|
||||||
lapacke_zheevr_2stage.o \
|
lapacke_zheevr_2stage.o \
|
||||||
lapacke_zheevr_2stage_work.o \
|
lapacke_zheevr_2stage_work.o \
|
||||||
|
lapacke_zheevx.o \
|
||||||
|
lapacke_zheevx_work.o \
|
||||||
lapacke_zheevx_2stage.o \
|
lapacke_zheevx_2stage.o \
|
||||||
lapacke_zheevx_2stage_work.o \
|
lapacke_zheevx_2stage_work.o \
|
||||||
lapacke_zhegst.o \
|
lapacke_zhegst.o \
|
||||||
|
@ -1994,35 +2016,35 @@ lapacke_zheswapr_work.o \
|
||||||
lapacke_zhetrd.o \
|
lapacke_zhetrd.o \
|
||||||
lapacke_zhetrd_work.o \
|
lapacke_zhetrd_work.o \
|
||||||
lapacke_zhetrf.o \
|
lapacke_zhetrf.o \
|
||||||
lapacke_zhetrf_rook.o \
|
|
||||||
lapacke_zhetrf_work.o \
|
lapacke_zhetrf_work.o \
|
||||||
lapacke_zhetrf_rook_work.o \
|
|
||||||
lapacke_zhetrf_aa.o \
|
lapacke_zhetrf_aa.o \
|
||||||
lapacke_zhetrf_aa_2stage.o \
|
|
||||||
lapacke_zhetrf_aa_work.o \
|
lapacke_zhetrf_aa_work.o \
|
||||||
|
lapacke_zhetrf_aa_2stage.o \
|
||||||
lapacke_zhetrf_aa_2stage_work.o \
|
lapacke_zhetrf_aa_2stage_work.o \
|
||||||
lapacke_zhetrf_rk.o \
|
lapacke_zhetrf_rk.o \
|
||||||
lapacke_zhetrf_rk_work.o \
|
lapacke_zhetrf_rk_work.o \
|
||||||
|
lapacke_zhetrf_rook.o \
|
||||||
|
lapacke_zhetrf_rook_work.o \
|
||||||
lapacke_zhetri.o \
|
lapacke_zhetri.o \
|
||||||
|
lapacke_zhetri_work.o \
|
||||||
lapacke_zhetri2.o \
|
lapacke_zhetri2.o \
|
||||||
lapacke_zhetri2_work.o \
|
lapacke_zhetri2_work.o \
|
||||||
lapacke_zhetri_3.o \
|
|
||||||
lapacke_zhetri_3_work.o \
|
|
||||||
lapacke_zhetri2x.o \
|
lapacke_zhetri2x.o \
|
||||||
lapacke_zhetri2x_work.o \
|
lapacke_zhetri2x_work.o \
|
||||||
lapacke_zhetri_work.o \
|
lapacke_zhetri_3.o \
|
||||||
|
lapacke_zhetri_3_work.o \
|
||||||
lapacke_zhetrs.o \
|
lapacke_zhetrs.o \
|
||||||
lapacke_zhetrs_rook.o \
|
lapacke_zhetrs_work.o \
|
||||||
lapacke_zhetrs2.o \
|
lapacke_zhetrs2.o \
|
||||||
lapacke_zhetrs2_work.o \
|
lapacke_zhetrs2_work.o \
|
||||||
lapacke_zhetrs_work.o \
|
|
||||||
lapacke_zhetrs_rook_work.o \
|
|
||||||
lapacke_zhetrs_aa.o \
|
|
||||||
lapacke_zhetrs_aa_2stage.o \
|
|
||||||
lapacke_zhetrs_aa_work.o \
|
|
||||||
lapacke_zhetrs_aa_2stage_work.o \
|
|
||||||
lapacke_zhetrs_3.o \
|
lapacke_zhetrs_3.o \
|
||||||
lapacke_zhetrs_3_work.o \
|
lapacke_zhetrs_3_work.o \
|
||||||
|
lapacke_zhetrs_aa.o \
|
||||||
|
lapacke_zhetrs_aa_work.o \
|
||||||
|
lapacke_zhetrs_aa_2stage.o \
|
||||||
|
lapacke_zhetrs_aa_2stage_work.o \
|
||||||
|
lapacke_zhetrs_rook.o \
|
||||||
|
lapacke_zhetrs_rook_work.o \
|
||||||
lapacke_zhfrk.o \
|
lapacke_zhfrk.o \
|
||||||
lapacke_zhfrk_work.o \
|
lapacke_zhfrk_work.o \
|
||||||
lapacke_zhgeqz.o \
|
lapacke_zhgeqz.o \
|
||||||
|
@ -2213,11 +2235,11 @@ lapacke_zsyconv.o \
|
||||||
lapacke_zsyconv_work.o \
|
lapacke_zsyconv_work.o \
|
||||||
lapacke_zsyequb.o \
|
lapacke_zsyequb.o \
|
||||||
lapacke_zsyequb_work.o \
|
lapacke_zsyequb_work.o \
|
||||||
|
lapacke_zsyr.o \
|
||||||
|
lapacke_zsyr_work.o \
|
||||||
lapacke_zsyrfs.o \
|
lapacke_zsyrfs.o \
|
||||||
lapacke_zsyrfs_work.o \
|
lapacke_zsyrfs_work.o \
|
||||||
lapacke_zsysv.o \
|
lapacke_zsysv.o \
|
||||||
lapacke_zsysv_rook.o \
|
|
||||||
lapacke_zsysv_rook_work.o \
|
|
||||||
lapacke_zsysv_work.o \
|
lapacke_zsysv_work.o \
|
||||||
lapacke_zsysv_aa.o \
|
lapacke_zsysv_aa.o \
|
||||||
lapacke_zsysv_aa_work.o \
|
lapacke_zsysv_aa_work.o \
|
||||||
|
@ -2225,40 +2247,42 @@ lapacke_zsysv_aa_2stage.o \
|
||||||
lapacke_zsysv_aa_2stage_work.o \
|
lapacke_zsysv_aa_2stage_work.o \
|
||||||
lapacke_zsysv_rk.o \
|
lapacke_zsysv_rk.o \
|
||||||
lapacke_zsysv_rk_work.o \
|
lapacke_zsysv_rk_work.o \
|
||||||
|
lapacke_zsysv_rook.o \
|
||||||
|
lapacke_zsysv_rook_work.o \
|
||||||
lapacke_zsysvx.o \
|
lapacke_zsysvx.o \
|
||||||
lapacke_zsysvx_work.o \
|
lapacke_zsysvx_work.o \
|
||||||
lapacke_zsyswapr.o \
|
lapacke_zsyswapr.o \
|
||||||
lapacke_zsyswapr_work.o \
|
lapacke_zsyswapr_work.o \
|
||||||
lapacke_zsytrf.o \
|
lapacke_zsytrf.o \
|
||||||
lapacke_zsytrf_work.o \
|
lapacke_zsytrf_work.o \
|
||||||
lapacke_zsytrf_rook.o \
|
|
||||||
lapacke_zsytrf_rook_work.o \
|
|
||||||
lapacke_zsytrf_aa.o \
|
lapacke_zsytrf_aa.o \
|
||||||
lapacke_zsytrf_aa_2stage.o \
|
|
||||||
lapacke_zsytrf_aa_work.o \
|
lapacke_zsytrf_aa_work.o \
|
||||||
|
lapacke_zsytrf_aa_2stage.o \
|
||||||
lapacke_zsytrf_aa_2stage_work.o \
|
lapacke_zsytrf_aa_2stage_work.o \
|
||||||
lapacke_zsytrf_rk.o \
|
lapacke_zsytrf_rk.o \
|
||||||
lapacke_zsytrf_rk_work.o \
|
lapacke_zsytrf_rk_work.o \
|
||||||
|
lapacke_zsytrf_rook.o \
|
||||||
|
lapacke_zsytrf_rook_work.o \
|
||||||
lapacke_zsytri.o \
|
lapacke_zsytri.o \
|
||||||
|
lapacke_zsytri_work.o \
|
||||||
lapacke_zsytri2.o \
|
lapacke_zsytri2.o \
|
||||||
lapacke_zsytri2_work.o \
|
lapacke_zsytri2_work.o \
|
||||||
lapacke_zsytri_3.o \
|
|
||||||
lapacke_zsytri_3_work.o \
|
|
||||||
lapacke_zsytri2x.o \
|
lapacke_zsytri2x.o \
|
||||||
lapacke_zsytri2x_work.o \
|
lapacke_zsytri2x_work.o \
|
||||||
lapacke_zsytri_work.o \
|
lapacke_zsytri_3.o \
|
||||||
|
lapacke_zsytri_3_work.o \
|
||||||
lapacke_zsytrs.o \
|
lapacke_zsytrs.o \
|
||||||
lapacke_zsytrs_rook.o \
|
lapacke_zsytrs_work.o \
|
||||||
lapacke_zsytrs2.o \
|
lapacke_zsytrs2.o \
|
||||||
lapacke_zsytrs2_work.o \
|
lapacke_zsytrs2_work.o \
|
||||||
lapacke_zsytrs_work.o \
|
|
||||||
lapacke_zsytrs_rook_work.o \
|
|
||||||
lapacke_zsytrs_aa.o \
|
|
||||||
lapacke_zsytrs_aa_2stage.o \
|
|
||||||
lapacke_zsytrs_aa_work.o \
|
|
||||||
lapacke_zsytrs_aa_2stage_work.o \
|
|
||||||
lapacke_zsytrs_3.o \
|
lapacke_zsytrs_3.o \
|
||||||
lapacke_zsytrs_3_work.o \
|
lapacke_zsytrs_3_work.o \
|
||||||
|
lapacke_zsytrs_aa.o \
|
||||||
|
lapacke_zsytrs_aa_work.o \
|
||||||
|
lapacke_zsytrs_aa_2stage.o \
|
||||||
|
lapacke_zsytrs_aa_2stage_work.o \
|
||||||
|
lapacke_zsytrs_rook.o \
|
||||||
|
lapacke_zsytrs_rook_work.o \
|
||||||
lapacke_ztbcon.o \
|
lapacke_ztbcon.o \
|
||||||
lapacke_ztbcon_work.o \
|
lapacke_ztbcon_work.o \
|
||||||
lapacke_ztbrfs.o \
|
lapacke_ztbrfs.o \
|
||||||
|
@ -2290,9 +2314,9 @@ lapacke_ztpcon_work.o \
|
||||||
lapacke_ztpmqrt.o \
|
lapacke_ztpmqrt.o \
|
||||||
lapacke_ztpmqrt_work.o \
|
lapacke_ztpmqrt_work.o \
|
||||||
lapacke_ztpqrt.o \
|
lapacke_ztpqrt.o \
|
||||||
|
lapacke_ztpqrt_work.o \
|
||||||
lapacke_ztpqrt2.o \
|
lapacke_ztpqrt2.o \
|
||||||
lapacke_ztpqrt2_work.o \
|
lapacke_ztpqrt2_work.o \
|
||||||
lapacke_ztpqrt_work.o \
|
|
||||||
lapacke_ztprfb.o \
|
lapacke_ztprfb.o \
|
||||||
lapacke_ztprfb_work.o \
|
lapacke_ztprfb_work.o \
|
||||||
lapacke_ztprfs.o \
|
lapacke_ztprfs.o \
|
||||||
|
@ -2368,12 +2392,7 @@ lapacke_zunmtr_work.o \
|
||||||
lapacke_zupgtr.o \
|
lapacke_zupgtr.o \
|
||||||
lapacke_zupgtr_work.o \
|
lapacke_zupgtr_work.o \
|
||||||
lapacke_zupmtr.o \
|
lapacke_zupmtr.o \
|
||||||
lapacke_zupmtr_work.o \
|
lapacke_zupmtr_work.o
|
||||||
lapacke_zsyr.o \
|
|
||||||
lapacke_csyr.o \
|
|
||||||
lapacke_zsyr_work.o \
|
|
||||||
lapacke_csyr_work.o \
|
|
||||||
lapacke_ilaver.o
|
|
||||||
|
|
||||||
ifdef BUILD_DEPRECATED
|
ifdef BUILD_DEPRECATED
|
||||||
DEPRECATED = \
|
DEPRECATED = \
|
||||||
|
@ -2452,27 +2471,29 @@ lapacke_zlagsy.o \
|
||||||
lapacke_zlagsy_work.o
|
lapacke_zlagsy_work.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
all: ../../$(LAPACKELIB)
|
.PHONY: all
|
||||||
|
all: $(LAPACKELIB)
|
||||||
|
|
||||||
.PHONY: ../../$(LAPACKELIB)
|
$(LAPACKELIB): $(OBJ) $(OBJ_S) $(OBJ_C) $(OBJ_D) $(OBJ_Z) $(DEPRECATED) $(EXTENDED) $(MATGEN)
|
||||||
|
$(AR) $(ARFLAGS) $@ $(OBJ)
|
||||||
../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN)
|
$(AR) $(ARFLAGS) $@ $(OBJ_S)
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $(OBJ_A)
|
$(AR) $(ARFLAGS) $@ $(OBJ_C)
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $(OBJ_B)
|
$(AR) $(ARFLAGS) $@ $(OBJ_D)
|
||||||
|
$(AR) $(ARFLAGS) $@ $(OBJ_Z)
|
||||||
ifdef BUILD_DEPRECATED
|
ifdef BUILD_DEPRECATED
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED)
|
$(AR) $(ARFLAGS) $@ $(DEPRECATED)
|
||||||
endif
|
endif
|
||||||
ifdef (USEXBLAS)
|
ifdef (USEXBLAS)
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $(EXTENDED)
|
$(AR) $(ARFLAGS) $@ $(EXTENDED)
|
||||||
endif
|
endif
|
||||||
ifdef LAPACKE_WITH_TMG
|
ifdef LAPACKE_WITH_TMG
|
||||||
$(ARCH) $(ARCHFLAGS) $@ $(MATGEN)
|
$(AR) $(ARFLAGS) $@ $(MATGEN)
|
||||||
endif
|
endif
|
||||||
$(RANLIB) $@
|
$(RANLIB) $@
|
||||||
|
|
||||||
clean: cleanobj
|
.PHONY: clean cleanobj cleanlib
|
||||||
|
clean: cleanobj cleanlib
|
||||||
cleanobj:
|
cleanobj:
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
|
cleanlib:
|
||||||
.c.o:
|
rm -f $(LAPACKELIB)
|
||||||
$(CC) $(CFLAGS) -I../include -c -o $@ $<
|
|
||||||
|
|
|
@ -124,7 +124,6 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv,
|
||||||
float* rwork = NULL;
|
float* rwork = NULL;
|
||||||
lapack_complex_float* cwork = NULL;
|
lapack_complex_float* cwork = NULL;
|
||||||
lapack_int i;
|
lapack_int i;
|
||||||
lapack_int nu, nv;
|
|
||||||
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
|
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
|
||||||
LAPACKE_xerbla( "LAPACKE_cgejsv", -1 );
|
LAPACKE_xerbla( "LAPACKE_cgejsv", -1 );
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -132,8 +131,6 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv,
|
||||||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||||
if( LAPACKE_get_nancheck() ) {
|
if( LAPACKE_get_nancheck() ) {
|
||||||
/* Optionally check input matrices for NaNs */
|
/* Optionally check input matrices for NaNs */
|
||||||
nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m;
|
|
||||||
nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n;
|
|
||||||
if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
|
if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
|
||||||
return -10;
|
return -10;
|
||||||
}
|
}
|
||||||
|
|
|
@ -75,7 +75,7 @@ lapack_int LAPACKE_cgelsd( int matrix_layout, lapack_int m, lapack_int n,
|
||||||
if( info != 0 ) {
|
if( info != 0 ) {
|
||||||
goto exit_level_0;
|
goto exit_level_0;
|
||||||
}
|
}
|
||||||
liwork = (lapack_int)iwork_query;
|
liwork = iwork_query;
|
||||||
lrwork = (lapack_int)rwork_query;
|
lrwork = (lapack_int)rwork_query;
|
||||||
lwork = LAPACK_C2INT( work_query );
|
lwork = LAPACK_C2INT( work_query );
|
||||||
/* Allocate memory for work arrays */
|
/* Allocate memory for work arrays */
|
||||||
|
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2014, Intel Corp.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
may be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||||
|
THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************
|
||||||
|
* Contents: Native high-level C interface to LAPACK function cgesvdq
|
||||||
|
* Author: Intel Corporation
|
||||||
|
* Generated November 2018
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include "lapacke_utils.h"
|
||||||
|
|
||||||
|
lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp,
|
||||||
|
char jobr, char jobu, char jobv,
|
||||||
|
lapack_int m, lapack_int n, lapack_complex_float* a,
|
||||||
|
lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu,
|
||||||
|
lapack_complex_float* v, lapack_int ldv, lapack_int* numrank)
|
||||||
|
{
|
||||||
|
lapack_int info = 0;
|
||||||
|
lapack_int liwork = -1;
|
||||||
|
lapack_int* iwork = NULL;
|
||||||
|
lapack_int iwork_query;
|
||||||
|
lapack_int lcwork = -1;
|
||||||
|
lapack_complex_float* cwork = NULL;
|
||||||
|
lapack_complex_float cwork_query;
|
||||||
|
lapack_int lrwork = -1;
|
||||||
|
double* rwork = NULL;
|
||||||
|
double rwork_query;
|
||||||
|
lapack_int i;
|
||||||
|
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
|
||||||
|
LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 );
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||||
|
if( LAPACKE_get_nancheck() ) {
|
||||||
|
/* Optionally check input matrices for NaNs */
|
||||||
|
if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
|
||||||
|
return -6;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
/* Query optimal working array(s) size */
|
||||||
|
info = LAPACKE_cgesvdq_work( matrix_layout, joba, jobp, jobr, jobu, jobv,
|
||||||
|
m, n, a, lda, s, u, ldu, v, ldv, numrank,
|
||||||
|
&iwork_query, liwork, &cwork_query, lcwork,
|
||||||
|
&rwork_query, lrwork );
|
||||||
|
if( info != 0 ) {
|
||||||
|
goto exit_level_0;
|
||||||
|
}
|
||||||
|
liwork = iwork_query;
|
||||||
|
lcwork = LAPACK_C2INT(cwork_query);
|
||||||
|
lrwork = (lapack_int)rwork_query;
|
||||||
|
/* Allocate memory for work arrays */
|
||||||
|
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
|
||||||
|
if( iwork == NULL ) {
|
||||||
|
info = LAPACK_WORK_MEMORY_ERROR;
|
||||||
|
goto exit_level_0;
|
||||||
|
}
|
||||||
|
cwork = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lcwork );
|
||||||
|
if( cwork == NULL ) {
|
||||||
|
info = LAPACK_WORK_MEMORY_ERROR;
|
||||||
|
goto exit_level_0;
|
||||||
|
}
|
||||||
|
rwork = (double*)LAPACKE_malloc( sizeof(double) * lrwork );
|
||||||
|
if( rwork == NULL ) {
|
||||||
|
info = LAPACK_WORK_MEMORY_ERROR;
|
||||||
|
goto exit_level_0;
|
||||||
|
}
|
||||||
|
/* Call middle-level interface */
|
||||||
|
info = LAPACKE_cgesvdq_work( matrix_layout, joba, jobp, jobr, jobu, jobv,
|
||||||
|
m, n, a, lda, s, u, ldu, v, ldv, numrank,
|
||||||
|
iwork, liwork, cwork, lcwork, rwork, lrwork );
|
||||||
|
|
||||||
|
/* Release memory and exit */
|
||||||
|
LAPACKE_free( iwork );
|
||||||
|
LAPACKE_free( cwork );
|
||||||
|
LAPACKE_free( rwork );
|
||||||
|
exit_level_0:
|
||||||
|
if( info == LAPACK_WORK_MEMORY_ERROR ) {
|
||||||
|
LAPACKE_xerbla( "LAPACKE_cgesvdq", info );
|
||||||
|
}
|
||||||
|
return info;
|
||||||
|
}
|
|
@ -0,0 +1,149 @@
|
||||||
|
/*****************************************************************************
|
||||||
|
Copyright (c) 2014, Intel Corp.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions are met:
|
||||||
|
|
||||||
|
* Redistributions of source code must retain the above copyright notice,
|
||||||
|
this list of conditions and the following disclaimer.
|
||||||
|
* Redistributions in binary form must reproduce the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer in the
|
||||||
|
documentation and/or other materials provided with the distribution.
|
||||||
|
* Neither the name of Intel Corporation nor the names of its contributors
|
||||||
|
may be used to endorse or promote products derived from this software
|
||||||
|
without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||||
|
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||||
|
THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*****************************************************************************
|
||||||
|
* Contents: Native middle-level C interface to LAPACK function cgesvdq
|
||||||
|
* Author: Intel Corporation
|
||||||
|
* Generated November 2015
|
||||||
|
*****************************************************************************/
|
||||||
|
|
||||||
|
#include "lapacke_utils.h"
|
||||||
|
|
||||||
|
/*
 * Middle-level ("work") C wrapper for the LAPACK routine cgesvdq.
 *
 * The caller supplies all workspace arrays (iwork/cwork/rwork) and their
 * lengths.  For LAPACK_COL_MAJOR the arguments are forwarded unchanged.  For
 * LAPACK_ROW_MAJOR the input matrix is transposed into a column-major
 * temporary, the routine is run on the temporaries, and the results are
 * transposed back into the caller's row-major arrays.
 *
 * Parameters follow the argument list of LAPACK_cgesvdq, preceded by
 * matrix_layout (LAPACK_COL_MAJOR or LAPACK_ROW_MAJOR).
 *
 * Returns:
 *    0                               on success,
 *   -1                               if matrix_layout is not recognised,
 *   -9 / -12 / -14                   if lda / ldu / ldv is too small for the
 *                                    row-major layout,
 *   info - 1                         if LAPACK_cgesvdq reported a negative
 *                                    info (shifted to account for the extra
 *                                    leading matrix_layout argument),
 *   LAPACK_TRANSPOSE_MEMORY_ERROR    if a temporary could not be allocated,
 *   otherwise the positive info returned by LAPACK_cgesvdq.
 */
lapack_int LAPACKE_cgesvdq_work( int matrix_layout, char joba, char jobp,
                                 char jobr, char jobu, char jobv,
                                 lapack_int m, lapack_int n, lapack_complex_float* a,
                                 lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu,
                                 lapack_complex_float* v, lapack_int ldv, lapack_int* numrank,
                                 lapack_int* iwork, lapack_int liwork,
                                 lapack_complex_float* cwork, lapack_int lcwork,
                                 float* rwork, lapack_int lrwork )
{
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda, s, u, &ldu, v, &ldv,
                        numrank, iwork, &liwork, cwork, &lcwork, rwork, &lrwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        /* Only the 'a' and 's' job codes get transposed temporaries here. */
        const int want_u = LAPACKE_lsame( jobu, 'a' ) ||
                           LAPACKE_lsame( jobu, 's' );
        const int want_v = LAPACKE_lsame( jobv, 'a' ) ||
                           LAPACKE_lsame( jobv, 's' );
        lapack_int nrows_u = want_u ? m : 1;
        lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m :
                             ( LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1 );
        lapack_int nrows_v = LAPACKE_lsame( jobv, 'a' ) ? n :
                             ( LAPACKE_lsame( jobv, 's' ) ? MIN(m,n) : 1 );
        lapack_int lda_t = MAX(1,m);
        lapack_int ldu_t = MAX(1,nrows_u);
        lapack_int ldv_t = MAX(1,nrows_v);
        lapack_complex_float* a_t = NULL;
        lapack_complex_float* u_t = NULL;
        lapack_complex_float* v_t = NULL;
        /* Buffers actually handed to the Fortran routine for U and V. */
        lapack_complex_float* u_arg = NULL;
        lapack_complex_float* v_arg = NULL;
        /* Check leading dimension(s) */
        /* NOTE(review): these codes (-9/-12/-14) do not look like the
         * 1-based positions of lda/ldu/ldv in this signature; kept as-is
         * because callers may rely on them -- confirm against upstream. */
        if( lda < n ) {
            info = -9;
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
            return info;
        }
        if( ldu < ncols_u ) {
            info = -12;
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
            return info;
        }
        if( ldv < n ) {
            info = -14;
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
            return info;
        }
        /* Query optimal working array(s) size if requested.  A -1 in any of
         * the three workspace lengths is treated as a query, so return before
         * allocating or transposing anything: otherwise the output transposes
         * below would copy uninitialised temporaries into u and v. */
        if( liwork == -1 || lcwork == -1 || lrwork == -1 ) {
            LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t,
                            s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork,
                            cwork, &lcwork, rwork, &lrwork, &info );
            return (info < 0) ? (info - 1) : info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        if( want_u ) {
            u_t = (lapack_complex_float*)
                LAPACKE_malloc( sizeof(lapack_complex_float) * ldu_t * MAX(1,ncols_u) );
            if( u_t == NULL ) {
                info = LAPACK_TRANSPOSE_MEMORY_ERROR;
                goto exit_level_1;
            }
        }
        if( want_v ) {
            v_t = (lapack_complex_float*)
                LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) );
            if( v_t == NULL ) {
                info = LAPACK_TRANSPOSE_MEMORY_ERROR;
                goto exit_level_2;
            }
        }
        /* Transpose input matrices */
        LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
        /* Call LAPACK function and adjust info.
         * The routine must operate on the column-major temporaries: the
         * leading dimensions passed here (lda_t/ldu_t/ldv_t) describe them,
         * not the caller's row-major arrays.  When no temporary exists for
         * U or V (job code other than 'a'/'s'), the caller's pointer is
         * forwarded unchanged, as before. */
        u_arg = ( u_t != NULL ) ? u_t : u;
        v_arg = ( v_t != NULL ) ? v_t : v;
        LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a_t, &lda_t,
                        s, u_arg, &ldu_t, v_arg, &ldv_t, numrank, iwork, &liwork,
                        cwork, &lcwork, rwork, &lrwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices */
        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
        if( want_u ) {
            LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t,
                               u, ldu );
        }
        if( want_v ) {
            LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v,
                               ldv );
        }
        /* Release memory and exit */
        if( want_v ) {
            LAPACKE_free( v_t );
        }
exit_level_2:
        if( want_u ) {
            LAPACKE_free( u_t );
        }
exit_level_1:
        LAPACKE_free( a_t );
exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
    }
    return info;
}
|
|
@ -91,7 +91,7 @@ lapack_int LAPACKE_cggesx( int matrix_layout, char jobvsl, char jobvsr,
|
||||||
if( info != 0 ) {
|
if( info != 0 ) {
|
||||||
goto exit_level_2;
|
goto exit_level_2;
|
||||||
}
|
}
|
||||||
liwork = (lapack_int)iwork_query;
|
liwork = iwork_query;
|
||||||
lwork = LAPACK_C2INT( work_query );
|
lwork = LAPACK_C2INT( work_query );
|
||||||
/* Allocate memory for work arrays */
|
/* Allocate memory for work arrays */
|
||||||
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
|
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
|
||||||
|
|
|
@ -67,7 +67,7 @@ lapack_int LAPACKE_chbevd( int matrix_layout, char jobz, char uplo, lapack_int n
|
||||||
if( info != 0 ) {
|
if( info != 0 ) {
|
||||||
goto exit_level_0;
|
goto exit_level_0;
|
||||||
}
|
}
|
||||||
liwork = (lapack_int)iwork_query;
|
liwork = iwork_query;
|
||||||
lrwork = (lapack_int)rwork_query;
|
lrwork = (lapack_int)rwork_query;
|
||||||
lwork = LAPACK_C2INT( work_query );
|
lwork = LAPACK_C2INT( work_query );
|
||||||
/* Allocate memory for work arrays */
|
/* Allocate memory for work arrays */
|
||||||
|
|
|
@ -67,7 +67,7 @@ lapack_int LAPACKE_chbevd_2stage( int matrix_layout, char jobz, char uplo, lapac
|
||||||
if( info != 0 ) {
|
if( info != 0 ) {
|
||||||
goto exit_level_0;
|
goto exit_level_0;
|
||||||
}
|
}
|
||||||
liwork = (lapack_int)iwork_query;
|
liwork = iwork_query;
|
||||||
lrwork = (lapack_int)rwork_query;
|
lrwork = (lapack_int)rwork_query;
|
||||||
lwork = LAPACK_C2INT( work_query );
|
lwork = LAPACK_C2INT( work_query );
|
||||||
/* Allocate memory for work arrays */
|
/* Allocate memory for work arrays */
|
||||||
|
|
|
@ -71,7 +71,7 @@ lapack_int LAPACKE_chbgvd( int matrix_layout, char jobz, char uplo, lapack_int n
|
||||||
if( info != 0 ) {
|
if( info != 0 ) {
|
||||||
goto exit_level_0;
|
goto exit_level_0;
|
||||||
}
|
}
|
||||||
liwork = (lapack_int)iwork_query;
|
liwork = iwork_query;
|
||||||
lrwork = (lapack_int)rwork_query;
|
lrwork = (lapack_int)rwork_query;
|
||||||
lwork = LAPACK_C2INT( work_query );
|
lwork = LAPACK_C2INT( work_query );
|
||||||
/* Allocate memory for work arrays */
|
/* Allocate memory for work arrays */
|
||||||
|
|
|
@ -70,7 +70,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo,
|
||||||
goto exit_level_0;
|
goto exit_level_0;
|
||||||
}
|
}
|
||||||
/* Transpose input matrices */
|
/* Transpose input matrices */
|
||||||
LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t );
|
LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t );
|
||||||
/* Call LAPACK function and adjust info */
|
/* Call LAPACK function and adjust info */
|
||||||
LAPACK_cheev( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork,
|
LAPACK_cheev( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork,
|
||||||
&info );
|
&info );
|
||||||
|
@ -78,7 +78,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo,
|
||||||
info = info - 1;
|
info = info - 1;
|
||||||
}
|
}
|
||||||
/* Transpose output matrices */
|
/* Transpose output matrices */
|
||||||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
|
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
|
||||||
/* Release memory and exit */
|
/* Release memory and exit */
|
||||||
LAPACKE_free( a_t );
|
LAPACKE_free( a_t );
|
||||||
exit_level_0:
|
exit_level_0:
|
||||||
|
|
|
@ -53,7 +53,7 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n
|
||||||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||||
if( LAPACKE_get_nancheck() ) {
|
if( LAPACKE_get_nancheck() ) {
|
||||||
/* Optionally check input matrices for NaNs */
|
/* Optionally check input matrices for NaNs */
|
||||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||||
return -5;
|
return -5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -65,7 +65,7 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n
|
||||||
if( info != 0 ) {
|
if( info != 0 ) {
|
||||||
goto exit_level_0;
|
goto exit_level_0;
|
||||||
}
|
}
|
||||||
liwork = (lapack_int)iwork_query;
|
liwork = iwork_query;
|
||||||
lrwork = (lapack_int)rwork_query;
|
lrwork = (lapack_int)rwork_query;
|
||||||
lwork = LAPACK_C2INT( work_query );
|
lwork = LAPACK_C2INT( work_query );
|
||||||
/* Allocate memory for work arrays */
|
/* Allocate memory for work arrays */
|
||||||
|
|
|
@ -53,7 +53,7 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac
|
||||||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||||
if( LAPACKE_get_nancheck() ) {
|
if( LAPACKE_get_nancheck() ) {
|
||||||
/* Optionally check input matrices for NaNs */
|
/* Optionally check input matrices for NaNs */
|
||||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||||
return -5;
|
return -5;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -65,7 +65,7 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac
|
||||||
if( info != 0 ) {
|
if( info != 0 ) {
|
||||||
goto exit_level_0;
|
goto exit_level_0;
|
||||||
}
|
}
|
||||||
liwork = (lapack_int)iwork_query;
|
liwork = iwork_query;
|
||||||
lrwork = (lapack_int)rwork_query;
|
lrwork = (lapack_int)rwork_query;
|
||||||
lwork = LAPACK_C2INT( work_query );
|
lwork = LAPACK_C2INT( work_query );
|
||||||
/* Allocate memory for work arrays */
|
/* Allocate memory for work arrays */
|
||||||
|
|
|
@ -71,7 +71,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo,
|
||||||
goto exit_level_0;
|
goto exit_level_0;
|
||||||
}
|
}
|
||||||
/* Transpose input matrices */
|
/* Transpose input matrices */
|
||||||
LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t );
|
LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t );
|
||||||
/* Call LAPACK function and adjust info */
|
/* Call LAPACK function and adjust info */
|
||||||
LAPACK_cheevd_2stage( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork,
|
LAPACK_cheevd_2stage( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork,
|
||||||
&lrwork, iwork, &liwork, &info );
|
&lrwork, iwork, &liwork, &info );
|
||||||
|
@ -79,7 +79,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo,
|
||||||
info = info - 1;
|
info = info - 1;
|
||||||
}
|
}
|
||||||
/* Transpose output matrices */
|
/* Transpose output matrices */
|
||||||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
|
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
|
||||||
/* Release memory and exit */
|
/* Release memory and exit */
|
||||||
LAPACKE_free( a_t );
|
LAPACKE_free( a_t );
|
||||||
exit_level_0:
|
exit_level_0:
|
||||||
|
|
|
@ -71,7 +71,7 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo,
|
||||||
goto exit_level_0;
|
goto exit_level_0;
|
||||||
}
|
}
|
||||||
/* Transpose input matrices */
|
/* Transpose input matrices */
|
||||||
LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t );
|
LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t );
|
||||||
/* Call LAPACK function and adjust info */
|
/* Call LAPACK function and adjust info */
|
||||||
LAPACK_cheevd( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork,
|
LAPACK_cheevd( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork,
|
||||||
&lrwork, iwork, &liwork, &info );
|
&lrwork, iwork, &liwork, &info );
|
||||||
|
@ -79,7 +79,8 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo,
|
||||||
info = info - 1;
|
info = info - 1;
|
||||||
}
|
}
|
||||||
/* Transpose output matrices */
|
/* Transpose output matrices */
|
||||||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
|
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
|
||||||
|
|
||||||
/* Release memory and exit */
|
/* Release memory and exit */
|
||||||
LAPACKE_free( a_t );
|
LAPACKE_free( a_t );
|
||||||
exit_level_0:
|
exit_level_0:
|
||||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue