This commit is contained in:
shengyang 2020-01-02 11:01:57 +08:00
commit 80db5f11e1
831 changed files with 37975 additions and 12110 deletions

View File

@ -178,4 +178,4 @@ In chronological order:
* [2019-11-06] optimize AVX512 SGEMM
* [2019-11-12] AVX512 CGEMM & ZGEMM kernels
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
* [2019-12-27] AVX2 CGEMM3M kernel
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels

View File

@ -247,21 +247,21 @@ prof_lapack : lapack_prebuild
lapack_prebuild :
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "override ARCH = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "ARCHFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "TMGLIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKELIB = ../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
@ -319,7 +319,7 @@ lapack-test :
ifneq ($(CROSS), 1)
( cd $(NETLIB_LAPACK_DIR)/INSTALL; make all; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
endif
lapack-runtest:

View File

@ -25,6 +25,8 @@ else ifeq ($(ARCH), i386)
override ARCH=x86
else ifeq ($(ARCH), aarch64)
override ARCH=arm64
else ifeq ($(ARCH), zarch)
override ARCH=zarch
endif
NETLIB_LAPACK_DIR = $(TOPDIR)/lapack-netlib
@ -558,6 +560,11 @@ DYNAMIC_CORE += THUNDERX2T99
DYNAMIC_CORE += TSV110
endif
ifeq ($(ARCH), zarch)
DYNAMIC_CORE = Z13
DYNAMIC_CORE += Z14
endif
ifeq ($(ARCH), power)
DYNAMIC_CORE = POWER6
DYNAMIC_CORE += POWER8

View File

@ -115,7 +115,9 @@ set(SLASRC
stplqt.f stplqt2.f stpmlqt.f
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f)
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
scombssq.f sgesvdq.f slaorhr_col_getrfnp.f
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f )
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@ -210,7 +212,9 @@ set(CLASRC
ctplqt.f ctplqt2.f ctpmlqt.f
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f)
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
cungtsqr.f cunhr_col.f )
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@ -299,7 +303,9 @@ set(DLASRC
dtplqt.f dtplqt2.f dtpmlqt.f
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f)
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f )
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@ -398,7 +404,9 @@ set(ZLASRC
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f)
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
zungtsqr.f zunhr_col.f)
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f

View File

@ -715,6 +715,8 @@ set(DSRC
lapacke_dgesv_work.c
lapacke_dgesvd.c
lapacke_dgesvd_work.c
lapacke_dgesvdq.c
lapacke_dgesvdq_work.c
lapacke_dgesvdx.c
lapacke_dgesvdx_work.c
lapacke_dgesvj.c
@ -1287,6 +1289,8 @@ set(SSRC
lapacke_sgesv_work.c
lapacke_sgesvd.c
lapacke_sgesvd_work.c
lapacke_sgesvdq.c
lapacke_sgesvdq_work.c
lapacke_sgesvdx.c
lapacke_sgesvdx_work.c
lapacke_sgesvj.c
@ -1853,6 +1857,8 @@ set(ZSRC
lapacke_zgesv_work.c
lapacke_zgesvd.c
lapacke_zgesvd_work.c
lapacke_zgesvdq.c
lapacke_zgesvdq_work.c
lapacke_zgesvdx.c
lapacke_zgesvdx_work.c
lapacke_zgesvj.c

View File

@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO
7 NUMBER OF VALUES OF N
6 NUMBER OF VALUES OF N
1 2 3 5 7 9 35 VALUES OF N
3 NUMBER OF VALUES OF ALPHA
0.0 1.0 0.7 VALUES OF ALPHA

View File

@ -5,7 +5,7 @@ T LOGICAL FLAG, T TO STOP ON FAILURES.
T LOGICAL FLAG, T TO TEST ERROR EXITS.
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH
16.0 THRESHOLD VALUE OF TEST RATIO
7 NUMBER OF VALUES OF N
6 NUMBER OF VALUES OF N
0 1 2 3 5 9 35 VALUES OF N
3 NUMBER OF VALUES OF ALPHA
0.0 1.0 0.7 VALUES OF ALPHA

View File

@ -21,9 +21,13 @@ else
ifeq ($(ARCH),power)
COMMONOBJS += dynamic_power.$(SUFFIX)
else
ifeq ($(ARCH),zarch)
COMMONOBJS += dynamic_zarch.$(SUFFIX)
else
COMMONOBJS += dynamic.$(SUFFIX)
endif
endif
endif
else
COMMONOBJS += parameter.$(SUFFIX)
endif
@ -85,9 +89,13 @@ else
ifeq ($(ARCH),power)
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_power.$(SUFFIX)
else
ifeq ($(ARCH),zarch)
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX)
else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
endif
endif
endif
else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
endif

View File

@ -0,0 +1,131 @@
#include "common.h"
/* Per-core kernel tables, defined outside this file. gotoblas_dynamic_init()
 * points the global `gotoblas` at one of them. */
extern gotoblas_t gotoblas_Z13;
extern gotoblas_t gotoblas_Z14;
extern gotoblas_t gotoblas_Z15;
//#if (!defined C_GCC) || (GCC_VERSION >= 60000)
//extern gotoblas_t gotoblas_Z14;
//#endif
/* Number of entries in corename[] below; used as the loop bound in
 * force_coretype(). Keep the two in sync. */
#define NUM_CORETYPES 5
extern void openblas_warning(int verbose, const char* msg);
/* Printable core names. The index is significant: entries 1..3 correspond to
 * the Z13/Z14/Z15 tables in gotoblas_corename() and force_coretype(); entry 0
 * is what gotoblas_corename() reports when no known table is active. Entry 4
 * ("ZARCH_GENERIC") is not mapped to any table in this file. */
static char* corename[] = {
"unknown",
"Z13",
"Z14",
"Z15",
"ZARCH_GENERIC",
};
/*
 * Return the printable name of the kernel table `gotoblas` currently points
 * at, or "unknown" (corename[0]) when it matches none of the known tables.
 */
char* gotoblas_corename(void) {
  int idx = 0; /* corename[0] == "unknown" */

  if (gotoblas == &gotoblas_Z13) {
    idx = 1;
  } else if (gotoblas == &gotoblas_Z14) {
    idx = 2;
  } else if (gotoblas == &gotoblas_Z15) {
    idx = 3;
  }

  return corename[idx];
}
// __builtin_cpu_is is not supported by zarch
static gotolabs_t* get_coretype(void) {
FILE* infile;
char buffer[512], * p;
p = (char*)NULL;
infile = fopen("/proc/sysinfo", "r");
while (fgets(buffer, sizeof(buffer), infile)) {
if (!strncmp("Type", buffer, 4)) {
p = strchr(buffer, ':') + 2;
#if 0
fprintf(stderr, "%s\n", p);
#endif
break;
}
}
fclose(infile);
if (strstr(p, "2964")) return &gotoblas_Z13;
if (strstr(p, "2965")) return &gotoblas_Z13;
if (strstr(p, "3906")) return &gotoblas_Z14;
if (strstr(p, "3907")) return &gotoblas_Z14;
if (strstr(p, "8561")) return &gotoblas_Z14; // fallback z15 to z14
if (strstr(p, "8562")) return &gotoblas_Z14; // fallback z15 to z14
return NULL; // should be ZARCH_GENERIC
}
/*
 * Map a user-supplied core name (the OPENBLAS_CORETYPE value passed by
 * gotoblas_dynamic_init) to a kernel table.
 *
 * The name is compared case-insensitively against corename[] (at most 20
 * characters are compared). Only "Z13", "Z14" and "Z15" select a table; for
 * any other value a "Core not found" warning is emitted and NULL is returned
 * so the caller can apply its fallback.
 */
static gotoblas_t* force_coretype(char* coretype) {
  int i;
  int found = -1;
  char message[128];

  for (i = 0; i < NUM_CORETYPES; i++) {
    if (!strncasecmp(coretype, corename[i], 20)) {
      found = i;
      break;
    }
  }

  switch (found) {
    case 1: return (&gotoblas_Z13);
    case 2: return (&gotoblas_Z14);
    case 3: return (&gotoblas_Z15);
    default: break; /* fall through to the warning below */
  }

  /* Previously unreachable: every switch path returned before this point. */
  snprintf(message, sizeof(message), "Core not found: %s\n", coretype);
  openblas_warning(1, message);
  return NULL;
}
/*
 * Select the kernel table for this process and run its init hook.
 *
 * Does nothing if `gotoblas` is already set. Otherwise the table is taken
 * from the OPENBLAS_CORETYPE environment variable when present, or from
 * hardware detection when not. If neither yields a table, Z14 is used and a
 * warning is emitted. Terminates the process with exit(1) when the selected
 * table has no init function.
 */
void gotoblas_dynamic_init(void) {
  char coremsg[128];
  char* p;

  if (gotoblas) return; /* already initialised */

  p = getenv("OPENBLAS_CORETYPE");
  if (p) {
    gotoblas = force_coretype(p);
  } else {
    gotoblas = get_coretype();
  }

  if (gotoblas == NULL) {
    snprintf(coremsg, sizeof(coremsg), "Falling back to Z14 core\n");
    openblas_warning(1, coremsg);
    gotoblas = &gotoblas_Z14;
  }

  if (gotoblas && gotoblas->init) {
    /* Bounded formatting; %.20s keeps the 20-character cap the former
     * strncpy() applied, but always yields a terminated string. */
    snprintf(coremsg, sizeof(coremsg), "Core: %.20s\n", gotoblas_corename());
    openblas_warning(2, coremsg);
    gotoblas->init();
  } else {
    openblas_warning(0, "OpenBLAS : Architecture Initialization failed. No initialization function found.\n");
    exit(1);
  }
}
/* Forget the selected kernel table, so that a later call to
 * gotoblas_dynamic_init() performs the selection again. */
void gotoblas_dynamic_quit(void) {
gotoblas = NULL;
}

View File

@ -694,7 +694,19 @@
# functions added for lapack-3.8.0
ilaenv2stage
ilaenv2stage,
# functions added for lapack-3.9.0
cgesvdq,
cungtsqr,
dcombssq,
dgesvdq,
dorgtsqr,
scombssq,
sgesvdq,
sorgtsqr,
zgesvdq,
zungtsqr
);
@lapack_extendedprecision_objs = (
@ -3347,6 +3359,15 @@
LAPACKE_zsytrf_aa_2stage_work,
LAPACKE_zsytrs_aa_2stage,
LAPACKE_zsytrs_aa_2stage_work,
# new functions from 3.9.0
LAPACKE_dgesvdq,
LAPACKE_dgesvdq_work,
LAPACKE_sgesvdq,
LAPACKE_sgesvdq_work,
LAPACKE_zgesvdq,
LAPACKE_zgesvdq_work
);
#These function may need 2 underscores.
@ -3419,7 +3440,13 @@
dsytrf_aa_2stage, dsytrs_aa_2stage,
zhesv_aa_2stage, zhetrf_aa_2stage,
zhetrs_aa_2stage, zsysv_aa_2stage,
zsytrf_aa_2stage, zsytrs_aa_2stage
zsytrf_aa_2stage, zsytrs_aa_2stage,
# 3.9.0
claunhr_col_getrfnp, claunhr_col_getrfnp2, cunhr_col,
dlaorhr_col_getrfnp, dlaorhr_col_getrfnp2, dorhr_col,
slaorhr_col_getrfnp, slaorhr_col_getrfnp2, sorhr_col,
zlaunhr_col_getrfnp, zlaunhr_col_getrfnp2, zunhr_col
);

View File

@ -103,26 +103,34 @@ ZDOTKERNEL = zdot.S
DSDOTKERNEL = dot.S
DGEMM_BETA = dgemm_beta.S
SGEMM_BETA = sgemm_beta.S
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
ifeq ($(SGEMM_UNROLL_N), 4)
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
ifeq ($(SGEMM_UNROLL_M), 16)
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
else
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
endif
ifeq ($(SGEMM_UNROLL_M), 4)
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
else
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
endif
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(SGEMM_UNROLL_N), 16)
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
else
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
endif
ifeq ($(SGEMM_UNROLL_N), 4)
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
else
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
endif
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

View File

@ -109,22 +109,29 @@ ZGEMVTKERNEL = zgemv_t.S
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
ifeq ($(SGEMM_UNROLL_N), 4)
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
ifeq ($(SGEMM_UNROLL_M), 16)
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
else
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
endif
ifeq ($(SGEMM_UNROLL_M), 4)
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
else
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
endif
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
endif
ifeq ($(SGEMM_UNROLL_N), 16)
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
else
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
endif
ifeq ($(SGEMM_UNROLL_N), 4)
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
else
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
endif
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)

View File

@ -43,7 +43,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define betaV0 v11.d[0]
#define I x16
#define size 128
#define prfm_size 640
#define calc_size 128
/**************************************************************************************
* Macro definitions
@ -126,20 +127,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmul v2.2d, v2.2d, betaV0
fmul v3.2d, v3.2d, betaV0
prfm PLDL1KEEP, [A01, prfm_size]
fmul v4.2d, v4.2d, betaV0
fmul v5.2d, v5.2d, betaV0
prfm PLDL1KEEP, [A03, prfm_size]
fmul v6.2d, v6.2d, betaV0
fmul v7.2d, v7.2d, betaV0
st1 {v0.2d, v1.2d}, [A01]
add A01, A01, size
add A01, A01, calc_size
st1 {v2.2d, v3.2d}, [A02]
add A02, A02, size
add A02, A02, calc_size
st1 {v4.2d, v5.2d}, [A03]
add A03, A03, size
add A03, A03, calc_size
st1 {v6.2d, v7.2d}, [A04]
add A04, A04, size
add A04, A04, calc_size
subs I , I , #1
bne .Lgemm_beta_03

259
kernel/arm64/sgemm_beta.S Executable file
View File

@ -0,0 +1,259 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#define ASSEMBLER
#include "common.h"
#define M x0
#define N x1
#define BETA s0
#define LDC x6
#define C00 x7
#define A01 x8
#define A02 x9
#define A03 x10
#define A04 x11
#define I x12
#define beta0 s11
#define betaV0 v11.s[0]
#define prfm_size 640
#define calc_size 128
/**************************************************************************************
* Macro definitions
**************************************************************************************/
// SAVE_REGS: reserve 11*16 bytes on the stack and store d8-d17 and x18-x28
// into that area. Must be paired with RESTORE_REGS before returning.
.macro SAVE_REGS
add sp, sp, #-(11 * 16)
stp d8, d9, [sp, #(0 * 16)]
stp d10, d11, [sp, #(1 * 16)]
stp d12, d13, [sp, #(2 * 16)]
stp d14, d15, [sp, #(3 * 16)]
stp d16, d17, [sp, #(4 * 16)]
stp x18, x19, [sp, #(5 * 16)]
stp x20, x21, [sp, #(6 * 16)]
stp x22, x23, [sp, #(7 * 16)]
stp x24, x25, [sp, #(8 * 16)]
stp x26, x27, [sp, #(9 * 16)]
str x28, [sp, #(10 * 16)]
.endm
// RESTORE_REGS: reload the registers stored by SAVE_REGS from the same
// offsets, then release the 11*16-byte stack area.
.macro RESTORE_REGS
ldp d8, d9, [sp, #(0 * 16)]
ldp d10, d11, [sp, #(1 * 16)]
ldp d12, d13, [sp, #(2 * 16)]
ldp d14, d15, [sp, #(3 * 16)]
ldp d16, d17, [sp, #(4 * 16)]
ldp x18, x19, [sp, #(5 * 16)]
ldp x20, x21, [sp, #(6 * 16)]
ldp x22, x23, [sp, #(7 * 16)]
ldp x24, x25, [sp, #(8 * 16)]
ldp x26, x27, [sp, #(9 * 16)]
ldr x28, [sp, #(10 * 16)]
add sp, sp, #(11*16)
.endm
// INIT_ZERO: set v0-v7 to all-zero. These registers are the source of the
// vector stores in the beta == 0 path.
//
// The registers are cleared with an immediate move rather than by multiplying
// their previous contents by beta: v0-v7 hold arbitrary values at this point,
// and NaN or Inf multiplied by zero is NaN, which would then be written to C.
.macro INIT_ZERO
	movi v0.4s, #0
	movi v1.4s, #0
	movi v2.4s, #0
	movi v3.4s, #0
	movi v4.4s, #0
	movi v5.4s, #0
	movi v6.4s, #0
	movi v7.4s, #0
.endm
/**************************************************************************************
* End of macro definitions
**************************************************************************************/
// Scale a block of single-precision values in place by BETA.
// Register roles come from the #defines above: M (x0) and N (x1) are the two
// extents, BETA arrives in s0, C00 (x7) is the base pointer and LDC is the
// stride between consecutive outer iterations, in elements.
// The routine runs N outer iterations; each one processes M consecutive
// 4-byte elements starting at C00, then advances C00 by LDC*4 bytes.
// When BETA compares equal to 0.0 a separate store-only path is taken.
// Always returns 0 in x0.
PROLOGUE
.align 5
// LDC is read from the top of the stack; this must happen before SAVE_REGS
// moves sp.
ldr LDC, [sp]
SAVE_REGS
.Lgemm_beta_BEGIN:
// Keep a copy of beta in s11 (beta0 / betaV0) for the multiplies below.
fmov beta0, BETA
// Nothing to do when N <= 0.
cmp N, #0
ble .Lgemm_beta_L999
// beta == 0.0: take the store-only path.
fcmp BETA, #0.0
beq .Lgemm_beta_zero_01
.Lgemm_beta_01:
// Convert LDC from elements to bytes (4 bytes per element).
lsl LDC, LDC, #2
.align 5
// ---- outer loop, beta != 0: one pass per N ----
.Lgemm_beta_02:
mov A01, C00
add C00, C00, LDC
// I = M / 32: number of full 32-element (128-byte) chunks.
asr I, M, #5
cmp I, #0
ble .Lgemm_beta_04
// A01..A04 address four consecutive 32-byte quarters of each 128-byte chunk.
add A02, A01, #32
add A03, A02, #32
add A04, A03, #32
.align 5
// Vector loop: load 32 elements, multiply each by beta, store them back,
// then advance all four pointers by calc_size (128) bytes.
.Lgemm_beta_03:
prfm PLDL1KEEP, [A01, prfm_size]
ldp q0, q1, [A01]
ldp q2, q3, [A02]
ldp q4, q5, [A03]
ldp q6, q7, [A04]
fmul v0.4s, v0.4s, betaV0
fmul v1.4s, v1.4s, betaV0
fmul v2.4s, v2.4s, betaV0
fmul v3.4s, v3.4s, betaV0
fmul v4.4s, v4.4s, betaV0
fmul v5.4s, v5.4s, betaV0
fmul v6.4s, v6.4s, betaV0
fmul v7.4s, v7.4s, betaV0
prfm PLDL1KEEP, [A01, prfm_size + 64]
st1 {v0.4s, v1.4s}, [A01]
add A01, A01, calc_size
st1 {v2.4s, v3.4s}, [A02]
add A02, A02, calc_size
st1 {v4.4s, v5.4s}, [A03]
add A03, A03, calc_size
st1 {v6.4s, v7.4s}, [A04]
add A04, A04, calc_size
subs I , I , #1
bne .Lgemm_beta_03
.align 5
// Tail: the remaining M % 32 elements, one at a time, continuing from A01.
.Lgemm_beta_04:
and I, M , #31
cmp I, #0
ble .Lgemm_beta_06
.align 5
.Lgemm_beta_05:
ldr s12, [A01]
fmul s12, s12, beta0
str s12, [A01]
add A01, A01, #4
subs I , I , #1
bne .Lgemm_beta_05
.align 5
.Lgemm_beta_06:
subs N , N, #1 // N--
bne .Lgemm_beta_02
.align 5
// Common exit for the beta != 0 path and for N <= 0: return 0.
.Lgemm_beta_L999:
mov x0, #0
RESTORE_REGS
ret
.align 5
// ---- beta == 0 path: store without loading ----
.Lgemm_beta_zero_01:
// INIT_ZERO prepares v0-v7, which are the values written by the vector
// stores below.
INIT_ZERO
// Convert LDC from elements to bytes (4 bytes per element).
lsl LDC, LDC, #2
.align 5
.Lgemm_beta_zero_02:
mov A01, C00
add C00, C00, LDC
// I = M / 32 full chunks, same chunking as the beta != 0 path.
asr I, M, #5
cmp I, #0
ble .Lgemm_beta_zero_04
add A02, A01, #32
add A03, A02, #32
add A04, A03, #32
.align 5
// Vector store loop: write v0-v7 over 32 elements per iteration.
.Lgemm_beta_zero_03:
st1 {v0.4s, v1.4s}, [A01]
add A01, A01, calc_size
st1 {v2.4s, v3.4s}, [A02]
add A02, A02, calc_size
st1 {v4.4s, v5.4s}, [A03]
add A03, A03, calc_size
st1 {v6.4s, v7.4s}, [A04]
add A04, A04, calc_size
subs I, I, #1
bne .Lgemm_beta_zero_03
.align 5
// Tail: the remaining M % 32 elements; each gets beta0 (which compared equal
// to 0.0 on entry to this path).
.Lgemm_beta_zero_04:
and I, M, #31
cmp I, #0
ble .Lgemm_beta_zero_06
.align 5
.Lgemm_beta_zero_05:
str beta0, [A01]
add A01, A01, #4
subs I, I, #1
bne .Lgemm_beta_zero_05
.align 5
.Lgemm_beta_zero_06:
subs N, N, #1
bne .Lgemm_beta_zero_02
.align 5
// Exit for the beta == 0 path: return 0.
.Lgemm_beta_zero_L999:
mov x0, #0
RESTORE_REGS
ret
EPILOGUE

View File

@ -0,0 +1,824 @@
/***************************************************************************
Copyright (c) 2019, The OpenBLAS Project
All rights reserved.
*****************************************************************************/
#define ASSEMBLER
#include "common.h"
#define M x0
#define N x1
#define A x2
#define LDA x3
#define B x4
#define M8 x5
#define A01 x6
#define A02 x7
#define A03 x8
#define A04 x9
#define A05 x10
#define A06 x11
#define A07 x12
#define A08 x13
#define B01 x14
#define B02 x15
#define B03 x16
#define B04 x17
#define B00 x22
#define I x18
#define J x19
#define TEMP1 x20
#define A_PREFETCH 256
/**************************************************************************************
* Macro definitions
**************************************************************************************/
.macro SAVE_REGS
add sp, sp, #-(11 * 16)
stp d8, d9, [sp, #(0 * 16)]
stp d10, d11, [sp, #(1 * 16)]
stp d12, d13, [sp, #(2 * 16)]
stp d14, d15, [sp, #(3 * 16)]
stp d16, d17, [sp, #(4 * 16)]
stp x18, x19, [sp, #(5 * 16)]
stp x20, x21, [sp, #(6 * 16)]
stp x22, x23, [sp, #(7 * 16)]
stp x24, x25, [sp, #(8 * 16)]
stp x26, x27, [sp, #(9 * 16)]
str x28, [sp, #(10 * 16)]
.endm
.macro RESTORE_REGS
ldp d8, d9, [sp, #(0 * 16)]
ldp d10, d11, [sp, #(1 * 16)]
ldp d12, d13, [sp, #(2 * 16)]
ldp d14, d15, [sp, #(3 * 16)]
ldp d16, d17, [sp, #(4 * 16)]
ldp x18, x19, [sp, #(5 * 16)]
ldp x20, x21, [sp, #(6 * 16)]
ldp x22, x23, [sp, #(7 * 16)]
ldp x24, x25, [sp, #(8 * 16)]
ldp x26, x27, [sp, #(9 * 16)]
ldr x28, [sp, #(10 * 16)]
add sp, sp, #(11*16)
.endm
/*************************************************************************************************************************/
.macro COPY16x8
prfm PLDL1KEEP, [A01, #A_PREFETCH]
prfm PLDL1KEEP, [A02, #A_PREFETCH]
prfm PLDL1KEEP, [A03, #A_PREFETCH]
prfm PLDL1KEEP, [A04, #A_PREFETCH]
prfm PLDL1KEEP, [A05, #A_PREFETCH]
prfm PLDL1KEEP, [A06, #A_PREFETCH]
prfm PLDL1KEEP, [A07, #A_PREFETCH]
prfm PLDL1KEEP, [A08, #A_PREFETCH]
//prfm PSTL1KEEP, [B00, M8]
ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01]
add A01, A01, #64
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00]
add TEMP1, B00, #64
ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [A02]
add A02, A02, #64
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1]
add TEMP1, TEMP1, #64
ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [A03]
add A03, A03, #64
st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [TEMP1]
add TEMP1, TEMP1, #64
ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [A04]
add A04, A04, #64
st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [TEMP1]
add TEMP1, TEMP1, #64
ld1 {v16.4s, v17.4s, v18.4s, v19.4s}, [A05]
add A05, A05, #64
st1 {v16.4s, v17.4s, v18.4s, v19.4s}, [TEMP1]
add TEMP1, TEMP1, #64
ld1 {v20.4s, v21.4s, v22.4s, v23.4s}, [A06]
add A06, A06, #64
st1 {v20.4s, v21.4s, v22.4s, v23.4s}, [TEMP1]
add TEMP1, TEMP1, #64
ld1 {v24.4s, v25.4s, v26.4s, v27.4s}, [A07]
add A07, A07, #64
st1 {v24.4s, v25.4s, v26.4s, v27.4s}, [TEMP1]
add TEMP1, TEMP1, #64
ld1 {v28.4s, v29.4s, v30.4s, v31.4s}, [A08]
add A08, A08, #64
st1 {v28.4s, v29.4s, v30.4s, v31.4s}, [TEMP1]
add TEMP1, TEMP1, #64
add B00, B00, M8
.endm
.macro COPY8x8
prfm PLDL1KEEP, [A01, #A_PREFETCH]
prfm PLDL1KEEP, [A02, #A_PREFETCH]
prfm PLDL1KEEP, [A03, #A_PREFETCH]
prfm PLDL1KEEP, [A04, #A_PREFETCH]
prfm PLDL1KEEP, [A05, #A_PREFETCH]
prfm PLDL1KEEP, [A06, #A_PREFETCH]
prfm PLDL1KEEP, [A07, #A_PREFETCH]
prfm PLDL1KEEP, [A08, #A_PREFETCH]
ldp q0, q1, [A01]
ldp q2, q3, [A02]
add A01, A01, #32
add A02, A02, #32
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B01]
add B01, B01, #64
ldp q4, q5, [A03]
ldp q6, q7, [A04]
add A03, A03, #32
add A04, A04, #32
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [B01]
add B01, B01, #64
ldp q8, q9, [A05]
ldp q10, q11, [A06]
add A05, A05, #32
add A06, A06, #32
st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [B01]
add B01, B01, #64
ldp q12, q13, [A07]
ldp q14, q15, [A08]
add A07, A07, #32
add A08, A08, #32
st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [B01]
add B01, B01, #64
.endm
.macro COPY4x8
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
//prfm PLDL1KEEP, [A02, #A_PREFETCH]
//prfm PLDL1KEEP, [A03, #A_PREFETCH]
//prfm PLDL1KEEP, [A04, #A_PREFETCH]
//prfm PLDL1KEEP, [A05, #A_PREFETCH]
//prfm PLDL1KEEP, [A06, #A_PREFETCH]
//prfm PLDL1KEEP, [A07, #A_PREFETCH]
//prfm PLDL1KEEP, [A08, #A_PREFETCH]
ldr q0, [A01]
ldr q1, [A02]
ldr q2, [A03]
ldr q3, [A04]
add A01, A01, #16
add A02, A02, #16
add A03, A03, #16
add A04, A04, #16
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B02]
add B02, B02, #64
ldr q4, [A05]
ldr q5, [A06]
ldr q6, [A07]
ldr q7, [A08]
add A05, A05, #16
add A06, A06, #16
add A07, A07, #16
add A08, A08, #16
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [B02]
add B02, B02, #64
.endm
.macro COPY2x8
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
//prfm PLDL1KEEP, [A02, #A_PREFETCH]
//prfm PLDL1KEEP, [A03, #A_PREFETCH]
//prfm PLDL1KEEP, [A04, #A_PREFETCH]
//prfm PLDL1KEEP, [A05, #A_PREFETCH]
//prfm PLDL1KEEP, [A06, #A_PREFETCH]
//prfm PLDL1KEEP, [A07, #A_PREFETCH]
//prfm PLDL1KEEP, [A08, #A_PREFETCH]
ldr d0, [A01]
ldr d1, [A02]
ldr d2, [A03]
ldr d3, [A04]
add A01, A01, #8
add A02, A02, #8
add A03, A03, #8
add A04, A04, #8
stp d0, d1, [B03]
add B03, B03, #16
stp d2, d3, [B03]
add B03, B03, #16
ldr d4, [A05]
ldr d5, [A06]
ldr d6, [A07]
ldr d7, [A08]
add A05, A05, #8
add A06, A06, #8
add A07, A07, #8
add A08, A08, #8
stp d4, d5, [B03]
add B03, B03, #16
stp d6, d7, [B03]
add B03, B03, #16
.endm
// COPY1x8: copy one 4-byte element from each of the eight source pointers
// A01..A08 to eight consecutive slots at B04, advancing every source pointer
// by 4 bytes and B04 by 32 bytes in total.
//
// Each element of A05..A08 is loaded exactly once with a 4-byte load. The
// earlier version followed each of those loads with an 8-byte post-indexed
// load into the same register, which read 4 bytes beyond the element being
// copied and advanced the pointer by 8 instead of 4.
.macro COPY1x8
	ldr s0, [A01]
	ldr s1, [A02]
	ldr s2, [A03]
	ldr s3, [A04]
	add A01, A01, #4
	add A02, A02, #4
	add A03, A03, #4
	add A04, A04, #4
	stp s0, s1, [B04]
	add B04, B04, #8
	stp s2, s3, [B04]
	add B04, B04, #8
	ldr s4, [A05]
	ldr s5, [A06]
	ldr s6, [A07]
	ldr s7, [A08]
	add A05, A05, #4
	add A06, A06, #4
	add A07, A07, #4
	add A08, A08, #4
	stp s4, s5, [B04]
	add B04, B04, #8
	stp s6, s7, [B04]
	add B04, B04, #8
.endm
/*************************************************************************************************************************/
.macro COPY16x4
prfm PLDL1KEEP, [A01, #A_PREFETCH]
prfm PLDL1KEEP, [A02, #A_PREFETCH]
prfm PLDL1KEEP, [A03, #A_PREFETCH]
prfm PLDL1KEEP, [A04, #A_PREFETCH]
ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01]
add A01, A01, #64
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00]
add TEMP1, B00, #64
ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [A02]
add A02, A02, #64
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1]
add TEMP1, TEMP1, #64
ld1 {v8.4s, v9.4s, v10.4s, v11.4s}, [A03]
add A03, A03, #64
st1 {v8.4s, v9.4s, v10.4s, v11.4s}, [TEMP1]
add TEMP1, TEMP1, #64
ld1 {v12.4s, v13.4s, v14.4s, v15.4s}, [A04]
add A04, A04, #64
st1 {v12.4s, v13.4s, v14.4s, v15.4s}, [TEMP1]
add B00, B00, M8
.endm
.macro COPY8x4
prfm PLDL1KEEP, [A01, #A_PREFETCH]
prfm PLDL1KEEP, [A02, #A_PREFETCH]
prfm PLDL1KEEP, [A03, #A_PREFETCH]
prfm PLDL1KEEP, [A04, #A_PREFETCH]
ldp q0, q1, [A01]
ldp q2, q3, [A02]
add A01, A01, #32
add A02, A02, #32
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B01]
add B01, B01, #64
ldp q4, q5, [A03]
ldp q6, q7, [A04]
add A03, A03, #32
add A04, A04, #32
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [B01]
add B01, B01, #64
.endm
.macro COPY4x4
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
//prfm PLDL1KEEP, [A02, #A_PREFETCH]
//prfm PLDL1KEEP, [A03, #A_PREFETCH]
//prfm PLDL1KEEP, [A04, #A_PREFETCH]
ldr q0, [A01]
ldr q1, [A02]
ldr q2, [A03]
ldr q3, [A04]
add A01, A01, #16
add A02, A02, #16
add A03, A03, #16
add A04, A04, #16
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B02]
add B02, B02, #64
.endm
.macro COPY2x4
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
//prfm PLDL1KEEP, [A02, #A_PREFETCH]
//prfm PLDL1KEEP, [A03, #A_PREFETCH]
//prfm PLDL1KEEP, [A04, #A_PREFETCH]
ldr d0, [A01]
ldr d1, [A02]
ldr d2, [A03]
ldr d3, [A04]
add A01, A01, #8
add A02, A02, #8
add A03, A03, #8
add A04, A04, #8
stp d0, d1, [B03]
add B03, B03, #16
stp d2, d3, [B03]
add B03, B03, #16
.endm
.macro COPY1x4
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
//prfm PLDL1KEEP, [A02, #A_PREFETCH]
//prfm PLDL1KEEP, [A03, #A_PREFETCH]
//prfm PLDL1KEEP, [A04, #A_PREFETCH]
ldr s0, [A01]
ldr s1, [A02]
ldr s2, [A03]
ldr s3, [A04]
add A01, A01, #4
add A02, A02, #4
add A03, A03, #4
add A04, A04, #4
stp s0, s1, [B04]
add B04, B04, #8
stp s2, s3, [B04]
add B04, B04, #8
.endm
/*************************************************************************************************************************/
.macro COPY16x2
prfm PLDL1KEEP, [A01, #A_PREFETCH]
prfm PLDL1KEEP, [A02, #A_PREFETCH]
ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01]
add A01, A01, #64
ld1 {v4.4s, v5.4s, v6.4s, v7.4s}, [A02]
add A02, A02, #64
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00]
add TEMP1, B00, #64
st1 {v4.4s, v5.4s, v6.4s, v7.4s}, [TEMP1]
add B00, B00, M8
.endm
.macro COPY8x2
prfm PLDL1KEEP, [A01, #A_PREFETCH]
prfm PLDL1KEEP, [A02, #A_PREFETCH]
ld1 {v0.4s, v1.4s}, [A01]
ld1 {v2.4s, v3.4s}, [A02]
add A01, A01, #32
add A02, A02, #32
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B01]
add B01, B01, #64
.endm
.macro COPY4x2
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
//prfm PLDL1KEEP, [A02, #A_PREFETCH]
ldr q0, [A01]
ldr q1, [A02]
add A01, A01, #16
add A02, A02, #16
stp q0, q1, [B02]
add B02, B02, #32
.endm
.macro COPY2x2
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
//prfm PLDL1KEEP, [A02, #A_PREFETCH]
ldr d0, [A01]
ldr d1, [A02]
add A01, A01, #8
add A02, A02, #8
stp d0, d1, [B03]
add B03, B03, #16
.endm
.macro COPY1x2
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
//prfm PLDL1KEEP, [A02, #A_PREFETCH]
ldr s0, [A01]
ldr s1, [A02]
add A01, A01, #4
add A02, A02, #4
stp s0, s1, [B04]
add B04, B04, #8
.endm
/*************************************************************************************************************************/
.macro COPY16x1
prfm PLDL1KEEP, [A01, #A_PREFETCH]
ld1 {v0.4s, v1.4s, v2.4s, v3.4s}, [A01]
add A01, A01, #64
st1 {v0.4s, v1.4s, v2.4s, v3.4s}, [B00]
add B00, B00, M8
.endm
.macro COPY8x1
prfm PLDL1KEEP, [A01, #A_PREFETCH]
ldp q0, q1, [A01]
add A01, A01, #32
stp q0, q1, [B01]
add B01, B01, #32
.endm
.macro COPY4x1
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
ldr q0, [A01]
add A01, A01, #16
str q0, [B02]
add B02, B02, #16
.endm
.macro COPY2x1
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
ldr d0, [A01]
add A01, A01, #8
str d0, [B03]
add B03, B03, #8
.endm
.macro COPY1x1
//prfm PLDL1KEEP, [A01, #A_PREFETCH]
ldr s0, [A01]
add A01, A01, #4
str s0, [B04]
add B04, B04, #4
.endm
/**************************************************************************************
* End of macro definitions
**************************************************************************************/
// Copy routine for 4-byte elements. Register roles come from the #defines
// above: M (x0), N (x1), source A (x2) with stride LDA (x3, in elements),
// destination B (x4).
// The source is walked in groups of 8, then 4, 2 and 1 lines, where
// consecutive lines are LDA*4 bytes apart; M selects how many lines there are.
// Within a group, N elements per line are consumed in chunks of 16 (written
// through B00), then 8 (B01), 4 (B02), 2 (B03) and 1 (B04).
// Always returns 0 in x0.
PROLOGUE
.align 5
SAVE_REGS
lsl LDA, LDA, #2 // LDA = LDA * SIZE
lsl TEMP1, M, #2 // TEMP1 = M * SIZE
// B01..B04 = B + (N rounded down to a multiple of 16/8/4/2) * M * SIZE:
// the start of the destination areas used by the 8-, 4-, 2- and 1-wide
// chunks respectively.
and B01 , N , #-16
and B02 , N , #-8
and B03 , N , #-4
and B04 , N , #-2
mul B01, B01, TEMP1
mul B02, B02, TEMP1
mul B03, B03, TEMP1
mul B04, B04, TEMP1
add B01 , B01, B
add B02 , B02, B
add B03 , B03, B
add B04 , B04, B
// M8 is the step applied to B00 after every 16-wide chunk.
lsl M8, M, #6 // M8 = M * 16 * SIZE
// ---- groups of 8 lines ----
.Lsgemm_tcopy_L8_BEGIN:
asr J, M, #3 // J = M / 8
cmp J, #0
ble .Lsgemm_tcopy_L4_BEGIN
.align 5
.Lsgemm_tcopy_L8_M16_BEGIN:
// A01..A08 point at eight consecutive lines; A moves on to the next group.
mov A01, A
add A02, A01, LDA
add A03, A02, LDA
add A04, A03, LDA
add A05, A04, LDA
add A06, A05, LDA
add A07, A06, LDA
add A08, A07, LDA
add A, A08, LDA
mov B00, B
add B, B00, #512 // B = B + 8 * 16 * SIZE
asr I, N, #4 // I = N / 16
cmp I, #0
ble .Lsgemm_tcopy_L8_M16_40
.align 5
.Lsgemm_tcopy_L8_M16_20:
COPY16x8
subs I , I , #1
bne .Lsgemm_tcopy_L8_M16_20
// Remainder of N: each set bit (8, 4, 2, 1) triggers one narrower copy.
// After tst, ble is taken when the tested bit is clear.
.Lsgemm_tcopy_L8_M16_40:
tst N , #8
ble .Lsgemm_tcopy_L8_M16_60
COPY8x8
.Lsgemm_tcopy_L8_M16_60:
tst N , #4
ble .Lsgemm_tcopy_L8_M16_80
COPY4x8
.Lsgemm_tcopy_L8_M16_80:
tst N , #2
ble .Lsgemm_tcopy_L8_M16_100
COPY2x8
.Lsgemm_tcopy_L8_M16_100:
tst N, #1
ble .Lsgemm_tcopy_L8_M16_END
COPY1x8
.Lsgemm_tcopy_L8_M16_END:
subs J , J, #1 // j--
bne .Lsgemm_tcopy_L8_M16_BEGIN
/*********************************************************************************************/
// ---- one group of 4 lines, if bit 2 of M is set ----
.Lsgemm_tcopy_L4_BEGIN:
// Done if M is a multiple of 8.
tst M, #7
ble .Lsgemm_tcopy_L999
tst M, #4
ble .Lsgemm_tcopy_L2_BEGIN
.Lsgemm_tcopy_L4_M16_BEGIN:
mov A01, A
add A02, A01, LDA
add A03, A02, LDA
add A04, A03, LDA
add A, A04, LDA
mov B00, B
add B, B00, #256 // B = B + 4 * 16 * SIZE
asr I, N, #4 // I = N / 16
cmp I, #0
ble .Lsgemm_tcopy_L4_M16_40
.align 5
.Lsgemm_tcopy_L4_M16_20:
COPY16x4
subs I , I , #1
bne .Lsgemm_tcopy_L4_M16_20
.Lsgemm_tcopy_L4_M16_40:
tst N , #8
ble .Lsgemm_tcopy_L4_M16_60
COPY8x4
.Lsgemm_tcopy_L4_M16_60:
tst N , #4
ble .Lsgemm_tcopy_L4_M16_80
COPY4x4
.Lsgemm_tcopy_L4_M16_80:
tst N , #2
ble .Lsgemm_tcopy_L4_M16_100
COPY2x4
.Lsgemm_tcopy_L4_M16_100:
tst N, #1
ble .Lsgemm_tcopy_L4_M16_END
COPY1x4
.Lsgemm_tcopy_L4_M16_END:
/*********************************************************************************************/
// ---- one group of 2 lines, if bit 1 of M is set ----
.Lsgemm_tcopy_L2_BEGIN:
// Done if no lines remain (M is a multiple of 4).
tst M, #3
ble .Lsgemm_tcopy_L999
tst M, #2
ble .Lsgemm_tcopy_L1_BEGIN
.Lsgemm_tcopy_L2_M16_BEGIN:
mov A01, A
add A02, A01, LDA
add A, A02, LDA
mov B00, B
add B, B00, #128 // B = B + 2 * 16 * SIZE
asr I, N, #4 // I = N / 16
cmp I, #0
ble .Lsgemm_tcopy_L2_M16_40
.align 5
.Lsgemm_tcopy_L2_M16_20:
COPY16x2
subs I , I , #1
bne .Lsgemm_tcopy_L2_M16_20
.Lsgemm_tcopy_L2_M16_40:
tst N , #8
ble .Lsgemm_tcopy_L2_M16_60
COPY8x2
.Lsgemm_tcopy_L2_M16_60:
tst N , #4
ble .Lsgemm_tcopy_L2_M16_80
COPY4x2
.Lsgemm_tcopy_L2_M16_80:
tst N , #2
ble .Lsgemm_tcopy_L2_M16_100
COPY2x2
.Lsgemm_tcopy_L2_M16_100:
tst N , #1
ble .Lsgemm_tcopy_L2_M16_END
COPY1x2
.Lsgemm_tcopy_L2_M16_END:
/*********************************************************************************************/
// ---- final single line, if bit 0 of M is set ----
.Lsgemm_tcopy_L1_BEGIN:
tst M, #1
ble .Lsgemm_tcopy_L999
.Lsgemm_tcopy_L1_M16_BEGIN:
mov A01, A // A01 = A
mov B00, B
asr I, N, #4 // I = N / 16
cmp I, #0
ble .Lsgemm_tcopy_L1_M16_40
.align 5
.Lsgemm_tcopy_L1_M16_20:
COPY16x1
subs I , I , #1
bne .Lsgemm_tcopy_L1_M16_20
.Lsgemm_tcopy_L1_M16_40:
tst N , #8
ble .Lsgemm_tcopy_L1_M16_60
COPY8x1
.Lsgemm_tcopy_L1_M16_60:
tst N , #4
ble .Lsgemm_tcopy_L1_M16_80
COPY4x1
.Lsgemm_tcopy_L1_M16_80:
tst N , #2
ble .Lsgemm_tcopy_L1_M16_100
COPY2x1
.Lsgemm_tcopy_L1_M16_100:
tst N , #1
ble .Lsgemm_tcopy_L1_M16_END
COPY1x1
.Lsgemm_tcopy_L1_M16_END:
.Lsgemm_tcopy_L999:
mov x0, #0 // set return value
RESTORE_REGS
ret
EPILOGUE

View File

@ -739,6 +739,26 @@ static void init_parameter(void) {
}
#else //POWER
#if defined(ARCH_ZARCH)
/* ARCH_ZARCH variant: fill the P, Q and R blocking fields of TABLE_NAME for
   each of the four GEMM flavours from the matching *_DEFAULT_* values. */
static void init_parameter(void) {
  /* sgemm */
  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
  TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
  TABLE_NAME.sgemm_r = SGEMM_DEFAULT_R;

  /* dgemm */
  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
  TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
  TABLE_NAME.dgemm_r = DGEMM_DEFAULT_R;

  /* cgemm */
  TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
  TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
  TABLE_NAME.cgemm_r = CGEMM_DEFAULT_R;

  /* zgemm */
  TABLE_NAME.zgemm_p = ZGEMM_DEFAULT_P;
  TABLE_NAME.zgemm_q = ZGEMM_DEFAULT_Q;
  TABLE_NAME.zgemm_r = ZGEMM_DEFAULT_R;
}
#else //ZARCH
#ifdef ARCH_X86
static int get_l2_size_old(void){
int i, eax, ebx, ecx, edx, cpuid_level;
@ -1325,4 +1345,5 @@ static void init_parameter(void) {
}
#endif //POWER
#endif //ZARCH
#endif //defined(ARCH_ARM64)

View File

@ -98,5 +98,5 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c

View File

@ -95,5 +95,5 @@ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
CGEMM3MKERNEL = cgemm3m_kernel_8x4_haswell.c
ZGEMM3MKERNEL = zgemm3m_kernel_2x8_nehalem.S
ZGEMM3MKERNEL = zgemm3m_kernel_4x4_haswell.c

View File

@ -0,0 +1,224 @@
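/* %0 = "+r"(a_pointer), %1 = "+r"(b_pointer), %2 = "+r"(c_pointer), %3 = "+r"(ldc_in_bytes), %4 for k_count, %5 for c_store, %6 = "+r"(const_val) pointing at {alphar, alphai}, %7 for m_count, %8 = "+r"(next_b) used as a prefetch pointer */
/* r11 = saved m(const), r12 = k << 5(const), r13 = k(const), r14 = b_head_pos(const), r15 = tmp */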
#include "common.h"
#include <stdint.h>
//recommended settings: GEMM_Q=256, GEMM_P=256
/* m = 4 *//* ymm0 for alpha, ymm1-ymm3 for temporary use, ymm4-ymm15 for accumulators */
/* One k-step, m=4, n=1: load 4 doubles from A (%0), broadcast 1 double from B (%1),
   accumulate the products into ymm4; advances A by 32 bytes and B by 8 bytes. */
#define KERNEL_k1m4n1 \
"vmovupd (%0),%%ymm1; addq $32,%0;"\
"vbroadcastsd (%1),%%ymm2; vfmadd231pd %%ymm1,%%ymm2,%%ymm4;"\
"addq $8,%1;"
/* The "_h_" variants do the loads and FMAs and advance A (%0) but leave B (%1) untouched;
   the plain variant of the same name appends the B advance. */
/* m=4, n=2: ymm1 holds A elements 0 and 2 duplicated, ymm2 holds elements 1 and 3 duplicated;
   ymm3 holds two B values repeated in both 128-bit lanes. Accumulators: ymm4, ymm5. */
#define KERNEL_h_k1m4n2 \
"vmovddup (%0),%%ymm1; vmovddup 8(%0),%%ymm2; addq $32,%0;"\
"vbroadcastf128 (%1),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,%%ymm4; vfmadd231pd %%ymm2,%%ymm3,%%ymm5;"
#define KERNEL_k1m4n2 KERNEL_h_k1m4n2 "addq $16,%1;"
/* m=4, n=4: same as n=2 plus the next two B values at 16(%1) into ymm6, ymm7. */
#define KERNEL_h_k1m4n4 \
KERNEL_h_k1m4n2 "vbroadcastf128 16(%1),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,%%ymm6; vfmadd231pd %%ymm2,%%ymm3,%%ymm7;"
#define KERNEL_k1m4n4 KERNEL_h_k1m4n4 "addq $32,%1;"
/* Building block for four more n values: the variadic arguments are pasted verbatim as the
   address operand, so (%1,%%r12,1) / (%1,%%r12,2) read B at %1 + r12 and %1 + 2*r12
   (r12 = k << 5). c1..c4 are the accumulators; off1/off2 are byte displacements. */
#define unit_kernel_k1m4n4(c1,c2,c3,c4,off1,off2,...) \
"vbroadcastf128 "#off1"("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c1"; vfmadd231pd %%ymm2,%%ymm3,"#c2";"\
"vbroadcastf128 "#off2"("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c3"; vfmadd231pd %%ymm2,%%ymm3,"#c4";"
#define KERNEL_h_k1m4n8 KERNEL_h_k1m4n4 unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,0,16,%1,%%r12,1)
#define KERNEL_k1m4n8 KERNEL_h_k1m4n8 "addq $32,%1;"
#define KERNEL_h_k1m4n12 KERNEL_h_k1m4n8 unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,0,16,%1,%%r12,2)
#define KERNEL_k1m4n12 KERNEL_h_k1m4n12 "addq $32,%1;"
/* Two k-steps: for n = 1, 2, 4, 8 simply the one-step kernel twice. */
#define KERNEL_k2m4n1 KERNEL_k1m4n1 KERNEL_k1m4n1
#define KERNEL_k2m4n2 KERNEL_k1m4n2 KERNEL_k1m4n2
#define KERNEL_k2m4n4 KERNEL_k1m4n4 KERNEL_k1m4n4
#define KERNEL_k2m4n8 KERNEL_k1m4n8 KERNEL_k1m4n8
/* Two k-steps for n=12, written out by hand: the second step uses displacements 32/48 and
   32(%0)/40(%0) so that A and B are each advanced only once (by 64 bytes); also issues a
   prefetcht0 512 bytes ahead in A. */
#define KERNEL_k2m4n12 \
"vmovddup (%0),%%ymm1; vmovddup 8(%0),%%ymm2;"\
unit_kernel_k1m4n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,0,16,%1)\
unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,0,16,%1,%%r12,1)\
unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,0,16,%1,%%r12,2)\
"vmovddup 32(%0),%%ymm1; vmovddup 40(%0),%%ymm2; prefetcht0 512(%0); addq $64,%0;"\
unit_kernel_k1m4n4(%%ymm4,%%ymm5,%%ymm6,%%ymm7,32,48,%1)\
unit_kernel_k1m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11,32,48,%1,%%r12,1)\
unit_kernel_k1m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15,32,48,%1,%%r12,2) "addq $64,%1;"
/* INIT_m4n<N>: zero the accumulators used by the matching m=4 kernel
   (ymm4 for n=1, ymm4-5 for n=2, ymm4-7 for n=4, ymm4-11 for n=8, ymm4-15 for n=12). */
#define INIT_m4n1 "vpxor %%ymm4,%%ymm4,%%ymm4;"
#define INIT_m4n2 INIT_m4n1 "vpxor %%ymm5,%%ymm5,%%ymm5;"
#define INIT_m4n4 INIT_m4n2 "vpxor %%ymm6,%%ymm6,%%ymm6;vpxor %%ymm7,%%ymm7,%%ymm7;"
#define unit_init_m4n4(c1,c2,c3,c4) \
"vpxor "#c1","#c1","#c1";vpxor "#c2","#c2","#c2";vpxor "#c3","#c3","#c3";vpxor "#c4","#c4","#c4";"
#define INIT_m4n8 INIT_m4n4 unit_init_m4n4(%%ymm8,%%ymm9,%%ymm10,%%ymm11)
#define INIT_m4n12 INIT_m4n8 unit_init_m4n4(%%ymm12,%%ymm13,%%ymm14,%%ymm15)
/* SAVE for m=4, n=1: each accumulated value is duplicated into a pair of adjacent doubles,
   multiplied by ymm0 (the broadcast {alphar, alphai} pair) and added to the 64 bytes at C (%2):
   vfmadd213pd computes dest = ymm0 * dest + memory. */
#define SAVE_h_m4n1 \
"vpermpd $216,%%ymm4,%%ymm3; vunpcklpd %%ymm3,%%ymm3,%%ymm1; vunpckhpd %%ymm3,%%ymm3,%%ymm2;"\
"vfmadd213pd (%2),%%ymm0,%%ymm1; vfmadd213pd 32(%2),%%ymm0,%%ymm2; vmovupd %%ymm1,(%2); vmovupd %%ymm2,32(%2);"
/* Writes back one accumulator pair (c1,c2): after rearranging 128-bit lanes, updates 64 bytes
   at (%5) and 64 bytes at (%5,%3,1), then moves the store cursor %5 forward by 2 * ldc_in_bytes.
   Clobbers c1, c2, ymm2 and ymm3. */
#define unit_save_m4n2(c1,c2) \
"vperm2f128 $2,"#c1","#c2",%%ymm2; vperm2f128 $19,"#c1","#c2","#c2"; vmovapd %%ymm2,"#c1";"\
"vunpcklpd "#c1","#c1",%%ymm2; vunpcklpd "#c2","#c2",%%ymm3;"\
"vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd 32(%5),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,32(%5);"\
"vunpckhpd "#c1","#c1",%%ymm2; vunpckhpd "#c2","#c2",%%ymm3;"\
"vfmadd213pd (%5,%3,1),%%ymm0,%%ymm2; vfmadd213pd 32(%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5,%3,1); vmovupd %%ymm3,32(%5,%3,1);"\
"leaq (%5,%3,2),%5;"
/* n >= 2: start the store cursor %5 at c_pointer (%2) and save the accumulator pairs in order. */
#define SAVE_h_m4n2 "movq %2,%5;" unit_save_m4n2(%%ymm4,%%ymm5)
#define SAVE_h_m4n4 SAVE_h_m4n2 unit_save_m4n2(%%ymm6,%%ymm7)
#define SAVE_h_m4n8 SAVE_h_m4n4 unit_save_m4n2(%%ymm8,%%ymm9) unit_save_m4n2(%%ymm10,%%ymm11)
#define SAVE_h_m4n12 SAVE_h_m4n8 unit_save_m4n2(%%ymm12,%%ymm13) unit_save_m4n2(%%ymm14,%%ymm15)
/* Save, then advance c_pointer (%2) by 64 bytes for the next m=4 strip. */
#define SAVE_m4(ndim) SAVE_h_m4n##ndim "addq $64,%2;"
/* Full m=4 strip for an ndim-wide panel: zero the accumulators, reload the k counter (%4) from r13
   and the B pointer (%1) from r14, run the k loops, then save.
   - Label <ndim>004041: unrolled loop of 8 k-steps (4 x KERNEL_k2); entered only when k >= 24 and
     repeated while at least 16 k-steps remain after each pass. Inside it, %5 walks over C issuing
     prefetcht1: r15 alternates between 126 and ldc_in_bytes, so %5 moves by +63 and then by
     ldc_in_bytes - 63 on successive passes. %8 (next_b) is prefetched and advanced by 32 bytes per pass.
   - Label <ndim>004042: remaining k-steps one at a time, with prefetcht0 on C.
   - Label <ndim>004043: prefetch the start of the B panel (r14) and write the results back. */
#define COMPUTE_m4(ndim) \
INIT_m4n##ndim\
"movq %%r13,%4; movq %%r14,%1; movq %2,%5; xorq %%r15,%%r15;"\
"cmpq $24,%4; jb "#ndim"004042f;"\
#ndim"004041:\n\t"\
"cmpq $126,%%r15; movq $126,%%r15; cmoveq %3,%%r15;"\
KERNEL_k2m4n##ndim KERNEL_k2m4n##ndim\
"prefetcht1 (%5); subq $63,%5;"\
KERNEL_k2m4n##ndim KERNEL_k2m4n##ndim\
"addq %%r15,%5; prefetcht1 (%8); addq $32,%8;"\
"subq $8,%4; cmpq $16,%4; jnb "#ndim"004041b;"\
"movq %2,%5;"\
#ndim"004042:\n\t"\
"testq %4,%4; jz "#ndim"004043f;"\
"prefetcht0 (%5); prefetcht0 63(%5);"\
KERNEL_k1m4n##ndim\
"prefetcht0 (%5,%3,4); prefetcht0 63(%5,%3,4); addq %3,%5;"\
"decq %4; jmp "#ndim"004042b;"\
#ndim"004043:\n\t"\
"prefetcht0 (%%r14); prefetcht0 64(%%r14);"\
SAVE_m4(ndim)
/* m = 2 *//* vmm0 for alpha, vmm1-vmm3 for temporary use, vmm4-vmm9 for accumulators */
/* One k-step, m=2, n=1: 2 doubles of A times one duplicated B value, accumulated in xmm4;
   advances A by 16 bytes and B by 8 bytes. */
#define KERNEL_k1m2n1 \
"vmovupd (%0),%%xmm1; addq $16,%0;"\
"vmovddup (%1),%%xmm2; vfmadd231pd %%xmm1,%%xmm2,%%xmm4;"\
"addq $8,%1;"
/* m=2, n=2: each A element is duplicated (xmm1, xmm2) and multiplied by the two B values in xmm3;
   accumulators xmm4 and xmm5. As for m=4, "_h_" variants leave the B pointer (%1) untouched. */
#define KERNEL_h_k1m2n2 \
"vmovddup (%0),%%xmm1; vmovddup 8(%0),%%xmm2; addq $16,%0;"\
"vmovupd (%1),%%xmm3; vfmadd231pd %%xmm1,%%xmm3,%%xmm4; vfmadd231pd %%xmm2,%%xmm3,%%xmm5;"
#define KERNEL_k1m2n2 KERNEL_h_k1m2n2 "addq $16,%1;"
/* Building block for four n values: loads 4 doubles of B from the address given by the variadic
   arguments and multiplies them by the two broadcast A values in ymm1 / ymm2. */
#define unit_kernel_k1m2n4(c1,c2,...) \
"vmovupd ("#__VA_ARGS__"),%%ymm3; vfmadd231pd %%ymm1,%%ymm3,"#c1"; vfmadd231pd %%ymm2,%%ymm3,"#c2";"
#define KERNEL_h_k1m2n4 \
"vbroadcastsd (%0),%%ymm1; vbroadcastsd 8(%0),%%ymm2; addq $16,%0;"\
unit_kernel_k1m2n4(%%ymm4,%%ymm5,%1)
#define KERNEL_k1m2n4 KERNEL_h_k1m2n4 "addq $32,%1;"
/* n=8 and n=12 read the further B data at %1 + r12 and %1 + 2*r12 (r12 = k << 5). */
#define KERNEL_h_k1m2n8 KERNEL_h_k1m2n4 \
unit_kernel_k1m2n4(%%ymm6,%%ymm7,%1,%%r12,1)
#define KERNEL_k1m2n8 KERNEL_h_k1m2n8 "addq $32,%1;"
#define KERNEL_h_k1m2n12 KERNEL_h_k1m2n8 \
unit_kernel_k1m2n4(%%ymm8,%%ymm9,%1,%%r12,2)
#define KERNEL_k1m2n12 KERNEL_h_k1m2n12 "addq $32,%1;"
/* INIT_m2n<N>: zero the accumulators used by the matching m=2 kernel. */
#define INIT_m2n1 "vpxor %%xmm4,%%xmm4,%%xmm4;"
#define INIT_m2n2 INIT_m2n1 "vpxor %%xmm5,%%xmm5,%%xmm5;"
#define unit_init_m2n4(c1,c2) "vpxor "#c1","#c1","#c1";vpxor "#c2","#c2","#c2";"
#define INIT_m2n4 unit_init_m2n4(%%ymm4,%%ymm5)
#define INIT_m2n8 INIT_m2n4 unit_init_m2n4(%%ymm6,%%ymm7)
#define INIT_m2n12 INIT_m2n8 unit_init_m2n4(%%ymm8,%%ymm9)
/* SAVE for m=2: every accumulated value is duplicated into a pair of adjacent doubles, multiplied
   by ymm0 ({alphar, alphai} pairs) and added to C (vfmadd213pd: dest = ymm0 * dest + memory).
   n=1 and n=2 address C directly through %2; larger n use the store cursor %5. */
#define SAVE_h_m2n1 \
"vinsertf128 $1,%%xmm4,%%ymm4,%%ymm4; vpermilpd $12,%%ymm4,%%ymm4; vfmadd213pd (%2),%%ymm0,%%ymm4; vmovupd %%ymm4,(%2);"
#define SAVE_h_m2n2 \
"vinsertf128 $1,%%xmm5,%%ymm4,%%ymm4; vunpcklpd %%ymm4,%%ymm4,%%ymm1; vunpckhpd %%ymm4,%%ymm4,%%ymm2;"\
"vfmadd213pd (%2),%%ymm0,%%ymm1; vmovupd %%ymm1,(%2);"\
"vfmadd213pd (%2,%3,1),%%ymm0,%%ymm2; vmovupd %%ymm2,(%2,%3,1);"
/* Writes back one accumulator pair to four consecutive ldc-strided locations, 32 bytes each,
   moving %5 forward by 4 * ldc_in_bytes in total. Clobbers ymm1-ymm3. */
#define unit_save_m2n4(c1,c2) \
"vperm2f128 $2,"#c1","#c2",%%ymm1; vunpcklpd %%ymm1,%%ymm1,%%ymm2; vunpckhpd %%ymm1,%%ymm1,%%ymm3;"\
"vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd (%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,(%5,%3,1); leaq (%5,%3,2),%5;"\
"vperm2f128 $19,"#c1","#c2",%%ymm1; vunpcklpd %%ymm1,%%ymm1,%%ymm2; vunpckhpd %%ymm1,%%ymm1,%%ymm3;"\
"vfmadd213pd (%5),%%ymm0,%%ymm2; vfmadd213pd (%5,%3,1),%%ymm0,%%ymm3; vmovupd %%ymm2,(%5); vmovupd %%ymm3,(%5,%3,1); leaq (%5,%3,2),%5;"
#define SAVE_h_m2n4 "movq %2,%5;" unit_save_m2n4(%%ymm4,%%ymm5)
#define SAVE_h_m2n8 SAVE_h_m2n4 unit_save_m2n4(%%ymm6,%%ymm7)
#define SAVE_h_m2n12 SAVE_h_m2n8 unit_save_m2n4(%%ymm8,%%ymm9)
/* Save, then advance c_pointer (%2) by 32 bytes. */
#define SAVE_m2(ndim) SAVE_h_m2n##ndim "addq $32,%2;"
/* m=2 strip for an ndim-wide panel: zero the accumulators, reload the k counter (%4) from r13 and
   the B pointer (%1) from r14, run one KERNEL_k1m2n<ndim> per k-step until %4 reaches zero
   (no unrolling, no prefetch), then save. */
#define COMPUTE_m2(ndim) \
INIT_m2n##ndim\
"movq %%r13,%4; movq %%r14,%1;"\
#ndim"002022:\n\t"\
"testq %4,%4; jz "#ndim"002023f;"\
KERNEL_k1m2n##ndim\
"decq %4; jmp "#ndim"002022b;"\
#ndim"002023:\n\t"\
SAVE_m2(ndim)
/* m = 1 *//* vmm0 for alpha, vmm1-vmm3 and vmm10-vmm15 for temporary use, vmm4-vmm6 for accumulators */
/* One k-step, m=1, n=1: scalar FMA of one A value and one B value into xmm4. */
#define KERNEL_k1m1n1 \
"vmovsd (%0),%%xmm1; addq $8,%0;"\
"vfmadd231sd (%1),%%xmm1,%%xmm4; addq $8,%1;"
/* m=1, n=2: the A value is duplicated and multiplied by two B values; accumulator xmm4. */
#define KERNEL_k1m1n2 \
"vmovddup (%0),%%xmm1; addq $8,%0;"\
"vfmadd231pd (%1),%%xmm1,%%xmm4; addq $16,%1;"
/* Building block for four n values: 4 doubles of B (address from the variadic arguments) times the
   broadcast A value in ymm1. */
#define unit_kernel_k1m1n4(c1,...) \
"vmovupd ("#__VA_ARGS__"),%%ymm2; vfmadd231pd %%ymm1,%%ymm2,"#c1";"
/* "_h_" variants leave the B pointer (%1) untouched; n=8 / n=12 read further B data at
   %1 + r12 and %1 + 2*r12 (r12 = k << 5). Accumulators: ymm4, ymm5, ymm6. */
#define KERNEL_h_k1m1n4 \
"vbroadcastsd (%0),%%ymm1; addq $8,%0;"\
unit_kernel_k1m1n4(%%ymm4,%1)
#define KERNEL_k1m1n4 KERNEL_h_k1m1n4 "addq $32,%1;"
#define KERNEL_h_k1m1n8 KERNEL_h_k1m1n4 unit_kernel_k1m1n4(%%ymm5,%1,%%r12,1)
#define KERNEL_k1m1n8 KERNEL_h_k1m1n8 "addq $32,%1;"
#define KERNEL_h_k1m1n12 KERNEL_h_k1m1n8 unit_kernel_k1m1n4(%%ymm6,%1,%%r12,2)
#define KERNEL_k1m1n12 KERNEL_h_k1m1n12 "addq $32,%1;"
/* INIT_m1n<N>: zero the accumulators; n=1 and n=2 both only need xmm4, hence both map to INIT_m2n1. */
#define INIT_m1n1 INIT_m2n1
#define INIT_m1n2 INIT_m2n1
#define INIT_m1n4 "vpxor %%ymm4,%%ymm4,%%ymm4;"
#define INIT_m1n8 INIT_m1n4 "vpxor %%ymm5,%%ymm5,%%ymm5;"
#define INIT_m1n12 INIT_m1n8 "vpxor %%ymm6,%%ymm6,%%ymm6;"
/* SAVE for m=1: every accumulated value is duplicated into a pair of adjacent doubles, multiplied
   by the {alphar, alphai} pair in ymm0/xmm0 and added to 16 bytes of C. */
#define SAVE_h_m1n1 \
"vmovddup %%xmm4,%%xmm4; vfmadd213pd (%2),%%xmm0,%%xmm4; vmovupd %%xmm4,(%2);"
#define SAVE_h_m1n2 \
"vunpcklpd %%xmm4,%%xmm4,%%xmm1; vunpckhpd %%xmm4,%%xmm4,%%xmm2;"\
"vfmadd213pd (%2),%%xmm0,%%xmm1; vmovupd %%xmm1,(%2);"\
"vfmadd213pd (%2,%3,1),%%xmm0,%%xmm2; vmovupd %%xmm2,(%2,%3,1);"
/* Writes back one accumulator to four ldc-strided 16-byte locations. Each half of the macro gathers
   the locations at %5 and %5 + 2*ldc into one ymm register, updates them together and scatters
   them back; %5 ends up 4 * ldc_in_bytes further on. Clobbers ymm1-ymm3. */
#define unit_save_m1n4(c1) \
"vunpcklpd "#c1","#c1",%%ymm1; vunpckhpd "#c1","#c1",%%ymm2;"\
"vmovupd (%5),%%xmm3; vinsertf128 $1,(%5,%3,2),%%ymm3,%%ymm3;"\
"vfmadd213pd %%ymm3,%%ymm0,%%ymm1; vmovupd %%xmm1,(%5); vextractf128 $1,%%ymm1,(%5,%3,2); addq %3,%5;"\
"vmovupd (%5),%%xmm3; vinsertf128 $1,(%5,%3,2),%%ymm3,%%ymm3;"\
"vfmadd213pd %%ymm3,%%ymm0,%%ymm2; vmovupd %%xmm2,(%5); vextractf128 $1,%%ymm2,(%5,%3,2); addq %3,%5; leaq (%5,%3,2),%5;"
#define SAVE_h_m1n4 "movq %2,%5;" unit_save_m1n4(%%ymm4)
#define SAVE_h_m1n8 SAVE_h_m1n4 unit_save_m1n4(%%ymm5)
#define SAVE_h_m1n12 SAVE_h_m1n8 unit_save_m1n4(%%ymm6)
/* Save, then advance c_pointer (%2) by 16 bytes. */
#define SAVE_m1(ndim) SAVE_h_m1n##ndim "addq $16,%2;"
/* m=1 strip for an ndim-wide panel: zero the accumulators, reload the k counter (%4) from r13 and
   the B pointer (%1) from r14, run one KERNEL_k1m1n<ndim> per k-step until %4 reaches zero, then save. */
#define COMPUTE_m1(ndim) \
INIT_m1n##ndim\
"movq %%r13,%4; movq %%r14,%1;"\
#ndim"001011:\n\t"\
"testq %4,%4; jz "#ndim"001012f;"\
KERNEL_k1m1n##ndim\
"decq %4; jmp "#ndim"001011b;"\
#ndim"001012:\n\t"\
SAVE_m1(ndim)
/* Processes one ndim-wide panel over the whole M range.
   Expands to a statement block that uses these variables from the enclosing scope by name:
   a_pointer, b_pointer, c_pointer, ldc_in_bytes, K, ctemp, const_val, M, next_b and LDC.
   Steps:
   - next_b is set to b_pointer + ndim * K; the asm only uses it as a prefetch address (%8).
   - The asm broadcasts the two doubles at const_val into ymm0, stashes K in r13, K << 5 in r12,
     the B start in r14 and M in r11, then runs COMPUTE_m4 while the m counter (%7) is >= 4,
     COMPUTE_m2 once if it is still >= 2, and COMPUTE_m1 once if it is non-zero.
   - Before leaving the asm, K, b_pointer and M are restored from r13, r14 and r11.
   - Afterwards a_pointer is rewound by M * K, b_pointer is advanced by ndim * K and c_pointer by
     2 * (LDC * ndim - M), all counted in doubles. */
#define COMPUTE(ndim) {\
next_b = b_pointer + ndim * K;\
__asm__ __volatile__(\
"vbroadcastf128 (%6),%%ymm0;"\
"movq %4,%%r13; movq %4,%%r12; salq $5,%%r12; movq %1,%%r14; movq %7,%%r11;"\
"cmpq $4,%7;jb 33101"#ndim"f;"\
"33109"#ndim":\n\t"\
COMPUTE_m4(ndim)\
"subq $4,%7;cmpq $4,%7;jnb 33109"#ndim"b;"\
"33101"#ndim":\n\t"\
"cmpq $2,%7;jb 33104"#ndim"f;"\
COMPUTE_m2(ndim)\
"subq $2,%7;"\
"33104"#ndim":\n\t"\
"testq %7,%7;jz 33105"#ndim"f;"\
COMPUTE_m1(ndim)\
"33105"#ndim":\n\t"\
"movq %%r13,%4; movq %%r14,%1; movq %%r11,%7;"\
:"+r"(a_pointer),"+r"(b_pointer),"+r"(c_pointer),"+r"(ldc_in_bytes),"+r"(K),"+r"(ctemp),"+r"(const_val),"+r"(M),"+r"(next_b)\
::"r11","r12","r13","r14","r15","xmm0","xmm1","xmm2","xmm3","xmm4","xmm5","xmm6","xmm7","xmm8","xmm9","xmm10","xmm11","xmm12","xmm13","xmm14",\
"xmm15","cc","memory");\
a_pointer -= M * K; b_pointer += ndim * K; c_pointer += 2*(LDC * ndim - M);\
}
/* Kernel entry point: accumulates alpha-scaled products of the real arrays A and B into C.
 *
 * Each computed real value v is added to C as the pair (alphar * v, alphai * v), so C is
 * treated as an array of two-double elements; see the SAVE_* macros.
 * NOTE(review): presumably this is the ZGEMM3M kernel operating on pre-packed A and B panels -
 * the packing layout is not visible in this file, confirm against the copy routines.
 *
 * m, n, k        problem dimensions; the function does nothing if any of them is zero.
 * alphar, alphai the two scale factors applied when results are written to C.
 * A, B           input arrays of doubles.
 * C              output array, updated in place.
 * LDC            stride of C between consecutive n indices, in two-double elements
 *                (converted to bytes in ldc_in_bytes).
 *
 * Always returns 0.
 */
int __attribute__ ((noinline))
CNAME(BLASLONG m, BLASLONG n, BLASLONG k, double alphar, double alphai, double * __restrict__ A, double * __restrict__ B, double * __restrict__ C, BLASLONG LDC)
{
if(m==0||n==0||k==0) return 0;
int64_t ldc_in_bytes = (int64_t)LDC * sizeof(double) * 2;
/* The asm loads alphar/alphai through a pointer (operand %6). */
double constval[2]; constval[0] = alphar; constval[1] = alphai;
double *const_val=constval;
int64_t M = (int64_t)m, K = (int64_t)k;
BLASLONG n_count = n;
/* These names are referenced directly by the COMPUTE macro; do not rename them. */
double *a_pointer = A,*b_pointer = B,*c_pointer = C,*ctemp = C,*next_b = B;
/* Consume n in panels of 12, then 8, 4, 2 and finally a single leftover. */
for(;n_count>11;n_count-=12) COMPUTE(12)
for(;n_count>7;n_count-=8) COMPUTE(8)
for(;n_count>3;n_count-=4) COMPUTE(4)
for(;n_count>1;n_count-=2) COMPUTE(2)
if(n_count>0) COMPUTE(1)
return 0;
}

View File

@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMINCOPYOBJ = sgemm_incopy.o
SGEMMITCOPYOBJ = sgemm_itcopy.o
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMINCOPYOBJ = dgemm_incopy.o
DGEMMITCOPYOBJ = dgemm_itcopy.o
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = ctrmm4x4V.S
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = ztrmm4x4V.S
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c

View File

@ -96,10 +96,10 @@ SGEMMINCOPY = ../generic/gemm_ncopy_8.c
SGEMMITCOPY = ../generic/gemm_tcopy_8.c
SGEMMONCOPY = ../generic/gemm_ncopy_4.c
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
SGEMMINCOPYOBJ = sgemm_incopy.o
SGEMMITCOPYOBJ = sgemm_itcopy.o
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
@ -108,16 +108,16 @@ DGEMMINCOPY = ../generic/gemm_ncopy_8.c
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
DGEMMONCOPY = ../generic/gemm_ncopy_4.c
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c
DGEMMINCOPYOBJ = dgemm_incopy.o
DGEMMITCOPYOBJ = dgemm_itcopy.o
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = ctrmm4x4V.S
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = ztrmm4x4V.S
ZGEMMONCOPY = ../generic/zgemm_ncopy_4.c

View File

@ -94,26 +94,26 @@ ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPYOBJ = dgemm_oncopy.o
DGEMMOTCOPYOBJ = dgemm_otcopy.o
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
CGEMMONCOPYOBJ = cgemm_oncopy.o
CGEMMOTCOPYOBJ = cgemm_otcopy.o
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
ZGEMMONCOPYOBJ = zgemm_oncopy.o
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c

View File

@ -0,0 +1,38 @@
image:
- Visual Studio 2017
configuration: Release
clone_depth: 3
matrix:
fast_finish: false
skip_commits:
# Add [av skip] to commit messages
message: /\[av skip\]/
cache:
- '%APPVEYOR_BUILD_FOLDER%\build'
environment:
global:
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
install:
- call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
- conda config --add channels conda-forge --force
- conda install --yes --quiet flang jom
- call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64
- set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
- set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
before_build:
- ps: if (-Not (Test-Path .\build)) { mkdir build }
- cd build
- cmake -G "NMake Makefiles JOM" -DCMAKE_Fortran_COMPILER=flang -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON ..
build_script:
- cmake --build .
test_script:
- ctest -j2

View File

@ -35,3 +35,9 @@ LAPACKE/example/xexample*
# SED
SRC/*-e
LAPACKE/src/*-e
build*
# DOCS documentation
DOCS/man
DOCS/explore-html
output_err

View File

@ -1,33 +1,32 @@
language: cpp
language: c
dist: xenial
group: travis_latest
git:
depth: 3
quiet: true
addons:
apt:
sources:
- george-edison55-precise-backports # cmake
packages:
- cmake
- cmake-data
- gfortran
- gfortran
os:
- linux
- osx
env:
- CMAKE_BUILD_TYPE=Release
- CMAKE_BUILD_TYPE=Coverage
install:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]];
then
for pkg in gcc cmake; do
if brew list -1 | grep -q "^${pkg}\$"; then
brew outdated $pkg || brew upgrade $pkg;
else
brew install $pkg;
fi
done
fi
matrix:
include:
- os: linux
env: CMAKE_BUILD_TYPE=Release
- os: linux
env: CMAKE_BUILD_TYPE=Coverage
- os: osx
env: CMAKE_BUILD_TYPE=Release
before_install:
- brew update > /dev/null
- brew install gcc > /dev/null
- os: osx
env: CMAKE_BUILD_TYPE=Coverage
before_install:
- brew update > /dev/null
- brew install gcc > /dev/null
script:
- export PR=https://api.github.com/repos/$TRAVIS_REPO_SLUG/pulls/$TRAVIS_PULL_REQUEST

View File

@ -6,4 +6,5 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/blas.pc.in ${CMAKE_CURRENT_BINARY_DIR
install(FILES
${CMAKE_CURRENT_BINARY_DIR}/blas.pc
DESTINATION ${PKG_CONFIG_DIR}
COMPONENT Development
)

View File

@ -1,13 +1,18 @@
include ../make.inc
TOPSRCDIR = ..
include $(TOPSRCDIR)/make.inc
.PHONY: all
all: blas
.PHONY: blas
blas:
$(MAKE) -C SRC
.PHONY: blas_testing
blas_testing: blas
$(MAKE) -C TESTING run
.PHONY: clean cleanobj cleanlib cleanexe cleantest
clean:
$(MAKE) -C SRC clean
$(MAKE) -C TESTING clean

View File

@ -1,5 +1,3 @@
include ../../make.inc
#######################################################################
# This is the makefile to create a library for the BLAS.
# The files are grouped as follows:
@ -55,6 +53,10 @@ include ../../make.inc
#
#######################################################################
TOPSRCDIR = ../..
include $(TOPSRCDIR)/make.inc
.PHONY: all
all: $(BLASLIB)
#---------------------------------------------------------
@ -138,33 +140,32 @@ ALLOBJ = $(SBLAS1) $(SBLAS2) $(SBLAS3) $(DBLAS1) $(DBLAS2) $(DBLAS3) \
$(ZBLAS2) $(ZBLAS3) $(ALLBLAS)
$(BLASLIB): $(ALLOBJ)
$(ARCH) $(ARCHFLAGS) $@ $^
$(AR) $(ARFLAGS) $@ $^
$(RANLIB) $@
.PHONY: single double complex complex16
single: $(SBLAS1) $(ALLBLAS) $(SBLAS2) $(SBLAS3)
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^
$(AR) $(ARFLAGS) $(BLASLIB) $^
$(RANLIB) $(BLASLIB)
double: $(DBLAS1) $(ALLBLAS) $(DBLAS2) $(DBLAS3)
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^
$(AR) $(ARFLAGS) $(BLASLIB) $^
$(RANLIB) $(BLASLIB)
complex: $(CBLAS1) $(CB1AUX) $(ALLBLAS) $(CBLAS2) $(CBLAS3)
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^
$(AR) $(ARFLAGS) $(BLASLIB) $^
$(RANLIB) $(BLASLIB)
complex16: $(ZBLAS1) $(ZB1AUX) $(ALLBLAS) $(ZBLAS2) $(ZBLAS3)
$(ARCH) $(ARCHFLAGS) $(BLASLIB) $^
$(AR) $(ARFLAGS) $(BLASLIB) $^
$(RANLIB) $(BLASLIB)
FRC:
@FRC=$(FRC)
.PHONY: clean cleanobj cleanlib
clean: cleanobj cleanlib
cleanobj:
rm -f *.o
cleanlib:
#rm -f $(BLASLIB) # May point to a system lib, e.g. -lblas
.f.o:
$(FORTRAN) $(OPTS) -c -o $@ $<

View File

@ -43,7 +43,7 @@
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of SX
*> storage spacing between elements of CX
*> \endverbatim
*
* Authors:

View File

@ -43,7 +43,7 @@
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of SX
*> storage spacing between elements of DX
*> \endverbatim
*
* Authors:

View File

@ -43,7 +43,7 @@
*> \param[in] INCX
*> \verbatim
*> INCX is INTEGER
*> storage spacing between elements of SX
*> storage spacing between elements of ZX
*> \endverbatim
*
* Authors:

View File

@ -0,0 +1,29 @@
SBLAS1 = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'sdot.f', 'snrm2.f', 'srot.f', 'srotg.f', 'sscal.f', 'sswap.f', 'sdsdot.f', 'srotmg.f', 'srotm.f')
CBLAS1 = files('scabs1.f', 'scasum.f', 'scnrm2.f', 'icamax.f', 'caxpy.f', 'ccopy.f', 'cdotc.f', 'cdotu.f', 'csscal.f', 'crotg.f', 'cscal.f', 'cswap.f', 'csrot.f')
DBLAS1 = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'ddot.f', 'dnrm2.f', 'drot.f', 'drotg.f', 'dscal.f', 'dsdot.f', 'dswap.f', 'drotmg.f', 'drotm.f')
ZBLAS1 = files('dcabs1.f', 'dzasum.f', 'dznrm2.f', 'izamax.f', 'zaxpy.f', 'zcopy.f', 'zdotc.f', 'zdotu.f', 'zdscal.f', 'zrotg.f', 'zscal.f', 'zswap.f', 'zdrot.f')
CB1AUX = files('isamax.f', 'sasum.f', 'saxpy.f', 'scopy.f', 'snrm2.f', 'sscal.f')
ZB1AUX = files('idamax.f', 'dasum.f', 'daxpy.f', 'dcopy.f', 'dnrm2.f', 'dscal.f')
ALLBLAS = files('lsame.f', 'xerbla.f', 'xerbla_array.f')
SBLAS2 = files('sgemv.f', 'sgbmv.f', 'ssymv.f', 'ssbmv.f', 'sspmv.f', 'strmv.f', 'stbmv.f', 'stpmv.f', 'strsv.f', 'stbsv.f', 'stpsv.f', 'sger.f', 'ssyr.f', 'sspr.f', 'ssyr2.f', 'sspr2.f')
CBLAS2 = files('cgemv.f', 'cgbmv.f', 'chemv.f', 'chbmv.f', 'chpmv.f', 'ctrmv.f', 'ctbmv.f', 'ctpmv.f', 'ctrsv.f', 'ctbsv.f', 'ctpsv.f', 'cgerc.f', 'cgeru.f', 'cher.f', 'chpr.f', 'cher2.f', 'chpr2.f')
DBLAS2 = files('dgemv.f', 'dgbmv.f', 'dsymv.f', 'dsbmv.f', 'dspmv.f', 'dtrmv.f', 'dtbmv.f', 'dtpmv.f', 'dtrsv.f', 'dtbsv.f', 'dtpsv.f', 'dger.f', 'dsyr.f', 'dspr.f', 'dsyr2.f', 'dspr2.f')
ZBLAS2 = files('zgemv.f', 'zgbmv.f', 'zhemv.f', 'zhbmv.f', 'zhpmv.f', 'ztrmv.f', 'ztbmv.f', 'ztpmv.f', 'ztrsv.f', 'ztbsv.f', 'ztpsv.f', 'zgerc.f', 'zgeru.f', 'zher.f', 'zhpr.f', 'zher2.f', 'zhpr2.f')
SBLAS3 = files('sgemm.f', 'ssymm.f', 'ssyrk.f', 'ssyr2k.f', 'strmm.f', 'strsm.f')
CBLAS3 = files('cgemm.f', 'csymm.f', 'csyrk.f', 'csyr2k.f', 'ctrmm.f', 'ctrsm.f', 'chemm.f', 'cherk.f', 'cher2k.f')
DBLAS3 = files('dgemm.f', 'dsymm.f', 'dsyrk.f', 'dsyr2k.f', 'dtrmm.f', 'dtrsm.f')
ZBLAS3 = files('zgemm.f', 'zsymm.f', 'zsyrk.f', 'zsyr2k.f', 'ztrmm.f', 'ztrsm.f', 'zhemm.f', 'zherk.f', 'zher2k.f')

View File

@ -23,13 +23,13 @@
*>
*> \verbatim
*>
* Compute the inner product of two vectors with extended
* precision accumulation.
*
* Returns S.P. result with dot product accumulated in D.P.
* SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
* where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
* defined in a similar way using INCY.
*> Compute the inner product of two vectors with extended
*> precision accumulation.
*>
*> Returns S.P. result with dot product accumulated in D.P.
*> SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
*> where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
*> defined in a similar way using INCY.
*> \endverbatim
*
* Arguments:
@ -77,7 +77,14 @@
*> \author Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
*> \author Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
*
*> \ingroup complex_blas_level1
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2017
*
*> \ingroup single_blas_level1
*
*> \par Further Details:
* =====================
@ -102,65 +109,7 @@
*> 920501 Reformatted the REFERENCES section. (WRB)
*> 070118 Reformat to LAPACK coding style
*> \endverbatim
*
* =====================================================================
*
* .. Local Scalars ..
* DOUBLE PRECISION DSDOT
* INTEGER I,KX,KY,NS
* ..
* .. Intrinsic Functions ..
* INTRINSIC DBLE
* ..
* DSDOT = SB
* IF (N.LE.0) THEN
* SDSDOT = DSDOT
* RETURN
* END IF
* IF (INCX.EQ.INCY .AND. INCX.GT.0) THEN
*
* Code for equal and positive increments.
*
* NS = N*INCX
* DO I = 1,NS,INCX
* DSDOT = DSDOT + DBLE(SX(I))*DBLE(SY(I))
* END DO
* ELSE
*
* Code for unequal or nonpositive increments.
*
* KX = 1
* KY = 1
* IF (INCX.LT.0) KX = 1 + (1-N)*INCX
* IF (INCY.LT.0) KY = 1 + (1-N)*INCY
* DO I = 1,N
* DSDOT = DSDOT + DBLE(SX(KX))*DBLE(SY(KY))
* KX = KX + INCX
* KY = KY + INCY
* END DO
* END IF
* SDSDOT = DSDOT
* RETURN
* END
*
*> \par Purpose:
* =============
*>
*> \verbatim
*> \endverbatim
*
* Authors:
* ========
*
*> \author Univ. of Tennessee
*> \author Univ. of California Berkeley
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date November 2017
*
*> \ingroup single_blas_level1
*
* =====================================================================
REAL FUNCTION SDSDOT(N,SB,SX,INCX,SY,INCY)
*
@ -175,71 +124,6 @@
* ..
* .. Array Arguments ..
REAL SX(*),SY(*)
* ..
*
* PURPOSE
* =======
*
* Compute the inner product of two vectors with extended
* precision accumulation.
*
* Returns S.P. result with dot product accumulated in D.P.
* SDSDOT = SB + sum for I = 0 to N-1 of SX(LX+I*INCX)*SY(LY+I*INCY),
* where LX = 1 if INCX .GE. 0, else LX = 1+(1-N)*INCX, and LY is
* defined in a similar way using INCY.
*
* AUTHOR
* ======
* Lawson, C. L., (JPL), Hanson, R. J., (SNLA),
* Kincaid, D. R., (U. of Texas), Krogh, F. T., (JPL)
*
* ARGUMENTS
* =========
*
* N (input) INTEGER
* number of elements in input vector(s)
*
* SB (input) REAL
* single precision scalar to be added to inner product
*
* SX (input) REAL array, dimension (N)
* single precision vector with N elements
*
* INCX (input) INTEGER
* storage spacing between elements of SX
*
* SY (input) REAL array, dimension (N)
* single precision vector with N elements
*
* INCY (input) INTEGER
* storage spacing between elements of SY
*
* SDSDOT (output) REAL
* single precision dot product (SB if N .LE. 0)
*
* Further Details
* ===============
*
* REFERENCES
*
* C. L. Lawson, R. J. Hanson, D. R. Kincaid and F. T.
* Krogh, Basic linear algebra subprograms for Fortran
* usage, Algorithm No. 539, Transactions on Mathematical
* Software 5, 3 (September 1979), pp. 308-323.
*
* REVISION HISTORY (YYMMDD)
*
* 791001 DATE WRITTEN
* 890531 Changed all specific intrinsics to generic. (WRB)
* 890831 Modified array declarations. (WRB)
* 890831 REVISION DATE from Version 3.2
* 891214 Prologue converted to Version 4.0 format. (BAB)
* 920310 Corrected definition of LX in DESCRIPTION. (WRB)
* 920501 Reformatted the REFERENCES section. (WRB)
* 070118 Reformat to LAPACK coding style
*
* =====================================================================
*
* .. Local Scalars ..
DOUBLE PRECISION DSDOT
INTEGER I,KX,KY,NS

View File

@ -1,5 +1,7 @@
include ../../make.inc
TOPSRCDIR = ../..
include $(TOPSRCDIR)/make.inc
.PHONY: all single double complex complex16
all: single double complex complex16
single: xblat1s xblat2s xblat3s
double: xblat1d xblat2d xblat3d
@ -7,32 +9,33 @@ complex: xblat1c xblat2c xblat3c
complex16: xblat1z xblat2z xblat3z
xblat1s: sblat1.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat1d: dblat1.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat1c: cblat1.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat1z: zblat1.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat2s: sblat2.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat2d: dblat2.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat2c: cblat2.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat2z: zblat2.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat3s: sblat3.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat3d: dblat3.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat3c: cblat3.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xblat3z: zblat3.o $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
.PHONY: run
run: all
./xblat1s > sblat1.out
./xblat1d > dblat1.out
@ -47,6 +50,7 @@ run: all
./xblat3c < cblat3.in
./xblat3z < zblat3.in
.PHONY: clean cleanobj cleanexe cleantest
clean: cleanobj cleanexe cleantest
cleanobj:
rm -f *.o
@ -54,6 +58,3 @@ cleanexe:
rm -f xblat*
cleantest:
rm -f *.out core
.f.o:
$(FORTRAN) $(OPTS) -c -o $@ $<

View File

@ -619,7 +619,7 @@
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
* ************************* STEST1 *****************************
*
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
*

View File

@ -991,7 +991,7 @@
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
* ************************* STEST1 *****************************
*
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
*

View File

@ -946,7 +946,7 @@
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
* ************************* STEST1 *****************************
*
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
*

View File

@ -619,7 +619,7 @@
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
* ************************* STEST1 *****************************
*
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
*

View File

@ -12,8 +12,10 @@ FortranCInterface_HEADER(${LAPACK_BINARY_DIR}/include/cblas_mangling.h
SYMBOL_NAMESPACE "F77_")
if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND)
message(WARNING "Reverting to pre-defined include/lapacke_mangling.h")
configure_file(include/lapacke_mangling_with_flags.h.in
${LAPACK_BINARY_DIR}/include/lapacke_mangling.h)
configure_file(include/lapacke_mangling_with_flags.h.in
${LAPACK_BINARY_DIR}/include/lapacke_mangling.h)
configure_file(include/cblas_mangling_with_flags.h.in
${LAPACK_BINARY_DIR}/include/cblas_mangling.h)
endif()
include_directories(include ${LAPACK_BINARY_DIR}/include)
@ -28,7 +30,10 @@ endforeach()
endmacro()
append_subdir_files(CBLAS_INCLUDE "include")
install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(FILES ${CBLAS_INCLUDE} ${LAPACK_BINARY_DIR}/include/cblas_mangling.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
COMPONENT Development
)
# --------------------------------------------------
if(BUILD_TESTING)
@ -45,7 +50,9 @@ endif()
set(_cblas_config_install_guard_target "")
if(ALL_TARGETS)
install(EXPORT cblas-targets
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION})
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}
COMPONENT Development
)
# Choose one of the cblas targets to use as a guard for
# cblas-config.cmake to load targets from the install tree.
list(GET ALL_TARGETS 0 _cblas_config_install_guard_target)
@ -82,4 +89,6 @@ install(FILES
)
#install(EXPORT cblas-targets
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION})
# DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cblas-${LAPACK_VERSION}
# COMPONENT Development
# )

View File

@ -1,19 +1,25 @@
include ../make.inc
TOPSRCDIR = ..
include $(TOPSRCDIR)/make.inc
.PHONY: all
all: cblas
.PHONY: cblas
cblas: include/cblas_mangling.h
$(MAKE) -C src
include/cblas_mangling.h: include/cblas_mangling_with_flags.h.in
cp $< $@
cp include/cblas_mangling_with_flags.h.in $@
.PHONY: cblas_testing
cblas_testing: cblas
$(MAKE) -C testing run
.PHONY: cblas_example
cblas_example: cblas
$(MAKE) -C examples
.PHONY: clean cleanobj cleanlib cleanexe cleantest
clean:
$(MAKE) -C src clean
$(MAKE) -C testing clean

View File

@ -1,17 +1,21 @@
include ../../make.inc
TOPSRCDIR = ../..
include $(TOPSRCDIR)/make.inc
.SUFFIXES: .c .o
.c.o:
$(CC) $(CFLAGS) -I../include -c -o $@ $<
.PHONY: all
all: cblas_ex1 cblas_ex2
cblas_ex1: cblas_example1.o $(CBLASLIB) $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
cblas_ex2: cblas_example2.o $(CBLASLIB) $(BLASLIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
.PHONY: clean cleanobj cleanexe
clean: cleanobj cleanexe
cleanobj:
rm -f *.o
cleanexe:
rm -f cblas_ex1 cblas_ex2
.c.o:
$(CC) $(CFLAGS) -I../include -c -o $@ $<

View File

@ -47,7 +47,7 @@ int main ( )
a[m*3+1] = 6;
a[m*3+2] = 7;
a[m*3+3] = 8;
/* The elemetns of x and y */
/* The elements of x and y */
x[0] = 1;
x[1] = 2;
x[2] = 1;

View File

@ -1,7 +1,13 @@
# This Makefile compiles the CBLAS routines
include ../../make.inc
TOPSRCDIR = ../..
include $(TOPSRCDIR)/make.inc
.SUFFIXES: .c .o
.c.o:
$(CC) $(CFLAGS) -I../include -c -o $@ $<
.PHONY: all
all: $(CBLASLIB)
# Error handling routines for level 2 & 3
@ -43,24 +49,25 @@ zlev1 = cblas_zswap.o cblas_zscal.o cblas_zdscal.o cblas_zcopy.o \
# Common files for level 1 single precision
sclev1 = cblas_scasum.o scasumsub.o cblas_scnrm2.o scnrm2sub.o
.PHONY: slib1 dlib1 clib1 zlib1
# Single precision real
slib1: $(slev1) $(sclev1)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# Double precision real
dlib1: $(dlev1)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# Single precision complex
clib1: $(clev1) $(sclev1)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# Double precision complex
zlib1: $(zlev1)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
#
@ -95,24 +102,25 @@ zlev2 = cblas_zgemv.o cblas_zgbmv.o cblas_zhemv.o cblas_zhbmv.o cblas_zhpmv.o \
cblas_ztpsv.o cblas_zgeru.o cblas_zgerc.o cblas_zher.o cblas_zher2.o \
cblas_zhpr.o cblas_zhpr2.o
.PHONY: slib2 dlib2 clib2 zlib2
# Single precision real
slib2: $(slev2) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# Double precision real
dlib2: $(dlev2) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# Single precision complex
clib2: $(clev2) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# Double precision complex
zlib2: $(zlev2) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
#
@ -141,24 +149,25 @@ zlev3 = cblas_zgemm.o cblas_zsymm.o cblas_zhemm.o cblas_zherk.o \
cblas_zher2k.o cblas_ztrmm.o cblas_ztrsm.o cblas_zsyrk.o \
cblas_zsyr2k.o
.PHONY: slib3 dlib3 clib3 zlib3
# Single precision real
slib3: $(slev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# Double precision real
dlib3: $(dlev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# Single precision complex
clib3: $(clev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# Double precision complex
zlib3: $(zlev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
@ -166,36 +175,33 @@ alev1 = $(slev1) $(dlev1) $(clev1) $(zlev1) $(sclev1)
alev2 = $(slev2) $(dlev2) $(clev2) $(zlev2)
alev3 = $(slev3) $(dlev3) $(clev3) $(zlev3)
.PHONY: all1 all2 all3
# All level 1
all1: $(alev1)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# All level 2
all2: $(alev2) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# All level 3
all3: $(alev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $(CBLASLIB) $^
$(AR) $(ARFLAGS) $(CBLASLIB) $^
$(RANLIB) $(CBLASLIB)
# All levels and precisions
$(CBLASLIB): $(alev1) $(alev2) $(alev3) $(errhand)
$(ARCH) $(ARCHFLAGS) $@ $^
$(AR) $(ARFLAGS) $@ $^
$(RANLIB) $@
FRC:
@FRC=$(FRC)
.PHONY: clean cleanobj cleanlib
clean: cleanobj cleanlib
cleanobj:
rm -f *.o
cleanlib:
rm -f $(CBLASLIB)
.c.o:
$(CC) $(CFLAGS) -I../include -c -o $@ $<
.f.o:
$(FORTRAN) $(OPTS) -c -o $@ $<

View File

@ -91,7 +91,7 @@ void cblas_sgemm(const CBLAS_LAYOUT layout, const CBLAS_TRANSPOSE TransA,
else
{
cblas_xerbla(2, "cblas_sgemm",
"Illegal TransA setting, %d\n", TransA);
"Illegal TransB setting, %d\n", TransB);
CBLAS_CallFromC = 0;
RowMajorStrg = 0;
return;

View File

@ -2,7 +2,12 @@
# The Makefile compiles c wrappers and testers for CBLAS.
#
include ../../make.inc
TOPSRCDIR = ../..
include $(TOPSRCDIR)/make.inc
.SUFFIXES: .c .o
.c.o:
$(CC) $(CFLAGS) -I../include -c -o $@ $<
# Archive files necessary to compile
LIB = $(CBLASLIB) $(BLASLIB)
@ -27,6 +32,7 @@ ztestl1o = c_zblas1.o
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o
.PHONY: all all1 all2 all3
all: all1 all2 all3
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
all2: xscblat2 xdcblat2 xccblat2 xzcblat2
@ -38,37 +44,38 @@ all3: xscblat3 xdcblat3 xccblat3 xzcblat3
# Single real
xscblat1: c_sblat1.o $(stestl1o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xscblat2: c_sblat2.o $(stestl2o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xscblat3: c_sblat3.o $(stestl3o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
# Double real
xdcblat1: c_dblat1.o $(dtestl1o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xdcblat2: c_dblat2.o $(dtestl2o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xdcblat3: c_dblat3.o $(dtestl3o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
# Single complex
xccblat1: c_cblat1.o $(ctestl1o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xccblat2: c_cblat2.o $(ctestl2o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xccblat3: c_cblat3.o $(ctestl3o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
# Double complex
xzcblat1: c_zblat1.o $(ztestl1o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xzcblat2: c_zblat2.o $(ztestl2o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
xzcblat3: c_zblat3.o $(ztestl3o) $(LIB)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
# RUN TESTS
.PHONY: run
run: all
@echo "--> TESTING CBLAS 1 - SINGLE PRECISION REAL <--"
@./xscblat1 > stest1.out
@ -95,6 +102,7 @@ run: all
@echo "--> TESTING CBLAS 3 - DOUBLE PRECISION COMPLEX <--"
@./xzcblat3 < zin3 > ztest3.out
.PHONY: clean cleanobj cleanexe cleantest
clean: cleanobj cleanexe cleantest
cleanobj:
rm -f *.o
@ -102,9 +110,3 @@ cleanexe:
rm -f x*
cleantest:
rm -f *.out core
.SUFFIXES: .o .f .c
.c.o:
$(CC) $(CFLAGS) -I../include -c -o $@ $<
.f.o:
$(FORTRAN) $(OPTS) -c -o $@ $<

View File

@ -577,7 +577,7 @@
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
* ************************* STEST1 *****************************
*
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
*

View File

@ -653,7 +653,7 @@
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
* ************************* STEST1 *****************************
*
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
*

View File

@ -653,7 +653,7 @@
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
* ************************* STEST1 *****************************
*
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
*

View File

@ -577,7 +577,7 @@
SUBROUTINE STEST1(SCOMP1,STRUE1,SSIZE,SFAC)
* ************************* STEST1 *****************************
*
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMODATE THE FORTRAN
* THIS IS AN INTERFACE SUBROUTINE TO ACCOMMODATE THE FORTRAN
* REQUIREMENT THAT WHEN A DUMMY ARGUMENT IS AN ARRAY, THE
* ACTUAL ARGUMENT MUST ALSO BE AN ARRAY OR AN ARRAY ELEMENT.
*

View File

@ -1,4 +1,4 @@
# This module checks against various known compilers and thier respective
# This module checks against various known compilers and their respective
# flags to determine any specific flags needing to be set.
#
# 1. If FPE traps are enabled either abort or disable them

View File

@ -20,7 +20,7 @@ set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY})
get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
foreach (LANG ${ENABLED_LANGUAGES})
# Gcov evaluation is dependend on the used compiler. Check gcov support for
# Gcov evaluation is dependent on the used compiler. Check gcov support for
# each compiler that is used. If gcov binary was already found for this
# compiler, do not try to find it again.
if(NOT GCOV_${CMAKE_${LANG}_COMPILER_ID}_BIN)

View File

@ -42,7 +42,7 @@ set(CMAKE_REQUIRED_QUIET ${codecov_FIND_QUIETLY})
get_property(ENABLED_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
foreach (LANG ${ENABLED_LANGUAGES})
# Coverage flags are not dependend on language, but the used compiler. So
# Coverage flags are not dependent on language, but the used compiler. So
# instead of searching flags foreach language, search flags foreach compiler
# used.
set(COMPILER ${CMAKE_${LANG}_COMPILER_ID})

View File

@ -24,7 +24,7 @@ message(STATUS "=========")
set(F77_OUTPUT_EXE "/Fe" CACHE INTERNAL
"Fortran compiler option for setting executable file name.")
else()
# in other case, let user specify their fortran configrations.
# in other case, let user specify their fortran configurations.
set(F77_OPTION_COMPILE "-c" CACHE STRING
"Fortran compiler option for compiling without linking.")
set(F77_OUTPUT_OBJ "-o" CACHE STRING

View File

@ -5,6 +5,10 @@ if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
endif()
unset(_LAPACK_TARGET)
# Hint for project building against lapack
set(LAPACK_Fortran_COMPILER_ID "@CMAKE_Fortran_COMPILER_ID@")
# Report the blas and lapack raw or imported libraries.
set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@")
set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@")
set(LAPACK_LIBRARIES ${LAPACK_blas_LIBRARIES} ${LAPACK_lapack_LIBRARIES})

View File

@ -8,8 +8,12 @@ if(_LAPACK_TARGET AND NOT TARGET "${_LAPACK_TARGET}")
endif()
unset(_LAPACK_TARGET)
# Hint for project building against lapack
set(LAPACK_Fortran_COMPILER_ID "@CMAKE_Fortran_COMPILER_ID@")
# Report the blas and lapack raw or imported libraries.
set(LAPACK_blas_LIBRARIES "@BLAS_LIBRARIES@")
set(LAPACK_lapack_LIBRARIES "@LAPACK_LIBRARIES@")
set(LAPACK_LIBRARIES ${LAPACK_blas_LIBRARIES} ${LAPACK_lapack_LIBRARIES})
unset(_LAPACK_SELF_DIR)

View File

@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 2.8.12)
project(LAPACK Fortran C)
set(LAPACK_MAJOR_VERSION 3)
set(LAPACK_MINOR_VERSION 8)
set(LAPACK_MINOR_VERSION 9)
set(LAPACK_PATCH_VERSION 0)
set(
LAPACK_VERSION
@ -13,6 +13,9 @@ set(
# Add the CMake directory for custom CMake modules
set(CMAKE_MODULE_PATH "${LAPACK_SOURCE_DIR}/CMAKE" ${CMAKE_MODULE_PATH})
# Export all symbols on Windows when building shared libraries
SET(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS TRUE)
# Set a default build type if none was specified
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to 'Release' as none was specified.")
@ -21,8 +24,19 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo" "Coverage")
endif()
string(TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UPPER)
if(${CMAKE_BUILD_TYPE_UPPER} STREQUAL "COVERAGE")
# Coverage
set(_is_coverage_build 0)
set(_msg "Checking if build type is 'Coverage'")
message(STATUS "${_msg}")
if(NOT CMAKE_CONFIGURATION_TYPES)
string(TOLOWER ${CMAKE_BUILD_TYPE} _build_type_lc)
if(${_build_type_lc} STREQUAL "coverage")
set(_is_coverage_build 1)
endif()
endif()
message(STATUS "${_msg}: ${_is_coverage_build}")
if(_is_coverage_build)
message(STATUS "Adding coverage")
find_package(codecov)
endif()
@ -58,18 +72,18 @@ include(PreventInSourceBuilds)
include(PreventInBuildInstalls)
if(UNIX)
if("${CMAKE_Fortran_COMPILER}" MATCHES "ifort")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict")
# Intel Fortran: request strict floating-point semantics.
# CMAKE_Fortran_FLAGS is a space-separated command-line string, not a CMake
# list. list(APPEND) would join the new text with ';' whenever flags are
# already set, corrupting the compiler invocation, so concatenate with set().
if(CMAKE_Fortran_COMPILER_ID STREQUAL Intel)
  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -fp-model strict")
endif()
if("${CMAKE_Fortran_COMPILER}" MATCHES "xlf")
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none")
# IBM XL Fortran: add -qnosave and -qstrict=none to the Fortran flags.
# CMAKE_Fortran_FLAGS is a space-separated command-line string, not a CMake
# list. list(APPEND) would join the new text with ';' whenever flags are
# already set, corrupting the compiler invocation, so concatenate with set().
if(CMAKE_Fortran_COMPILER_ID STREQUAL XL)
  set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} -qnosave -qstrict=none")
endif()
# Delete libmtsk in linking sequence for Sun/Oracle Fortran Compiler.
# This library is not present in the Sun package SolarisStudio12.3-linux-x86-bin
string(REPLACE \;mtsk\; \; CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES "${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}")
endif()
if(CMAKE_Fortran_COMPILER_ID STREQUAL "Compaq")
if(CMAKE_Fortran_COMPILER_ID STREQUAL Compaq)
if(WIN32)
if(CMAKE_GENERATOR STREQUAL "NMake Makefiles")
get_filename_component(CMAKE_Fortran_COMPILER_CMDNAM ${CMAKE_Fortran_COMPILER} NAME_WE)
@ -96,24 +110,16 @@ if(CMAKE_Fortran_COMPILER_ID STREQUAL "Compaq")
endif()
endif()
# Get Python
message(STATUS "Looking for Python greater than 2.6 - ${PYTHONINTERP_FOUND}")
find_package(PythonInterp 2.7) # lapack_testing.py uses features from python 2.7 and greater
if(PYTHONINTERP_FOUND)
message(STATUS "Using Python version ${PYTHON_VERSION_STRING}")
else()
message(STATUS "No suitable Python version found, so skipping summary tests.")
endif()
# --------------------------------------------------
# --------------------------------------------------
set(LAPACK_INSTALL_EXPORT_NAME lapack-targets)
macro(lapack_install_library lib)
install(TARGETS ${lib}
EXPORT ${LAPACK_INSTALL_EXPORT_NAME}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT Development
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT RuntimeLibraries
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT RuntimeLibraries
)
endmacro()
@ -121,12 +127,22 @@ set(PKG_CONFIG_DIR ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
# --------------------------------------------------
# Testing
option(BUILD_TESTING "Build tests" OFF)
enable_testing()
option(BUILD_TESTING "Build tests" ${_is_coverage_build})
include(CTest)
enable_testing()
message(STATUS "Build tests: ${BUILD_TESTING}")
# lapack_testing.py uses features from python 2.7 and greater
if(BUILD_TESTING)
set(_msg "Looking for Python >= 2.7 needed for summary tests")
message(STATUS "${_msg}")
find_package(PythonInterp 2.7 QUIET)
if(PYTHONINTERP_FOUND)
message(STATUS "${_msg} - found (${PYTHON_VERSION_STRING})")
else()
message(STATUS "${_msg} - not found (skipping summary tests)")
endif()
endif()
# --------------------------------------------------
# Organize output files. On Windows this also keeps .dll files next
# to the .exe files that need them, making tests easy to run.
@ -299,16 +315,40 @@ if(LAPACKE)
add_subdirectory(LAPACKE)
endif()
#-------------------------------------
# BLAS++ / LAPACK++
option(BLAS++ "Build BLAS++" OFF)
option(LAPACK++ "Build LAPACK++" OFF)
function(_display_cpp_implementation_msg name)
string(TOLOWER ${name} name_lc)
message(STATUS "${name}++ enable")
message(STATUS "----------------")
message(STATUS "Thank you for your interest in ${name}++, a newly developed C++ API for ${name} library")
message(STATUS "The objective of ${name}++ is to provide a convenient, performance oriented API for development in the C++ language, that, for the most part, preserves established conventions, while, at the same time, takes advantages of modern C++ features, such as: namespaces, templates, exceptions, etc.")
message(STATUS "We are still working on integrating ${name}++ in our library. For the moment, you can download directly ${name_lc}++ from https://bitbucket.org/icl/${name_lc}pp")
message(STATUS "For support ${name}++ related question, please email: slate-user@icl.utk.edu")
message(STATUS "----------------")
endfunction()
if(BLAS++)
_display_cpp_implementation_msg("BLAS")
endif()
if(LAPACK++)
_display_cpp_implementation_msg("LAPACK")
endif()
# --------------------------------------------------
# CPACK Packaging
set(CPACK_PACKAGE_NAME "LAPACK")
set(CPACK_PACKAGE_VENDOR "University of Tennessee, Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd")
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "LAPACK- Linear Algebra Package")
set(CPACK_PACKAGE_VERSION_MAJOR 3)
set(CPACK_PACKAGE_VERSION_MINOR 5)
set(CPACK_PACKAGE_VERSION_PATCH 0)
set(CPACK_PACKAGE_VERSION_MAJOR ${LAPACK_MAJOR_VERSION})
set(CPACK_PACKAGE_VERSION_MINOR ${LAPACK_MINOR_VERSION})
set(CPACK_PACKAGE_VERSION_PATCH ${LAPACK_PATCH_VERSION})
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_MONOLITHIC_INSTALL ON)
set(CPACK_PACKAGE_INSTALL_DIRECTORY "LAPACK")
if(WIN32 AND NOT UNIX)
# There is a bug in NSI that does not handle full unix paths properly. Make
@ -347,7 +387,9 @@ endif()
set(_lapack_config_install_guard_target "")
if(ALL_TARGETS)
install(EXPORT lapack-targets
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION})
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}
COMPONENT Development
)
# Choose one of the lapack targets to use as a guard for
# lapack-config.cmake to load targets from the install tree.
@ -382,6 +424,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapack.pc.in ${CMAKE_CURRENT_BINARY_D
install(FILES
${CMAKE_CURRENT_BINARY_DIR}/lapack.pc
DESTINATION ${PKG_CONFIG_DIR}
COMPONENT Development
)
configure_file(${LAPACK_SOURCE_DIR}/CMAKE/lapack-config-install.cmake.in
@ -398,4 +441,6 @@ install(FILES
${LAPACK_BINARY_DIR}/CMakeFiles/lapack-config.cmake
${LAPACK_BINARY_DIR}/lapack-config-version.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapack-${LAPACK_VERSION}
COMPONENT Development
)

View File

@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = 3.8.0
PROJECT_NUMBER = 3.9.0
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a

View File

@ -38,7 +38,7 @@ PROJECT_NAME = LAPACK
# could be handy for archiving the generated documentation or if some version
# control system is used.
PROJECT_NUMBER = 3.8.0
PROJECT_NUMBER = 3.9.0
# Using the PROJECT_BRIEF tag one can provide an optional one line description
# for a project that appears at the top of each page and should give viewer a

View File

@ -439,39 +439,39 @@ SHELL = /bin/sh
\end{quote}
and it will need to be modified to \texttt{SHELL = /sbin/sh} if you are
installing LAPACK on an SGI architecture.
Second, you will
need to modify the \texttt{PLAT} definition, which is appended to all
library names, to specify the architecture to which you are installing
LAPACK. This features avoids confusion in library names when you are
installing LAPACK on more than one architecture. Next, you will need
to modify \texttt{FORTRAN}, \texttt{OPTS}, \texttt{DRVOPTS}, \texttt{NOOPT}, \texttt{LOADER},
and \texttt{LOADOPTS} to specify
Next, you will need to modify \texttt{FC}, \texttt{FFLAGS},
\texttt{FFLAGS\_DRV}, \texttt{FFLAGS\_NOOPT}, and \texttt{LDFLAGS} to specify
the compiler, compiler options, compiler options for the testing and
timing\footnotemark[\value{footnote}] main programs, loader, loader options.
Next you will have to choose which function you will use to time in the \texttt{SECOND} and \texttt{DSECND} routines.
timing\footnotemark[\value{footnote}] main programs, and linker options.
Next you will have to choose which function you will use to time in the
\texttt{SECOND} and \texttt{DSECND} routines.
\begin{verbatim}
#The Default : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME
TIMER = EXT_ETIME
# For RS6K : SECOND and DSECND will use a call to the EXTERNAL FUNCTION ETIME_
# TIMER = EXT_ETIME_
# For gfortran compiler: SECOND and DSECND will use the INTERNAL FUNCTION ETIME
# TIMER = INT_ETIME
# If your Fortran compiler does not provide etime (like Nag Fortran Compiler, etc...)
# SECOND and DSECND will use a call to the INTERNAL FUNCTION CPU_TIME
# TIMER = INT_CPU_TIME
# If neither of this works...you can use the NONE value...
# In that case, SECOND and DSECND will always return 0
# TIMER = NONE
# Default: SECOND and DSECND will use a call to the
# EXTERNAL FUNCTION ETIME
#TIMER = EXT_ETIME
# For RS6K: SECOND and DSECND will use a call to the
# EXTERNAL FUNCTION ETIME_
#TIMER = EXT_ETIME_
# For gfortran compiler: SECOND and DSECND will use a call to the
# INTERNAL FUNCTION ETIME
TIMER = INT_ETIME
# If your Fortran compiler does not provide etime (like Nag Fortran
# Compiler, etc...) SECOND and DSECND will use a call to the
# INTERNAL FUNCTION CPU_TIME
#TIMER = INT_CPU_TIME
# If none of these work, you can use the NONE value.
# In that case, SECOND and DSECND will always return 0.
#TIMER = NONE
\end{verbatim}
Refer to Section~\ref{second} for more information.
Next, you will need to modify \texttt{ARCH}, \texttt{ARCHFLAGS}, and \texttt{RANLIB} to specify archiver,
Next, you will need to modify \texttt{AR}, \texttt{ARFLAGS}, and \texttt{RANLIB} to specify archiver,
archiver options, and ranlib for your machine. If your architecture
does not require \texttt{ranlib} to be run after each archive command (as
is the case with CRAY computers running UNICOS, Hewlett Packard
computers running HP-UX, or SUN SPARCstations running Solaris), set
\texttt{ranlib=echo}. And finally, you must
\texttt{RANLIB = echo}. And finally, you must
modify the \texttt{BLASLIB} definition to specify the BLAS library to which
you will be linking. If an optimized version of the BLAS is available
on your machine, you are highly recommended to link to that library.
@ -721,24 +721,24 @@ The version that will be used depends on the value of the TIMER variable in the
\begin{itemize}
\item If ETIME is available as an external function, set the value of the TIMER variable in your
make.inc to \texttt{EXT\_ETIME}:\texttt{second\_EXT\_ETIME.f} and \texttt{dsecnd\_EXT\_ETIME.f} will be used.
make.inc to \texttt{EXT\_ETIME}: \texttt{second\_EXT\_ETIME.f} and \texttt{dsecnd\_EXT\_ETIME.f} will be used.
Usually on HPPA architectures,
the compiler and loader flag \texttt{+U77} should be included to access
the compiler and linker flag \texttt{+U77} should be included to access
the function \texttt{ETIME}.
\item If ETIME\_ is available as an external function, set the value of the TIMER variable in your make.inc
to \texttt{EXT\_ETIME\_}:\texttt{second\_EXT\_ETIME\_.f} and \texttt{dsecnd\_EXT\_ETIME\_.f} will be used.
to \texttt{EXT\_ETIME\_}: \texttt{second\_EXT\_ETIME\_.f} and \texttt{dsecnd\_EXT\_ETIME\_.f} will be used.
It is the case on some IBM architectures such as IBM RS/6000s.
\item If ETIME is available as an internal function, set the value of the TIMER variable in your make.inc
to \texttt{INT\_ETIME}:\texttt{second\_INT\_ETIME.f} and \texttt{dsecnd\_INT\_ETIME.f} will be used.
to \texttt{INT\_ETIME}: \texttt{second\_INT\_ETIME.f} and \texttt{dsecnd\_INT\_ETIME.f} will be used.
This is the case with gfortran.
\item If CPU\_TIME is available as an internal function, set the value of the TIMER variable in your make.inc
to \texttt{INT\_CPU\_TIME}:\texttt{second\_INT\_CPU\_TIME.f} and \texttt{dsecnd\_INT\_CPU\_TIME.f} will be used.
to \texttt{INT\_CPU\_TIME}: \texttt{second\_INT\_CPU\_TIME.f} and \texttt{dsecnd\_INT\_CPU\_TIME.f} will be used.
\item If none of these functions is available, set the value of the TIMER variable in your make.inc
to \texttt{NONE:}\texttt{second\_NONE.f} and \texttt{dsecnd\_NONE.f} will be used.
to \texttt{NONE}: \texttt{second\_NONE.f} and \texttt{dsecnd\_NONE.f} will be used.
These routines will always return zero.
\end{itemize}
@ -829,8 +829,8 @@ data type to the library if necessary.
\end{itemize}
\noindent
The BLAS library is created in \texttt{LAPACK/blas\_PLAT.a}, where
\texttt{PLAT} is the user-defined architecture suffix specified in the file
The BLAS library is created in \texttt{LAPACK/librefblas.a},
or in the user-defined location specified by \texttt{BLASLIB} in the file
\texttt{LAPACK/make.inc}.
\subsection{Run the BLAS Test Programs}\label{testblas}
@ -882,8 +882,8 @@ data type to the library if necessary.
\end{itemize}
\noindent
The LAPACK library is created in \texttt{LAPACK/lapack\_PLAT.a}, where
\texttt{PLAT} is the user-defined architecture suffix specified in the file
The LAPACK library is created in \texttt{LAPACK/liblapack.a},
or in the user-defined location specified by \texttt{LAPACKLIB} in the file
\texttt{LAPACK/make.inc}.
\subsection{Create the Test Matrix Generator Library}
@ -902,9 +902,9 @@ data type to the library if necessary.
\end{itemize}
\noindent
The test matrix generator library is created in \texttt{LAPACK/tmglib\_PLAT.a},
where \texttt{PLAT} is the user-defined architecture suffix specified in the
file \texttt{LAPACK/make.inc}.
The test matrix generator library is created in \texttt{LAPACK/libtmglib.a},
or in the user-defined location specified by \texttt{TMGLIB} in the file
\texttt{LAPACK/make.inc}.
\subsection{Run the LAPACK Test Programs}
@ -1114,9 +1114,7 @@ To make a library of the instrumented LAPACK routines, first
go to \texttt{LAPACK/TIMING/LIN/LINSRC} and type \texttt{make} followed
by the data types desired, as in the examples of Section~\ref{toplevelmakefile}.
The library of instrumented code is created in
\texttt{LAPACK/TIMING/LIN/linsrc\_PLAT.a},
where \texttt{PLAT} is the user-defined architecture suffix specified in the
file \texttt{LAPACK/make.inc}.
\texttt{LAPACK/TIMING/LIN/linsrc.a}.
\end{sloppypar}
\item[b)]
@ -1251,9 +1249,7 @@ To make a library of the instrumented LAPACK routines, first
go to \texttt{LAPACK/TIMING/EIG/EIGSRC} and type \texttt{make} followed
by the data types desired, as in the examples of Section~\ref{toplevelmakefile}.
The library of instrumented code is created in
\texttt{LAPACK/TIMING/EIG/eigsrc\_PLAT.a},
where \texttt{PLAT} is the user-defined architecture suffix specified in the
file \texttt{LAPACK/make.inc}.
\texttt{LAPACK/TIMING/EIG/eigsrc.a}.
\end{sloppypar}
\item[b)]
@ -1389,7 +1385,7 @@ installing LAPACK on an SGI architecture.
\section{ETIME}
On HPPA architectures,
the compiler and loader flag \texttt{+U77} should be included to access
the compiler and linker flag \texttt{+U77} should be included to access
the function \texttt{ETIME}.
\section{ILAENV and IEEE-754 compliance}
@ -1494,13 +1490,13 @@ has two options: increase your stack size, or force all local variables
to be allocated statically.
On HPPA architectures, the
compiler and loader flag \texttt{-K} should be used when compiling these testing
compiler and linker flag \texttt{-K} should be used when compiling these testing
and timing main programs to avoid such a stack overflow. I.e., set
\texttt{DRVOPTS = -K} in the \texttt{LAPACK/make.inc} file.
\texttt{FFLAGS\_DRV = -K} in the \texttt{LAPACK/make.inc} file.
For similar reasons,
on SGI architectures, the compiler and loader flag \texttt{-static} should be
used. I.e., set \texttt{DRVOPTS = -static} in the \texttt{LAPACK/make.inc} file.
on SGI architectures, the compiler and linker flag \texttt{-static} should be
used. I.e., set \texttt{FFLAGS\_DRV = -static} in the \texttt{LAPACK/make.inc} file.
\section{IEEE arithmetic}

View File

@ -1,30 +1,33 @@
include ../make.inc
TOPSRCDIR = ..
include $(TOPSRCDIR)/make.inc
.PHONY: all testlsame testslamch testdlamch testsecond testdsecnd testieee testversion
all: testlsame testslamch testdlamch testsecond testdsecnd testieee testversion
testlsame: lsame.o lsametst.o
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
testslamch: slamch.o lsame.o slamchtst.o
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
testdlamch: dlamch.o lsame.o dlamchtst.o
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
testsecond: second_$(TIMER).o secondtst.o
@echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)"
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
testdsecnd: dsecnd_$(TIMER).o dsecndtst.o
@echo "[INFO] : TIMER value: $(TIMER) (given by make.inc)"
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
testieee: tstiee.o
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
testversion: ilaver.o LAPACK_version.o
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
.PHONY: run
run: all
./testlsame
./testslamch
@ -34,6 +37,7 @@ run: all
./testieee
./testversion
.PHONY: clean cleanobj cleanexe cleantest
clean: cleanobj cleanexe cleantest
cleanobj:
rm -f *.o
@ -42,9 +46,5 @@ cleanexe:
cleantest:
rm -f core
.SUFFIXES: .o .f
.f.o:
$(FORTRAN) $(OPTS) -c -o $@ $<
slamch.o: slamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $<
dlamch.o: dlamch.f ; $(FORTRAN) $(NOOPT) -c -o $@ $<
# Explicit rules for the machine-parameter routines: slamch.f and dlamch.f are
# compiled with $(FFLAGS_NOOPT) rather than the regular $(FFLAGS).
slamch.o: slamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<
dlamch.o: dlamch.f ; $(FC) $(FFLAGS_NOOPT) -c -o $@ $<

View File

@ -10,6 +10,10 @@
*
* DOUBLE PRECISION FUNCTION DLAMCH( CMACH )
*
* .. Scalar Arguments ..
* CHARACTER CMACH
* ..
*
*
*> \par Purpose:
* =============
@ -24,6 +28,7 @@
*
*> \param[in] CMACH
*> \verbatim
*> CMACH is CHARACTER*1
*> Specifies the value to be returned by DLAMCH:
*> = 'E' or 'e', DLAMCH := eps
*> = 'S' or 's', DLAMCH := sfmin

View File

@ -10,6 +10,10 @@
*
* DOUBLE PRECISION FUNCTION DLAMCH( CMACH )
*
* .. Scalar Arguments ..
* CHARACTER CMACH
* ..
*
*
*> \par Purpose:
* =============

View File

@ -25,12 +25,15 @@
* ==========
*
*> \param[out] VERS_MAJOR
*> VERS_MAJOR is INTEGER
*> return the lapack major version
*>
*> \param[out] VERS_MINOR
*> VERS_MINOR is INTEGER
*> return the lapack minor version from the major version
*>
*> \param[out] VERS_PATCH
*> VERS_PATCH is INTEGER
*> return the lapack patch version from the minor version
*
* Authors:
@ -41,24 +44,23 @@
*> \author Univ. of Colorado Denver
*> \author NAG Ltd.
*
*> \date June 2017
*> \date November 2019
*
*> \ingroup auxOTHERauxiliary
*
* =====================================================================
SUBROUTINE ILAVER( VERS_MAJOR, VERS_MINOR, VERS_PATCH )
*
* -- LAPACK computational routine (version 3.7.1) --
* -- LAPACK computational routine --
* -- LAPACK is a software package provided by Univ. of Tennessee, --
* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
* June 2017
*
* =====================================================================
*
INTEGER VERS_MAJOR, VERS_MINOR, VERS_PATCH
* =====================================================================
VERS_MAJOR = 3
VERS_MINOR = 8
VERS_MINOR = 9
VERS_PATCH = 0
* =====================================================================
*

View File

@ -8,30 +8,28 @@ SHELL = /bin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = cc
CC = cc
CFLAGS = -O4
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = f77
OPTS = -O4 -fpe1
DRVOPTS = $(OPTS)
NOOPT =
FC = f77
FFLAGS = -O4 -fpe1
FFLAGS_DRV = $(FFLAGS)
FFLAGS_NOOPT =
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = f77
LOADOPTS =
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = ranlib
AR = ar
ARFLAGS = cr
RANLIB = ranlib
# Timer for the SECOND and DSECND routines
#
@ -74,9 +72,9 @@ TIMER = EXT_ETIME
# machine-specific, optimized BLAS library should be used whenever
# possible.)
#
#BLASLIB = ../../librefblas.a
#BLASLIB = $(TOPSRCDIR)/librefblas.a
BLASLIB = -ldxml
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,30 +8,28 @@ SHELL = /bin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = cc
CC = cc
CFLAGS =
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = f77
OPTS = +O4 +U77
DRVOPTS = $(OPTS) -K
NOOPT = +U77
FC = f77
FFLAGS = +O4 +U77
FFLAGS_DRV = $(FFLAGS) -K
FFLAGS_NOOPT = +U77
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = f77
LOADOPTS = -Aa +U77
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = echo
AR = ar
ARFLAGS = cr
RANLIB = echo
# Timer for the SECOND and DSECND routines
#
@ -74,9 +72,9 @@ TIMER = EXT_ETIME
# machine-specific, optimized BLAS library should be used whenever
# possible.)
#
#BLASLIB = ../../librefblas.a
#BLASLIB = $(TOPSRCDIR)/librefblas.a
BLASLIB = -lblas
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,33 +8,30 @@ SHELL = /sbin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = cc
CC = cc
CFLAGS = -O3
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = f77
OPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON
#OPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON
DRVOPTS = $(OPTS) -static
NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON
#NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON
FC = f77
FFLAGS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON
#FFLAGS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON
FFLAGS_DRV = $(FFLAGS) -static
FFLAGS_NOOPT = -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON
#FFLAGS_NOOPT = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = f77
LOADOPTS = -O3 -64 -mips4 -r10000 -OPT:IEEE_NaN_inf=ON
#LOADOPTS = -g -DEBUG:subscript_check=ON -trapuv -OPT:IEEE_NaN_inf=ON
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = echo
AR = ar
ARFLAGS = cr
RANLIB = echo
# Timer for the SECOND and DSECND routines
#
@ -78,8 +75,8 @@ TIMER = EXT_ETIME
# possible.)
#
#BLASLIB = -lblas
BLASLIB = ../../librefblas.a
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
BLASLIB = $(TOPSRCDIR)/librefblas.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,33 +8,30 @@ SHELL = /sbin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = cc
CC = cc
CFLAGS = -O3
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = f77
OPTS = -O3 -64 -mips4 -r10000
#OPTS = -O3 -64 -mips4 -r10000 -mp
DRVOPTS = $(OPTS) -static
NOOPT = -64 -mips4 -r10000
#NOOPT = -64 -mips4 -r10000 -mp
FC = f77
FFLAGS = -O3 -64 -mips4 -r10000
#FFLAGS = -O3 -64 -mips4 -r10000 -mp
FFLAGS_DRV = $(FFLAGS) -static
FFLAGS_NOOPT = -64 -mips4 -r10000
#FFLAGS_NOOPT = -64 -mips4 -r10000 -mp
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = f77
LOADOPTS = -O3 -64 -mips4 -r10000
#LOADOPTS = -O3 -64 -mips4 -r10000 -mp
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = echo
AR = ar
ARFLAGS = cr
RANLIB = echo
# Timer for the SECOND and DSECND routines
#
@ -79,8 +76,8 @@ TIMER = EXT_ETIME
#
BLASLIB = -lblas
#BLASLIB = -lblas_mp
#BLASLIB = ../../librefblas.a
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
#BLASLIB = $(TOPSRCDIR)/librefblas.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,30 +8,28 @@ SHELL = /sbin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = cc
CC = cc
CFLAGS = -O4
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = f77
OPTS = -O4
DRVOPTS = $(OPTS) -static
NOOPT =
FC = f77
FFLAGS = -O4
FFLAGS_DRV = $(FFLAGS) -static
FFLAGS_NOOPT =
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = f77
LOADOPTS =
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = echo
AR = ar
ARFLAGS = cr
RANLIB = echo
# Timer for the SECOND and DSECND routines
#
@ -75,8 +73,8 @@ TIMER = EXT_ETIME
# possible.)
#
#BLASLIB = -lblas
BLASLIB = ../../librefblas.a
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
BLASLIB = $(TOPSRCDIR)/librefblas.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,30 +8,28 @@ SHELL = /bin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = cc
CC = cc
CFLAGS = -O3
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = f77
OPTS = -dalign -O4 -fast
DRVOPTS = $(OPTS)
NOOPT =
FC = f77
FFLAGS = -dalign -O4 -fast
FFLAGS_DRV = $(FFLAGS)
FFLAGS_NOOPT =
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = f77
LOADOPTS = -dalign -O4 -fast
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = ranlib
AR = ar
ARFLAGS = cr
RANLIB = ranlib
# Timer for the SECOND and DSECND routines
#
@ -75,8 +73,8 @@ TIMER = EXT_ETIME
# possible.)
#
#BLASLIB = -lblas
BLASLIB = ../../librefblas.a
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
BLASLIB = $(TOPSRCDIR)/librefblas.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,34 +8,31 @@ SHELL = /bin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = cc
CC = cc
CFLAGS = -O3
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = f77
#OPTS = -O4 -u -f -mt
#OPTS = -u -f -dalign -native -xO5 -xarch=v8plusa
OPTS = -u -f -dalign -native -xO2 -xarch=v8plusa
DRVOPTS = $(OPTS)
NOOPT = -u -f
#NOOPT = -u -f -mt
FC = f77
#FFLAGS = -O4 -u -f -mt
#FFLAGS = -u -f -dalign -native -xO5 -xarch=v8plusa
FFLAGS = -u -f -dalign -native -xO2 -xarch=v8plusa
FFLAGS_DRV = $(FFLAGS)
FFLAGS_NOOPT = -u -f
#FFLAGS_NOOPT = -u -f -mt
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = f77
#LOADOPTS = -mt
LOADOPTS = -f -dalign -native -xO2 -xarch=v8plusa
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = echo
AR = ar
ARFLAGS = cr
RANLIB = echo
# Timer for the SECOND and DSECND routines
#
@ -78,10 +75,10 @@ TIMER = EXT_ETIME
# machine-specific, optimized BLAS library should be used whenever
# possible.)
#
#BLASLIB = ../../librefblas.a
#BLASLIB = $(TOPSRCDIR)/librefblas.a
#BLASLIB = -xlic_lib=sunperf_mt
BLASLIB = -xlic_lib=sunperf
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,31 +8,29 @@ SHELL = /bin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = xlc
CC = xlc
CFLAGS = -O3 -qnosave
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = xlf
OPTS = -O3 -qfixed -qnosave
FC = xlf
FFLAGS = -O3 -qfixed -qnosave
# For -O2, add -qstrict=none
DRVOPTS = $(OPTS)
NOOPT = -O0 -qfixed -qnosave
FFLAGS_DRV = $(FFLAGS)
FFLAGS_NOOPT = -O0 -qfixed -qnosave
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = xlf
LOADOPTS = -qnosave
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = ranlib
AR = ar
ARFLAGS = cr
RANLIB = ranlib
# Timer for the SECOND and DSECND routines
#
@ -75,9 +73,9 @@ TIMER = EXT_ETIME_
# machine-specific, optimized BLAS library should be used whenever
# possible.)
#
#BLASLIB = ../../librefblas.a
#BLASLIB = $(TOPSRCDIR)/librefblas.a
BLASLIB = -lessl
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,10 +8,10 @@ SHELL = /bin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = gcc
CC = gcc
CFLAGS = -O3
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
@ -19,23 +19,21 @@ CFLAGS = -O3
# and handle these quantities appropriately. As a consequence, one
# should not compile LAPACK with flags such as -ffpe-trap=overflow.
#
FORTRAN = gfortran
OPTS = -O2 -frecursive
DRVOPTS = $(OPTS)
NOOPT = -O0 -frecursive
FC = gfortran
FFLAGS = -O2 -frecursive
FFLAGS_DRV = $(FFLAGS)
FFLAGS_NOOPT = -O0 -frecursive
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = gfortran
LOADOPTS =
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = ranlib
AR = ar
ARFLAGS = cr
RANLIB = ranlib
# Timer for the SECOND and DSECND routines
#
@ -78,8 +76,8 @@ TIMER = INT_ETIME
# machine-specific, optimized BLAS library should be used whenever
# possible.)
#
BLASLIB = ../../librefblas.a
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
BLASLIB = $(TOPSRCDIR)/librefblas.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,10 +8,10 @@ SHELL = /bin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = gcc
CC = gcc
CFLAGS = -g
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
@ -19,23 +19,21 @@ CFLAGS = -g
# and handle these quantities appropriately. As a consequence, one
# should not compile LAPACK with flags such as -ffpe-trap=overflow.
#
FORTRAN = gfortran -fimplicit-none -g -frecursive
OPTS =
DRVOPTS = $(OPTS)
NOOPT = -g -O0 -frecursive
FC = gfortran
FFLAGS = -fimplicit-none -g -frecursive
FFLAGS_DRV = $(FFLAGS)
FFLAGS_NOOPT = $(FFLAGS) -O0
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = gfortran -g
LOADOPTS =
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = ranlib
AR = ar
ARFLAGS = cr
RANLIB = ranlib
# Timer for the SECOND and DSECND routines
#
@ -78,8 +76,8 @@ TIMER = INT_CPU_TIME
# machine-specific, optimized BLAS library should be used whenever
# possible.)
#
BLASLIB = ../../librefblas.a
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
BLASLIB = $(TOPSRCDIR)/librefblas.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,30 +8,28 @@ SHELL = /bin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = icc
CC = icc
CFLAGS = -O3
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = ifort
OPTS = -O3 -fp-model strict -assume protect_parens
DRVOPTS = $(OPTS)
NOOPT = -O0 -fp-model strict -assume protect_parens
FC = ifort
FFLAGS = -O3 -fp-model strict -assume protect_parens
FFLAGS_DRV = $(FFLAGS)
FFLAGS_NOOPT = -O0 -fp-model strict -assume protect_parens
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = ifort
LOADOPTS =
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = ranlib
AR = ar
ARFLAGS = cr
RANLIB = ranlib
# Timer for the SECOND and DSECND routines
#
@ -74,8 +72,8 @@ TIMER = EXT_ETIME
# machine-specific, optimized BLAS library should be used whenever
# possible.)
#
BLASLIB = ../../librefblas.a
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
BLASLIB = $(TOPSRCDIR)/librefblas.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,30 +8,28 @@ SHELL = /bin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = pgcc
CC = pgcc
CFLAGS =
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = pgf95
OPTS = -O3
DRVOPTS = $(OPTS)
NOOPT = -O0
FC = pgf95
FFLAGS = -O3
FFLAGS_DRV = $(FFLAGS)
FFLAGS_NOOPT = -O0
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = $(FORTRAN)
LOADOPTS =
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = echo
AR = ar
ARFLAGS = cr
RANLIB = echo
# Timer for the SECOND and DSECND routines
#
@ -74,8 +72,8 @@ TIMER = INT_CPU_TIME
# machine-specific, optimized BLAS library should be used whenever
# possible.)
#
BLASLIB = ../../librefblas.a
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
BLASLIB = $(TOPSRCDIR)/librefblas.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -8,30 +8,28 @@ SHELL = /bin/sh
# CC is the C compiler, normally invoked with options CFLAGS.
#
CC = pghpc
CC = pghpc
CFLAGS =
# Modify the FORTRAN and OPTS definitions to refer to the compiler
# Modify the FC and FFLAGS definitions to the desired compiler
# and desired compiler options for your machine. FFLAGS_NOOPT refers to
# the compiler options desired when NO OPTIMIZATION is selected.
#
FORTRAN = pghpf
OPTS = -O4 -Mnohpfc -Mdclchk
DRVOPTS = $(OPTS)
NOOPT = -Mnohpfc -Mdclchk
FC = pghpf
FFLAGS = -O4 -Mnohpfc -Mdclchk
FFLAGS_DRV = $(FFLAGS)
FFLAGS_NOOPT = -Mnohpfc -Mdclchk
# Define LOADER and LOADOPTS to refer to the loader and desired
# load options for your machine.
# Define LDFLAGS to the desired linker options for your machine.
#
LOADER = pghpf
LOADOPTS =
LDFLAGS =
# The archiver and the flag(s) to use when building an archive
# (library). If your system has no ranlib, set RANLIB = echo.
#
ARCH = ar
ARCHFLAGS = cr
RANLIB = echo
AR = ar
ARFLAGS = cr
RANLIB = echo
# Timer for the SECOND and DSECND routines
#
@ -75,8 +73,8 @@ TIMER = EXT_ETIME
# possible.)
#
#BLASLIB = -lessl
BLASLIB = ../../librefblas.a
CBLASLIB = ../../libcblas.a
LAPACKLIB = liblapack.a
TMGLIB = libtmglib.a
LAPACKELIB = liblapacke.a
BLASLIB = $(TOPSRCDIR)/librefblas.a
CBLASLIB = $(TOPSRCDIR)/libcblas.a
LAPACKLIB = $(TOPSRCDIR)/liblapack.a
TMGLIB = $(TOPSRCDIR)/libtmglib.a
LAPACKELIB = $(TOPSRCDIR)/liblapacke.a

View File

@ -28,6 +28,7 @@
*
*> \param[in] CMACH
*> \verbatim
*> CMACH is CHARACTER*1
*> Specifies the value to be returned by SLAMCH:
*> = 'E' or 'e', SLAMCH := eps
*> = 'S' or 's', SLAMCH := sfmin

View File

@ -16,18 +16,16 @@ if(NOT FortranCInterface_GLOBAL_FOUND OR NOT FortranCInterface_MODULE_FOUND)
${LAPACK_BINARY_DIR}/include/lapacke_mangling.h)
endif()
if(WIN32 AND NOT UNIX)
add_definitions(-DHAVE_LAPACK_CONFIG_H -DLAPACK_COMPLEX_STRUCTURE)
message(STATUS "Windows BUILD")
endif()
get_directory_property(DirDefs COMPILE_DEFINITIONS)
include_directories(include ${LAPACK_BINARY_DIR}/include)
add_subdirectory(include)
add_subdirectory(src)
add_subdirectory(utils)
option(LAPACKE_BUILD_SINGLE "Build LAPACKE single precision real" ON)
option(LAPACKE_BUILD_DOUBLE "Build LAPACKE double precision real" ON)
option(LAPACKE_BUILD_COMPLEX "Build LAPACKE single precision complex" ON)
option(LAPACKE_BUILD_COMPLEX16 "Build LAPACKE double precision complex" ON)
macro(append_subdir_files variable dirname)
get_directory_property(holder DIRECTORY ${dirname} DEFINITION ${variable})
foreach(depfile ${holder})
@ -35,8 +33,29 @@ macro(append_subdir_files variable dirname)
endforeach()
endmacro()
message(STATUS "Build LAPACKE single precision real: ${LAPACKE_BUILD_SINGLE}")
message(STATUS "Build LAPACKE double precision real: ${LAPACKE_BUILD_DOUBLE}")
message(STATUS "Build LAPACKE single precision complex: ${LAPACKE_BUILD_COMPLEX}")
message(STATUS "Build LAPACKE double precision complex: ${LAPACKE_BUILD_COMPLEX16}")
append_subdir_files(LAPACKE_INCLUDE "include")
append_subdir_files(SOURCES "src")
if (LAPACKE_BUILD_SINGLE)
append_subdir_files(SOURCES_SINGLE "src")
list(APPEND SOURCES ${SOURCES_SINGLE})
endif()
if (LAPACKE_BUILD_DOUBLE)
append_subdir_files(SOURCES_DOUBLE "src")
list(APPEND SOURCES ${SOURCES_DOUBLE})
endif()
if (LAPACKE_BUILD_COMPLEX)
append_subdir_files(SOURCES_COMPLEX "src")
list(APPEND SOURCES ${SOURCES_COMPLEX})
endif()
if (LAPACKE_BUILD_COMPLEX16)
append_subdir_files(SOURCES_COMPLEX16 "src")
list(APPEND SOURCES ${SOURCES_COMPLEX16})
endif()
append_subdir_files(DEPRECATED "src")
append_subdir_files(EXTENDED "src")
append_subdir_files(MATGEN "src")
@ -61,9 +80,13 @@ set_target_properties(
SOVERSION ${LAPACK_MAJOR_VERSION}
)
target_include_directories(lapacke PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/../include>
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>
)
if(WIN32 AND NOT UNIX)
target_compile_definitions(lapacke PUBLIC HAVE_LAPACK_CONFIG_H LAPACK_COMPLEX_STRUCTURE)
message(STATUS "Windows BUILD")
endif()
if(LAPACKE_WITH_TMG)
target_link_libraries(lapacke PRIVATE tmglib)
@ -71,7 +94,11 @@ endif()
target_link_libraries(lapacke PRIVATE ${LAPACK_LIBRARIES})
lapack_install_library(lapacke)
install(FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(
FILES ${LAPACKE_INCLUDE} ${LAPACK_BINARY_DIR}/include/lapacke_mangling.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
COMPONENT Development
)
if(BUILD_TESTING)
add_subdirectory(example)
@ -82,6 +109,7 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/lapacke.pc.in ${CMAKE_CURRENT_BINARY_
install(FILES
${CMAKE_CURRENT_BINARY_DIR}/lapacke.pc
DESTINATION ${PKG_CONFIG_DIR}
COMPONENT Development
)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/cmake/lapacke-config-version.cmake.in
@ -95,7 +123,10 @@ install(FILES
${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/lapacke-config.cmake
${LAPACK_BINARY_DIR}/lapacke-config-version.cmake
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}
COMPONENT Development
)
install(EXPORT lapacke-targets
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION})
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/lapacke-${LAPACK_VERSION}
COMPONENT Development
)

View File

@ -40,22 +40,26 @@
# To clean everything including lapacke library type
# 'make cleanall'
#
include ../make.inc
TOPSRCDIR = ..
include $(TOPSRCDIR)/make.inc
.PHONY: all
all: lapacke
.PHONY: lapacke
lapacke: include/lapacke_mangling.h
$(MAKE) -C src
$(MAKE) -C utils
include/lapacke_mangling.h: include/lapacke_mangling_with_flags.h.in
cp $< $@
cp include/lapacke_mangling_with_flags.h.in $@
.PHONY: lapacke_example
lapacke_example: lapacke
$(MAKE) -C example
#clean: cleanlib
clean: cleanobj
.PHONY: clean cleanobj cleanlib cleanexe
clean:
$(MAKE) -C src clean
$(MAKE) -C utils clean
$(MAKE) -C example clean
@ -64,6 +68,6 @@ cleanobj:
$(MAKE) -C utils cleanobj
$(MAKE) -C example cleanobj
cleanlib:
rm -f ../$(LAPACKELIB)
$(MAKE) -C src cleanlib
cleanexe:
$(MAKE) -C example cleanexe

View File

@ -7,8 +7,11 @@ if(NOT TARGET lapacke)
include("@LAPACK_BINARY_DIR@/lapack-targets.cmake")
endif()
# Hint for projects building against LAPACK
set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID})
# Report lapacke header search locations from build tree.
set(LAPACKE_INCLUDE_DIRS "@LAPACK_BINARY_DIR@/include")
# Report lapacke libraries.
set(LAPACKE_LIBRARIES lapacke)
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES})

View File

@ -13,11 +13,14 @@ if(NOT TARGET lapacke)
include(${_LAPACKE_SELF_DIR}/lapacke-targets.cmake)
endif()
# Hint for projects building against LAPACK
set(LAPACKE_Fortran_COMPILER_ID ${LAPACK_Fortran_COMPILER_ID})
# Report lapacke header search locations.
set(LAPACKE_INCLUDE_DIRS ${_LAPACKE_PREFIX}/include)
# Report lapacke libraries.
set(LAPACKE_LIBRARIES lapacke)
set(LAPACKE_LIBRARIES lapacke ${LAPACK_LIBRARIES})
unset(_LAPACKE_PREFIX)
unset(_LAPACKE_SELF_DIR)

View File

@ -1,34 +1,38 @@
include ../../make.inc
TOPSRCDIR = ../..
include $(TOPSRCDIR)/make.inc
.SUFFIXES: .c .o
.c.o:
$(CC) $(CFLAGS) -I. -I../include -c -o $@ $<
.PHONY: all
all: xexample_DGESV_rowmajor \
xexample_DGESV_colmajor \
xexample_DGELS_rowmajor \
xexample_DGELS_colmajor
LIBRARIES = ../../$(LAPACKELIB) ../../$(LAPACKLIB) $(BLASLIB)
LIBRARIES = $(LAPACKELIB) $(LAPACKLIB) $(BLASLIB)
# Double Precision Examples
xexample_DGESV_rowmajor: example_DGESV_rowmajor.o lapacke_example_aux.o $(LIBRARIES)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
./$@
xexample_DGESV_colmajor: example_DGESV_colmajor.o lapacke_example_aux.o $(LIBRARIES)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
./$@
xexample_DGELS_rowmajor: example_DGELS_rowmajor.o lapacke_example_aux.o $(LIBRARIES)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
./$@
xexample_DGELS_colmajor: example_DGELS_colmajor.o lapacke_example_aux.o $(LIBRARIES)
$(LOADER) $(LOADOPTS) -o $@ $^
$(FC) $(FFLAGS) $(LDFLAGS) -o $@ $^
./$@
.PHONY: clean cleanobj cleanexe
clean: cleanobj cleanexe
cleanobj:
rm -f *.o
cleanexe:
rm -f x*
.c.o:
$(CC) $(CFLAGS) -I. -I../include -c -o $@ $<

View File

@ -1,3 +1,3 @@
set(LAPACKE_INCLUDE lapacke.h lapacke_config.h lapacke_utils.h)
set(LAPACKE_INCLUDE lapacke.h lapack.h lapacke_config.h lapacke_utils.h)
file(COPY ${LAPACKE_INCLUDE} DESTINATION ${LAPACK_BINARY_DIR}/include)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,4 +1,4 @@
set(SOURCES
set(SOURCES_COMPLEX
lapacke_cbbcsd.c
lapacke_cbbcsd_work.c
lapacke_cbdsqr.c
@ -78,11 +78,11 @@ lapacke_cgeqrf_work.c
lapacke_cgeqrfp.c
lapacke_cgeqrfp_work.c
lapacke_cgeqrt.c
lapacke_cgeqrt_work.c
lapacke_cgeqrt2.c
lapacke_cgeqrt2_work.c
lapacke_cgeqrt3.c
lapacke_cgeqrt3_work.c
lapacke_cgeqrt_work.c
lapacke_cgerfs.c
lapacke_cgerfs_work.c
lapacke_cgerqf.c
@ -93,6 +93,8 @@ lapacke_cgesv.c
lapacke_cgesv_work.c
lapacke_cgesvd.c
lapacke_cgesvd_work.c
lapacke_cgesvdq.c
lapacke_cgesvdq_work.c
lapacke_cgesvdx.c
lapacke_cgesvdx_work.c
lapacke_cgesvj.c
@ -129,10 +131,10 @@ lapacke_cggevx.c
lapacke_cggevx_work.c
lapacke_cggglm.c
lapacke_cggglm_work.c
lapacke_cgghrd.c
lapacke_cgghrd_work.c
lapacke_cgghd3.c
lapacke_cgghd3_work.c
lapacke_cgghrd.c
lapacke_cgghrd_work.c
lapacke_cgglse.c
lapacke_cgglse_work.c
lapacke_cggqrf.c
@ -157,14 +159,14 @@ lapacke_cgttrs.c
lapacke_cgttrs_work.c
lapacke_chbev.c
lapacke_chbev_work.c
lapacke_chbevd.c
lapacke_chbevd_work.c
lapacke_chbevx.c
lapacke_chbevx_work.c
lapacke_chbev_2stage.c
lapacke_chbev_2stage_work.c
lapacke_chbevd.c
lapacke_chbevd_work.c
lapacke_chbevd_2stage.c
lapacke_chbevd_2stage_work.c
lapacke_chbevx.c
lapacke_chbevx_work.c
lapacke_chbevx_2stage.c
lapacke_chbevx_2stage_work.c
lapacke_chbgst.c
@ -185,18 +187,18 @@ lapacke_cheequb.c
lapacke_cheequb_work.c
lapacke_cheev.c
lapacke_cheev_work.c
lapacke_cheevd.c
lapacke_cheevd_work.c
lapacke_cheevr.c
lapacke_cheevr_work.c
lapacke_cheevx.c
lapacke_cheevx_work.c
lapacke_cheev_2stage.c
lapacke_cheev_2stage_work.c
lapacke_cheevd.c
lapacke_cheevd_work.c
lapacke_cheevd_2stage.c
lapacke_cheevd_2stage_work.c
lapacke_cheevr.c
lapacke_cheevr_work.c
lapacke_cheevr_2stage.c
lapacke_cheevr_2stage_work.c
lapacke_cheevx.c
lapacke_cheevx_work.c
lapacke_cheevx_2stage.c
lapacke_cheevx_2stage_work.c
lapacke_chegst.c
@ -214,8 +216,8 @@ lapacke_cherfs_work.c
lapacke_chesv.c
lapacke_chesv_work.c
lapacke_chesv_aa.c
lapacke_chesv_aa_2stage.c
lapacke_chesv_aa_work.c
lapacke_chesv_aa_2stage.c
lapacke_chesv_aa_2stage_work.c
lapacke_chesv_rk.c
lapacke_chesv_rk_work.c
@ -226,35 +228,35 @@ lapacke_cheswapr_work.c
lapacke_chetrd.c
lapacke_chetrd_work.c
lapacke_chetrf.c
lapacke_chetrf_rook.c
lapacke_chetrf_work.c
lapacke_chetrf_rook_work.c
lapacke_chetrf_aa.c
lapacke_chetrf_aa_2stage.c
lapacke_chetrf_aa_work.c
lapacke_chetrf_aa_2stage.c
lapacke_chetrf_aa_2stage_work.c
lapacke_chetrf_rk.c
lapacke_chetrf_rk_work.c
lapacke_chetrf_rook.c
lapacke_chetrf_rook_work.c
lapacke_chetri.c
lapacke_chetri_work.c
lapacke_chetri2.c
lapacke_chetri2_work.c
lapacke_chetri_3.c
lapacke_chetri_3_work.c
lapacke_chetri2x.c
lapacke_chetri2x_work.c
lapacke_chetri_work.c
lapacke_chetri_3.c
lapacke_chetri_3_work.c
lapacke_chetrs.c
lapacke_chetrs_rook.c
lapacke_chetrs_work.c
lapacke_chetrs2.c
lapacke_chetrs2_work.c
lapacke_chetrs_work.c
lapacke_chetrs_rook_work.c
lapacke_chetrs_aa.c
lapacke_chetrs_aa_2stage.c
lapacke_chetrs_aa_work.c
lapacke_chetrs_aa_2stage_work.c
lapacke_chetrs_3.c
lapacke_chetrs_3_work.c
lapacke_chetrs_aa.c
lapacke_chetrs_aa_work.c
lapacke_chetrs_aa_2stage.c
lapacke_chetrs_aa_2stage_work.c
lapacke_chetrs_rook.c
lapacke_chetrs_rook_work.c
lapacke_chfrk.c
lapacke_chfrk_work.c
lapacke_chgeqz.c
@ -445,52 +447,54 @@ lapacke_csyconv.c
lapacke_csyconv_work.c
lapacke_csyequb.c
lapacke_csyequb_work.c
lapacke_csyr.c
lapacke_csyr_work.c
lapacke_csyrfs.c
lapacke_csyrfs_work.c
lapacke_csysv.c
lapacke_csysv_rook.c
lapacke_csysv_rook_work.c
lapacke_csysv_work.c
lapacke_csysv_aa.c
lapacke_csysv_aa_2stage.c
lapacke_csysv_aa_work.c
lapacke_csysv_aa_2stage.c
lapacke_csysv_aa_2stage_work.c
lapacke_csysv_rk.c
lapacke_csysv_rk_work.c
lapacke_csysv_rook.c
lapacke_csysv_rook_work.c
lapacke_csysvx.c
lapacke_csysvx_work.c
lapacke_csyswapr.c
lapacke_csyswapr_work.c
lapacke_csytrf.c
lapacke_csytrf_work.c
lapacke_csytrf_rook.c
lapacke_csytrf_rook_work.c
lapacke_csytrf_aa.c
lapacke_csytrf_aa_2stage.c
lapacke_csytrf_aa_work.c
lapacke_csytrf_aa_2stage.c
lapacke_csytrf_aa_2stage_work.c
lapacke_csytrf_rk.c
lapacke_csytrf_rk_work.c
lapacke_csytrf_rook.c
lapacke_csytrf_rook_work.c
lapacke_csytri.c
lapacke_csytri_work.c
lapacke_csytri2.c
lapacke_csytri2_work.c
lapacke_csytri_3.c
lapacke_csytri_3_work.c
lapacke_csytri2x.c
lapacke_csytri2x_work.c
lapacke_csytri_work.c
lapacke_csytri_3.c
lapacke_csytri_3_work.c
lapacke_csytrs.c
lapacke_csytrs_rook.c
lapacke_csytrs_work.c
lapacke_csytrs2.c
lapacke_csytrs2_work.c
lapacke_csytrs_work.c
lapacke_csytrs_rook_work.c
lapacke_csytrs_aa.c
lapacke_csytrs_aa_2stage.c
lapacke_csytrs_aa_work.c
lapacke_csytrs_aa_2stage_work.c
lapacke_csytrs_3.c
lapacke_csytrs_3_work.c
lapacke_csytrs_aa.c
lapacke_csytrs_aa_work.c
lapacke_csytrs_aa_2stage.c
lapacke_csytrs_aa_2stage_work.c
lapacke_csytrs_rook.c
lapacke_csytrs_rook_work.c
lapacke_ctbcon.c
lapacke_ctbcon_work.c
lapacke_ctbrfs.c
@ -522,9 +526,9 @@ lapacke_ctpcon_work.c
lapacke_ctpmqrt.c
lapacke_ctpmqrt_work.c
lapacke_ctpqrt.c
lapacke_ctpqrt_work.c
lapacke_ctpqrt2.c
lapacke_ctpqrt2_work.c
lapacke_ctpqrt_work.c
lapacke_ctprfb.c
lapacke_ctprfb_work.c
lapacke_ctprfs.c
@ -601,14 +605,16 @@ lapacke_cupgtr.c
lapacke_cupgtr_work.c
lapacke_cupmtr.c
lapacke_cupmtr_work.c
)
set(SOURCES_DOUBLE
lapacke_dbbcsd.c
lapacke_dbbcsd_work.c
lapacke_dbdsdc.c
lapacke_dbdsdc_work.c
lapacke_dbdsvdx.c
lapacke_dbdsvdx_work.c
lapacke_dbdsqr.c
lapacke_dbdsqr_work.c
lapacke_dbdsvdx.c
lapacke_dbdsvdx_work.c
lapacke_ddisna.c
lapacke_ddisna_work.c
lapacke_dgbbrd.c
@ -686,11 +692,11 @@ lapacke_dgeqrf_work.c
lapacke_dgeqrfp.c
lapacke_dgeqrfp_work.c
lapacke_dgeqrt.c
lapacke_dgeqrt_work.c
lapacke_dgeqrt2.c
lapacke_dgeqrt2_work.c
lapacke_dgeqrt3.c
lapacke_dgeqrt3_work.c
lapacke_dgeqrt_work.c
lapacke_dgerfs.c
lapacke_dgerfs_work.c
lapacke_dgerqf.c
@ -701,6 +707,8 @@ lapacke_dgesv.c
lapacke_dgesv_work.c
lapacke_dgesvd.c
lapacke_dgesvd_work.c
lapacke_dgesvdq.c
lapacke_dgesvdq_work.c
lapacke_dgesvdx.c
lapacke_dgesvdx_work.c
lapacke_dgesvj.c
@ -737,10 +745,10 @@ lapacke_dggevx.c
lapacke_dggevx_work.c
lapacke_dggglm.c
lapacke_dggglm_work.c
lapacke_dgghrd.c
lapacke_dgghrd_work.c
lapacke_dgghd3.c
lapacke_dgghd3_work.c
lapacke_dgghrd.c
lapacke_dgghrd_work.c
lapacke_dgglse.c
lapacke_dgglse_work.c
lapacke_dggqrf.c
@ -823,10 +831,10 @@ lapacke_dopmtr.c
lapacke_dopmtr_work.c
lapacke_dorbdb.c
lapacke_dorbdb_work.c
lapacke_dorcsd2by1.c
lapacke_dorcsd2by1_work.c
lapacke_dorcsd.c
lapacke_dorcsd_work.c
lapacke_dorcsd2by1.c
lapacke_dorcsd2by1_work.c
lapacke_dorgbr.c
lapacke_dorgbr_work.c
lapacke_dorghr.c
@ -933,14 +941,14 @@ lapacke_dpttrs.c
lapacke_dpttrs_work.c
lapacke_dsbev.c
lapacke_dsbev_work.c
lapacke_dsbevd.c
lapacke_dsbevd_work.c
lapacke_dsbevx.c
lapacke_dsbevx_work.c
lapacke_dsbev_2stage.c
lapacke_dsbev_2stage_work.c
lapacke_dsbevd.c
lapacke_dsbevd_work.c
lapacke_dsbevd_2stage.c
lapacke_dsbevd_2stage_work.c
lapacke_dsbevx.c
lapacke_dsbevx_work.c
lapacke_dsbevx_2stage.c
lapacke_dsbevx_2stage_work.c
lapacke_dsbgst.c
@ -1021,18 +1029,18 @@ lapacke_dsyequb.c
lapacke_dsyequb_work.c
lapacke_dsyev.c
lapacke_dsyev_work.c
lapacke_dsyevd.c
lapacke_dsyevd_work.c
lapacke_dsyevr.c
lapacke_dsyevr_work.c
lapacke_dsyevx.c
lapacke_dsyevx_work.c
lapacke_dsyev_2stage.c
lapacke_dsyev_2stage_work.c
lapacke_dsyevd.c
lapacke_dsyevd_work.c
lapacke_dsyevd_2stage.c
lapacke_dsyevd_2stage_work.c
lapacke_dsyevr.c
lapacke_dsyevr_work.c
lapacke_dsyevr_2stage.c
lapacke_dsyevr_2stage_work.c
lapacke_dsyevx.c
lapacke_dsyevx_work.c
lapacke_dsyevx_2stage.c
lapacke_dsyevx_2stage_work.c
lapacke_dsygst.c
@ -1048,15 +1056,15 @@ lapacke_dsygvx_work.c
lapacke_dsyrfs.c
lapacke_dsyrfs_work.c
lapacke_dsysv.c
lapacke_dsysv_rook.c
lapacke_dsysv_rook_work.c
lapacke_dsysv_work.c
lapacke_dsysv_aa.c
lapacke_dsysv_aa_2stage.c
lapacke_dsysv_aa_work.c
lapacke_dsysv_aa_2stage.c
lapacke_dsysv_aa_2stage_work.c
lapacke_dsysv_rk.c
lapacke_dsysv_rk_work.c
lapacke_dsysv_rook.c
lapacke_dsysv_rook_work.c
lapacke_dsysvx.c
lapacke_dsysvx_work.c
lapacke_dsyswapr.c
@ -1065,33 +1073,33 @@ lapacke_dsytrd.c
lapacke_dsytrd_work.c
lapacke_dsytrf.c
lapacke_dsytrf_work.c
lapacke_dsytrf_rook.c
lapacke_dsytrf_rook_work.c
lapacke_dsytrf_aa.c
lapacke_dsytrf_aa_2stage.c
lapacke_dsytrf_aa_work.c
lapacke_dsytrf_aa_2stage.c
lapacke_dsytrf_aa_2stage_work.c
lapacke_dsytrf_rk.c
lapacke_dsytrf_rk_work.c
lapacke_dsytrf_rook.c
lapacke_dsytrf_rook_work.c
lapacke_dsytri.c
lapacke_dsytri_work.c
lapacke_dsytri2.c
lapacke_dsytri2_work.c
lapacke_dsytri_3.c
lapacke_dsytri_3_work.c
lapacke_dsytri2x.c
lapacke_dsytri2x_work.c
lapacke_dsytri_work.c
lapacke_dsytri_3.c
lapacke_dsytri_3_work.c
lapacke_dsytrs.c
lapacke_dsytrs_rook.c
lapacke_dsytrs_work.c
lapacke_dsytrs2.c
lapacke_dsytrs2_work.c
lapacke_dsytrs_aa.c
lapacke_dsytrs_aa_2stage.c
lapacke_dsytrs_aa_work.c
lapacke_dsytrs_aa_2stage_work.c
lapacke_dsytrs_3.c
lapacke_dsytrs_3_work.c
lapacke_dsytrs_work.c
lapacke_dsytrs_aa.c
lapacke_dsytrs_aa_work.c
lapacke_dsytrs_aa_2stage.c
lapacke_dsytrs_aa_2stage_work.c
lapacke_dsytrs_rook.c
lapacke_dsytrs_rook_work.c
lapacke_dtbcon.c
lapacke_dtbcon_work.c
@ -1124,9 +1132,9 @@ lapacke_dtpcon_work.c
lapacke_dtpmqrt.c
lapacke_dtpmqrt_work.c
lapacke_dtpqrt.c
lapacke_dtpqrt_work.c
lapacke_dtpqrt2.c
lapacke_dtpqrt2_work.c
lapacke_dtpqrt_work.c
lapacke_dtprfb.c
lapacke_dtprfb_work.c
lapacke_dtprfs.c
@ -1163,15 +1171,21 @@ lapacke_dtrttp.c
lapacke_dtrttp_work.c
lapacke_dtzrzf.c
lapacke_dtzrzf_work.c
)
set(SOURCES
lapacke_nancheck.c
lapacke_ilaver.c
)
set(SOURCES_SINGLE
lapacke_sbbcsd.c
lapacke_sbbcsd_work.c
lapacke_sbdsdc.c
lapacke_sbdsdc_work.c
lapacke_sbdsvdx.c
lapacke_sbdsvdx_work.c
lapacke_sbdsqr.c
lapacke_sbdsqr_work.c
lapacke_sbdsvdx.c
lapacke_sbdsvdx_work.c
lapacke_sdisna.c
lapacke_sdisna_work.c
lapacke_sgbbrd.c
@ -1249,11 +1263,11 @@ lapacke_sgeqrf_work.c
lapacke_sgeqrfp.c
lapacke_sgeqrfp_work.c
lapacke_sgeqrt.c
lapacke_sgeqrt_work.c
lapacke_sgeqrt2.c
lapacke_sgeqrt2_work.c
lapacke_sgeqrt3.c
lapacke_sgeqrt3_work.c
lapacke_sgeqrt_work.c
lapacke_sgerfs.c
lapacke_sgerfs_work.c
lapacke_sgerqf.c
@ -1264,6 +1278,8 @@ lapacke_sgesv.c
lapacke_sgesv_work.c
lapacke_sgesvd.c
lapacke_sgesvd_work.c
lapacke_sgesvdq.c
lapacke_sgesvdq_work.c
lapacke_sgesvdx.c
lapacke_sgesvdx_work.c
lapacke_sgesvj.c
@ -1300,10 +1316,10 @@ lapacke_sggevx.c
lapacke_sggevx_work.c
lapacke_sggglm.c
lapacke_sggglm_work.c
lapacke_sgghrd.c
lapacke_sgghrd_work.c
lapacke_sgghd3.c
lapacke_sgghd3_work.c
lapacke_sgghrd.c
lapacke_sgghrd_work.c
lapacke_sgglse.c
lapacke_sgglse_work.c
lapacke_sggqrf.c
@ -1496,14 +1512,14 @@ lapacke_spttrs.c
lapacke_spttrs_work.c
lapacke_ssbev.c
lapacke_ssbev_work.c
lapacke_ssbevd.c
lapacke_ssbevd_work.c
lapacke_ssbevx.c
lapacke_ssbevx_work.c
lapacke_ssbev_2stage.c
lapacke_ssbev_2stage_work.c
lapacke_ssbevd.c
lapacke_ssbevd_work.c
lapacke_ssbevd_2stage.c
lapacke_ssbevd_2stage_work.c
lapacke_ssbevx.c
lapacke_ssbevx_work.c
lapacke_ssbevx_2stage.c
lapacke_ssbevx_2stage_work.c
lapacke_ssbgst.c
@ -1580,18 +1596,18 @@ lapacke_ssyequb.c
lapacke_ssyequb_work.c
lapacke_ssyev.c
lapacke_ssyev_work.c
lapacke_ssyevd.c
lapacke_ssyevd_work.c
lapacke_ssyevr.c
lapacke_ssyevr_work.c
lapacke_ssyevx.c
lapacke_ssyevx_work.c
lapacke_ssyev_2stage.c
lapacke_ssyev_2stage_work.c
lapacke_ssyevd.c
lapacke_ssyevd_work.c
lapacke_ssyevd_2stage.c
lapacke_ssyevd_2stage_work.c
lapacke_ssyevr.c
lapacke_ssyevr_work.c
lapacke_ssyevr_2stage.c
lapacke_ssyevr_2stage_work.c
lapacke_ssyevx.c
lapacke_ssyevx_work.c
lapacke_ssyevx_2stage.c
lapacke_ssyevx_2stage_work.c
lapacke_ssygst.c
@ -1607,8 +1623,6 @@ lapacke_ssygvx_work.c
lapacke_ssyrfs.c
lapacke_ssyrfs_work.c
lapacke_ssysv.c
lapacke_ssysv_rook.c
lapacke_ssysv_rook_work.c
lapacke_ssysv_work.c
lapacke_ssysv_aa.c
lapacke_ssysv_aa_work.c
@ -1616,6 +1630,8 @@ lapacke_ssysv_aa_2stage.c
lapacke_ssysv_aa_2stage_work.c
lapacke_ssysv_rk.c
lapacke_ssysv_rk_work.c
lapacke_ssysv_rook.c
lapacke_ssysv_rook_work.c
lapacke_ssysvx.c
lapacke_ssysvx_work.c
lapacke_ssyswapr.c
@ -1624,33 +1640,33 @@ lapacke_ssytrd.c
lapacke_ssytrd_work.c
lapacke_ssytrf.c
lapacke_ssytrf_work.c
lapacke_ssytrf_rook.c
lapacke_ssytrf_rook_work.c
lapacke_ssytrf_aa.c
lapacke_ssytrf_aa_2stage.c
lapacke_ssytrf_aa_work.c
lapacke_ssytrf_aa_2stage.c
lapacke_ssytrf_aa_2stage_work.c
lapacke_ssytrf_rk.c
lapacke_ssytrf_rk_work.c
lapacke_ssytrf_rook.c
lapacke_ssytrf_rook_work.c
lapacke_ssytri.c
lapacke_ssytri_work.c
lapacke_ssytri2.c
lapacke_ssytri2_work.c
lapacke_ssytri_3.c
lapacke_ssytri_3_work.c
lapacke_ssytri2x.c
lapacke_ssytri2x_work.c
lapacke_ssytri_work.c
lapacke_ssytri_3.c
lapacke_ssytri_3_work.c
lapacke_ssytrs.c
lapacke_ssytrs_rook.c
lapacke_ssytrs_work.c
lapacke_ssytrs2.c
lapacke_ssytrs2_work.c
lapacke_ssytrs_aa.c
lapacke_ssytrs_aa_2stage.c
lapacke_ssytrs_aa_work.c
lapacke_ssytrs_aa_2stage_work.c
lapacke_ssytrs_3.c
lapacke_ssytrs_3_work.c
lapacke_ssytrs_work.c
lapacke_ssytrs_aa.c
lapacke_ssytrs_aa_work.c
lapacke_ssytrs_aa_2stage.c
lapacke_ssytrs_aa_2stage_work.c
lapacke_ssytrs_rook.c
lapacke_ssytrs_rook_work.c
lapacke_stbcon.c
lapacke_stbcon_work.c
@ -1722,6 +1738,8 @@ lapacke_strttp.c
lapacke_strttp_work.c
lapacke_stzrzf.c
lapacke_stzrzf_work.c
)
set(SOURCES_COMPLEX16
lapacke_zbbcsd.c
lapacke_zbbcsd_work.c
lapacke_zbdsqr.c
@ -1805,11 +1823,11 @@ lapacke_zgeqrf_work.c
lapacke_zgeqrfp.c
lapacke_zgeqrfp_work.c
lapacke_zgeqrt.c
lapacke_zgeqrt_work.c
lapacke_zgeqrt2.c
lapacke_zgeqrt2_work.c
lapacke_zgeqrt3.c
lapacke_zgeqrt3_work.c
lapacke_zgeqrt_work.c
lapacke_zgerfs.c
lapacke_zgerfs_work.c
lapacke_zgerqf.c
@ -1820,6 +1838,8 @@ lapacke_zgesv.c
lapacke_zgesv_work.c
lapacke_zgesvd.c
lapacke_zgesvd_work.c
lapacke_zgesvdq.c
lapacke_zgesvdq_work.c
lapacke_zgesvdx.c
lapacke_zgesvdx_work.c
lapacke_zgesvj.c
@ -1856,10 +1876,10 @@ lapacke_zggevx.c
lapacke_zggevx_work.c
lapacke_zggglm.c
lapacke_zggglm_work.c
lapacke_zgghrd.c
lapacke_zgghrd_work.c
lapacke_zgghd3.c
lapacke_zgghd3_work.c
lapacke_zgghrd.c
lapacke_zgghrd_work.c
lapacke_zgglse.c
lapacke_zgglse_work.c
lapacke_zggqrf.c
@ -1884,14 +1904,14 @@ lapacke_zgttrs.c
lapacke_zgttrs_work.c
lapacke_zhbev.c
lapacke_zhbev_work.c
lapacke_zhbevd.c
lapacke_zhbevd_work.c
lapacke_zhbevx.c
lapacke_zhbevx_work.c
lapacke_zhbev_2stage.c
lapacke_zhbev_2stage_work.c
lapacke_zhbevd.c
lapacke_zhbevd_work.c
lapacke_zhbevd_2stage.c
lapacke_zhbevd_2stage_work.c
lapacke_zhbevx.c
lapacke_zhbevx_work.c
lapacke_zhbevx_2stage.c
lapacke_zhbevx_2stage_work.c
lapacke_zhbgst.c
@ -1912,18 +1932,18 @@ lapacke_zheequb.c
lapacke_zheequb_work.c
lapacke_zheev.c
lapacke_zheev_work.c
lapacke_zheevd.c
lapacke_zheevd_work.c
lapacke_zheevr.c
lapacke_zheevr_work.c
lapacke_zheevx.c
lapacke_zheevx_work.c
lapacke_zheev_2stage.c
lapacke_zheev_2stage_work.c
lapacke_zheevd.c
lapacke_zheevd_work.c
lapacke_zheevd_2stage.c
lapacke_zheevd_2stage_work.c
lapacke_zheevr.c
lapacke_zheevr_work.c
lapacke_zheevr_2stage.c
lapacke_zheevr_2stage_work.c
lapacke_zheevx.c
lapacke_zheevx_work.c
lapacke_zheevx_2stage.c
lapacke_zheevx_2stage_work.c
lapacke_zhegst.c
@ -1941,8 +1961,8 @@ lapacke_zherfs_work.c
lapacke_zhesv.c
lapacke_zhesv_work.c
lapacke_zhesv_aa.c
lapacke_zhesv_aa_2stage.c
lapacke_zhesv_aa_work.c
lapacke_zhesv_aa_2stage.c
lapacke_zhesv_aa_2stage_work.c
lapacke_zhesv_rk.c
lapacke_zhesv_rk_work.c
@ -1953,34 +1973,34 @@ lapacke_zheswapr_work.c
lapacke_zhetrd.c
lapacke_zhetrd_work.c
lapacke_zhetrf.c
lapacke_zhetrf_rook.c
lapacke_zhetrf_work.c
lapacke_zhetrf_rook_work.c
lapacke_zhetrf_aa.c
lapacke_zhetrf_aa_2stage.c
lapacke_zhetrf_aa_work.c
lapacke_zhetrf_aa_2stage.c
lapacke_zhetrf_aa_2stage_work.c
lapacke_zhetrf_rk.c
lapacke_zhetrf_rk_work.c
lapacke_zhetrf_rook.c
lapacke_zhetrf_rook_work.c
lapacke_zhetri.c
lapacke_zhetri_work.c
lapacke_zhetri2.c
lapacke_zhetri2_work.c
lapacke_zhetri_3.c
lapacke_zhetri_3_work.c
lapacke_zhetri2x.c
lapacke_zhetri2x_work.c
lapacke_zhetri_work.c
lapacke_zhetri_3.c
lapacke_zhetri_3_work.c
lapacke_zhetrs.c
lapacke_zhetrs_rook.c
lapacke_zhetrs_work.c
lapacke_zhetrs2.c
lapacke_zhetrs2_work.c
lapacke_zhetrs_work.c
lapacke_zhetrs_aa.c
lapacke_zhetrs_aa_2stage.c
lapacke_zhetrs_aa_work.c
lapacke_zhetrs_aa_2stage_work.c
lapacke_zhetrs_3.c
lapacke_zhetrs_3_work.c
lapacke_zhetrs_aa.c
lapacke_zhetrs_aa_work.c
lapacke_zhetrs_aa_2stage.c
lapacke_zhetrs_aa_2stage_work.c
lapacke_zhetrs_rook.c
lapacke_zhetrs_rook_work.c
lapacke_zhfrk.c
lapacke_zhfrk_work.c
@ -2172,52 +2192,54 @@ lapacke_zsyconv.c
lapacke_zsyconv_work.c
lapacke_zsyequb.c
lapacke_zsyequb_work.c
lapacke_zsyr.c
lapacke_zsyr_work.c
lapacke_zsyrfs.c
lapacke_zsyrfs_work.c
lapacke_zsysv.c
lapacke_zsysv_rook.c
lapacke_zsysv_rook_work.c
lapacke_zsysv_work.c
lapacke_zsysv_aa.c
lapacke_zsysv_aa_2stage.c
lapacke_zsysv_aa_work.c
lapacke_zsysv_aa_2stage.c
lapacke_zsysv_aa_2stage_work.c
lapacke_zsysv_rk.c
lapacke_zsysv_rk_work.c
lapacke_zsysv_rook.c
lapacke_zsysv_rook_work.c
lapacke_zsysvx.c
lapacke_zsysvx_work.c
lapacke_zsyswapr.c
lapacke_zsyswapr_work.c
lapacke_zsytrf.c
lapacke_zsytrf_work.c
lapacke_zsytrf_rook.c
lapacke_zsytrf_rook_work.c
lapacke_zsytrf_aa.c
lapacke_zsytrf_aa_2stage.c
lapacke_zsytrf_aa_work.c
lapacke_zsytrf_aa_2stage.c
lapacke_zsytrf_aa_2stage_work.c
lapacke_zsytrf_rk.c
lapacke_zsytrf_rk_work.c
lapacke_zsytrf_rook.c
lapacke_zsytrf_rook_work.c
lapacke_zsytri.c
lapacke_zsytri_work.c
lapacke_zsytri2.c
lapacke_zsytri2_work.c
lapacke_zsytri_3.c
lapacke_zsytri_3_work.c
lapacke_zsytri2x.c
lapacke_zsytri2x_work.c
lapacke_zsytri_work.c
lapacke_zsytri_3.c
lapacke_zsytri_3_work.c
lapacke_zsytrs.c
lapacke_zsytrs_rook.c
lapacke_zsytrs_work.c
lapacke_zsytrs2.c
lapacke_zsytrs2_work.c
lapacke_zsytrs_work.c
lapacke_zsytrs_rook_work.c
lapacke_zsytrs_aa.c
lapacke_zsytrs_aa_2stage.c
lapacke_zsytrs_aa_work.c
lapacke_zsytrs_aa_2stage_work.c
lapacke_zsytrs_3.c
lapacke_zsytrs_3_work.c
lapacke_zsytrs_aa.c
lapacke_zsytrs_aa_work.c
lapacke_zsytrs_aa_2stage.c
lapacke_zsytrs_aa_2stage_work.c
lapacke_zsytrs_rook.c
lapacke_zsytrs_rook_work.c
lapacke_ztbcon.c
lapacke_ztbcon_work.c
lapacke_ztbrfs.c
@ -2249,9 +2271,9 @@ lapacke_ztpcon_work.c
lapacke_ztpmqrt.c
lapacke_ztpmqrt_work.c
lapacke_ztpqrt.c
lapacke_ztpqrt_work.c
lapacke_ztpqrt2.c
lapacke_ztpqrt2_work.c
lapacke_ztpqrt_work.c
lapacke_ztprfb.c
lapacke_ztprfb_work.c
lapacke_ztprfs.c
@ -2328,11 +2350,6 @@ lapacke_zupgtr.c
lapacke_zupgtr_work.c
lapacke_zupmtr.c
lapacke_zupmtr_work.c
lapacke_zsyr.c
lapacke_csyr.c
lapacke_zsyr_work.c
lapacke_csyr_work.c
lapacke_ilaver.c
)
set(DEPRECATED

View File

@ -32,12 +32,21 @@
##############################################################################
# makefile for LAPACKE, used to build lapacke binary.
#
# Note: we use multiple OBJ_A, OBJ_B, etc, instead of a single OBJ
# Note: we use multiple OBJ_S, OBJ_C, etc, instead of a single OBJ
# to allow build with mingw (argument list too long for the msys ar)
#
include ../../make.inc
TOPSRCDIR = ../..
include $(TOPSRCDIR)/make.inc
OBJ_A = \
.SUFFIXES: .c .o
.c.o:
$(CC) $(CFLAGS) -I../include -c -o $@ $<
OBJ = \
lapacke_ilaver.o \
lapacke_nancheck.o
OBJ_C = \
lapacke_cbbcsd.o \
lapacke_cbbcsd_work.o \
lapacke_cbdsqr.o \
@ -82,12 +91,12 @@ lapacke_cgeevx.o \
lapacke_cgeevx_work.o \
lapacke_cgehrd.o \
lapacke_cgehrd_work.o \
lapacke_cgejsv.o \
lapacke_cgejsv_work.o \
lapacke_cgelq.o \
lapacke_cgelq_work.o \
lapacke_cgelq2.o \
lapacke_cgelq2_work.o \
lapacke_cgejsv.o \
lapacke_cgejsv_work.o \
lapacke_cgelqf.o \
lapacke_cgelqf_work.o \
lapacke_cgels.o \
@ -117,11 +126,11 @@ lapacke_cgeqrf_work.o \
lapacke_cgeqrfp.o \
lapacke_cgeqrfp_work.o \
lapacke_cgeqrt.o \
lapacke_cgeqrt_work.o \
lapacke_cgeqrt2.o \
lapacke_cgeqrt2_work.o \
lapacke_cgeqrt3.o \
lapacke_cgeqrt3_work.o \
lapacke_cgeqrt_work.o \
lapacke_cgerfs.o \
lapacke_cgerfs_work.o \
lapacke_cgerqf.o \
@ -132,6 +141,8 @@ lapacke_cgesv.o \
lapacke_cgesv_work.o \
lapacke_cgesvd.o \
lapacke_cgesvd_work.o \
lapacke_cgesvdq.o \
lapacke_cgesvdq_work.o \
lapacke_cgesvdx.o \
lapacke_cgesvdx_work.o \
lapacke_cgesvj.o \
@ -168,10 +179,10 @@ lapacke_cggevx.o \
lapacke_cggevx_work.o \
lapacke_cggglm.o \
lapacke_cggglm_work.o \
lapacke_cgghrd.o \
lapacke_cgghrd_work.o \
lapacke_cgghd3.o \
lapacke_cgghd3_work.o \
lapacke_cgghrd.o \
lapacke_cgghrd_work.o \
lapacke_cgglse.o \
lapacke_cgglse_work.o \
lapacke_cggqrf.o \
@ -196,14 +207,14 @@ lapacke_cgttrs.o \
lapacke_cgttrs_work.o \
lapacke_chbev.o \
lapacke_chbev_work.o \
lapacke_chbevd.o \
lapacke_chbevd_work.o \
lapacke_chbevx.o \
lapacke_chbevx_work.o \
lapacke_chbev_2stage.o \
lapacke_chbev_2stage_work.o \
lapacke_chbevd.o \
lapacke_chbevd_work.o \
lapacke_chbevd_2stage.o \
lapacke_chbevd_2stage_work.o \
lapacke_chbevx.o \
lapacke_chbevx_work.o \
lapacke_chbevx_2stage.o \
lapacke_chbevx_2stage_work.o \
lapacke_chbgst.o \
@ -224,18 +235,18 @@ lapacke_cheequb.o \
lapacke_cheequb_work.o \
lapacke_cheev.o \
lapacke_cheev_work.o \
lapacke_cheevd.o \
lapacke_cheevd_work.o \
lapacke_cheevr.o \
lapacke_cheevr_work.o \
lapacke_cheevx.o \
lapacke_cheevx_work.o \
lapacke_cheev_2stage.o \
lapacke_cheev_2stage_work.o \
lapacke_cheevd.o \
lapacke_cheevd_work.o \
lapacke_cheevd_2stage.o \
lapacke_cheevd_2stage_work.o \
lapacke_cheevr.o \
lapacke_cheevr_work.o \
lapacke_cheevr_2stage.o \
lapacke_cheevr_2stage_work.o \
lapacke_cheevx.o \
lapacke_cheevx_work.o \
lapacke_cheevx_2stage.o \
lapacke_cheevx_2stage_work.o \
lapacke_chegst.o \
@ -265,35 +276,35 @@ lapacke_cheswapr_work.o \
lapacke_chetrd.o \
lapacke_chetrd_work.o \
lapacke_chetrf.o \
lapacke_chetrf_rook.o \
lapacke_chetrf_work.o \
lapacke_chetrf_rook_work.o \
lapacke_chetrf_aa.o \
lapacke_chetrf_aa_2stage.o \
lapacke_chetrf_aa_work.o \
lapacke_chetrf_aa_2stage.o \
lapacke_chetrf_aa_2stage_work.o \
lapacke_chetrf_rk.o \
lapacke_chetrf_rk_work.o \
lapacke_chetrf_rook.o \
lapacke_chetrf_rook_work.o \
lapacke_chetri.o \
lapacke_chetri_work.o \
lapacke_chetri2.o \
lapacke_chetri2_work.o \
lapacke_chetri_3.o \
lapacke_chetri_3_work.o \
lapacke_chetri2x.o \
lapacke_chetri2x_work.o \
lapacke_chetri_work.o \
lapacke_chetri_3.o \
lapacke_chetri_3_work.o \
lapacke_chetrs.o \
lapacke_chetrs_rook.o \
lapacke_chetrs_work.o \
lapacke_chetrs2.o \
lapacke_chetrs2_work.o \
lapacke_chetrs_work.o \
lapacke_chetrs_rook_work.o \
lapacke_chetrs_aa.o \
lapacke_chetrs_aa_2stage.o \
lapacke_chetrs_aa_work.o \
lapacke_chetrs_aa_2stage_work.o \
lapacke_chetrs_3.o \
lapacke_chetrs_3_work.o \
lapacke_chetrs_aa.o \
lapacke_chetrs_aa_work.o \
lapacke_chetrs_aa_2stage.o \
lapacke_chetrs_aa_2stage_work.o \
lapacke_chetrs_rook.o \
lapacke_chetrs_rook_work.o \
lapacke_chfrk.o \
lapacke_chfrk_work.o \
lapacke_chgeqz.o \
@ -484,11 +495,11 @@ lapacke_csyconv.o \
lapacke_csyconv_work.o \
lapacke_csyequb.o \
lapacke_csyequb_work.o \
lapacke_csyr.o \
lapacke_csyr_work.o \
lapacke_csyrfs.o \
lapacke_csyrfs_work.o \
lapacke_csysv.o \
lapacke_csysv_rook.o \
lapacke_csysv_rook_work.o \
lapacke_csysv_work.o \
lapacke_csysv_aa.o \
lapacke_csysv_aa_work.o \
@ -496,40 +507,42 @@ lapacke_csysv_aa_2stage.o \
lapacke_csysv_aa_2stage_work.o \
lapacke_csysv_rk.o \
lapacke_csysv_rk_work.o \
lapacke_csysv_rook.o \
lapacke_csysv_rook_work.o \
lapacke_csysvx.o \
lapacke_csysvx_work.o \
lapacke_csyswapr.o \
lapacke_csyswapr_work.o \
lapacke_csytrf.o \
lapacke_csytrf_work.o \
lapacke_csytrf_rook.o \
lapacke_csytrf_rook_work.o \
lapacke_csytrf_aa.o \
lapacke_csytrf_aa_2stage.o \
lapacke_csytrf_aa_work.o \
lapacke_csytrf_aa_2stage.o \
lapacke_csytrf_aa_2stage_work.o \
lapacke_csytrf_rk.o \
lapacke_csytrf_rk_work.o \
lapacke_csytrf_rook.o \
lapacke_csytrf_rook_work.o \
lapacke_csytri.o \
lapacke_csytri_work.o \
lapacke_csytri2.o \
lapacke_csytri2_work.o \
lapacke_csytri_3.o \
lapacke_csytri_3_work.o \
lapacke_csytri2x.o \
lapacke_csytri2x_work.o \
lapacke_csytri_work.o \
lapacke_csytri_3.o \
lapacke_csytri_3_work.o \
lapacke_csytrs.o \
lapacke_csytrs_rook.o \
lapacke_csytrs_work.o \
lapacke_csytrs2.o \
lapacke_csytrs2_work.o \
lapacke_csytrs_work.o \
lapacke_csytrs_rook_work.o \
lapacke_csytrs_aa.o \
lapacke_csytrs_aa_2stage.o \
lapacke_csytrs_aa_work.o \
lapacke_csytrs_aa_2stage_work.o \
lapacke_csytrs_3.o \
lapacke_csytrs_3_work.o \
lapacke_csytrs_aa.o \
lapacke_csytrs_aa_work.o \
lapacke_csytrs_aa_2stage.o \
lapacke_csytrs_aa_2stage_work.o \
lapacke_csytrs_rook.o \
lapacke_csytrs_rook_work.o \
lapacke_ctbcon.o \
lapacke_ctbcon_work.o \
lapacke_ctbrfs.o \
@ -561,9 +574,9 @@ lapacke_ctpcon_work.o \
lapacke_ctpmqrt.o \
lapacke_ctpmqrt_work.o \
lapacke_ctpqrt.o \
lapacke_ctpqrt_work.o \
lapacke_ctpqrt2.o \
lapacke_ctpqrt2_work.o \
lapacke_ctpqrt_work.o \
lapacke_ctprfb.o \
lapacke_ctprfb_work.o \
lapacke_ctprfs.o \
@ -639,15 +652,17 @@ lapacke_cunmtr_work.o \
lapacke_cupgtr.o \
lapacke_cupgtr_work.o \
lapacke_cupmtr.o \
lapacke_cupmtr_work.o \
lapacke_cupmtr_work.o
OBJ_D = \
lapacke_dbbcsd.o \
lapacke_dbbcsd_work.o \
lapacke_dbdsdc.o \
lapacke_dbdsdc_work.o \
lapacke_dbdsvdx.o \
lapacke_dbdsvdx_work.o \
lapacke_dbdsqr.o \
lapacke_dbdsqr_work.o \
lapacke_dbdsvdx.o \
lapacke_dbdsvdx_work.o \
lapacke_ddisna.o \
lapacke_ddisna_work.o \
lapacke_dgbbrd.o \
@ -725,11 +740,11 @@ lapacke_dgeqrf_work.o \
lapacke_dgeqrfp.o \
lapacke_dgeqrfp_work.o \
lapacke_dgeqrt.o \
lapacke_dgeqrt_work.o \
lapacke_dgeqrt2.o \
lapacke_dgeqrt2_work.o \
lapacke_dgeqrt3.o \
lapacke_dgeqrt3_work.o \
lapacke_dgeqrt_work.o \
lapacke_dgerfs.o \
lapacke_dgerfs_work.o \
lapacke_dgerqf.o \
@ -740,6 +755,8 @@ lapacke_dgesv.o \
lapacke_dgesv_work.o \
lapacke_dgesvd.o \
lapacke_dgesvd_work.o \
lapacke_dgesvdq.o \
lapacke_dgesvdq_work.o \
lapacke_dgesvdx.o \
lapacke_dgesvdx_work.o \
lapacke_dgesvj.o \
@ -776,10 +793,10 @@ lapacke_dggevx.o \
lapacke_dggevx_work.o \
lapacke_dggglm.o \
lapacke_dggglm_work.o \
lapacke_dgghrd.o \
lapacke_dgghrd_work.o \
lapacke_dgghd3.o \
lapacke_dgghd3_work.o \
lapacke_dgghrd.o \
lapacke_dgghrd_work.o \
lapacke_dgglse.o \
lapacke_dgglse_work.o \
lapacke_dggqrf.o \
@ -972,14 +989,14 @@ lapacke_dpttrs.o \
lapacke_dpttrs_work.o \
lapacke_dsbev.o \
lapacke_dsbev_work.o \
lapacke_dsbevd.o \
lapacke_dsbevd_work.o \
lapacke_dsbevx.o \
lapacke_dsbevx_work.o \
lapacke_dsbev_2stage.o \
lapacke_dsbev_2stage_work.o \
lapacke_dsbevd.o \
lapacke_dsbevd_work.o \
lapacke_dsbevd_2stage.o \
lapacke_dsbevd_2stage_work.o \
lapacke_dsbevx.o \
lapacke_dsbevx_work.o \
lapacke_dsbevx_2stage.o \
lapacke_dsbevx_2stage_work.o \
lapacke_dsbgst.o \
@ -1060,18 +1077,18 @@ lapacke_dsyequb.o \
lapacke_dsyequb_work.o \
lapacke_dsyev.o \
lapacke_dsyev_work.o \
lapacke_dsyevd.o \
lapacke_dsyevd_work.o \
lapacke_dsyevr.o \
lapacke_dsyevr_work.o \
lapacke_dsyevx.o \
lapacke_dsyevx_work.o \
lapacke_dsyev_2stage.o \
lapacke_dsyev_2stage_work.o \
lapacke_dsyevd.o \
lapacke_dsyevd_work.o \
lapacke_dsyevd_2stage.o \
lapacke_dsyevd_2stage_work.o \
lapacke_dsyevr.o \
lapacke_dsyevr_work.o \
lapacke_dsyevr_2stage.o \
lapacke_dsyevr_2stage_work.o \
lapacke_dsyevx.o \
lapacke_dsyevx_work.o \
lapacke_dsyevx_2stage.o \
lapacke_dsyevx_2stage_work.o \
lapacke_dsygst.o \
@ -1087,8 +1104,6 @@ lapacke_dsygvx_work.o \
lapacke_dsyrfs.o \
lapacke_dsyrfs_work.o \
lapacke_dsysv.o \
lapacke_dsysv_rook.o \
lapacke_dsysv_rook_work.o \
lapacke_dsysv_work.o \
lapacke_dsysv_aa.o \
lapacke_dsysv_aa_work.o \
@ -1096,6 +1111,8 @@ lapacke_dsysv_aa_2stage.o \
lapacke_dsysv_aa_2stage_work.o \
lapacke_dsysv_rk.o \
lapacke_dsysv_rk_work.o \
lapacke_dsysv_rook.o \
lapacke_dsysv_rook_work.o \
lapacke_dsysvx.o \
lapacke_dsysvx_work.o \
lapacke_dsyswapr.o \
@ -1104,36 +1121,34 @@ lapacke_dsytrd.o \
lapacke_dsytrd_work.o \
lapacke_dsytrf.o \
lapacke_dsytrf_work.o \
lapacke_dsytrf_rook.o \
lapacke_dsytrf_rook_work.o \
lapacke_dsytrf_aa.o \
lapacke_dsytrf_aa_work.o \
lapacke_dsytrf_aa_2stage.o \
lapacke_dsytrf_aa_2stage_work.o \
lapacke_dsytrf_rk.o \
lapacke_dsytrf_rk_work.o \
lapacke_dsytrf_rook.o \
lapacke_dsytrf_rook_work.o \
lapacke_dsytri.o \
lapacke_dsytri_work.o \
lapacke_dsytri2.o \
lapacke_dsytri2_work.o \
lapacke_dsytri_3.o \
lapacke_dsytri_3_work.o \
lapacke_dsytri2x.o \
lapacke_dsytri2x_work.o \
lapacke_dsytri_work.o
OBJ_B = \
lapacke_dsytri_3.o \
lapacke_dsytri_3_work.o \
lapacke_dsytrs.o \
lapacke_dsytrs_rook.o \
lapacke_dsytrs_work.o \
lapacke_dsytrs2.o \
lapacke_dsytrs2_work.o \
lapacke_dsytrs_work.o \
lapacke_dsytrs_rook_work.o \
lapacke_dsytrs_aa.o \
lapacke_dsytrs_aa_2stage.o \
lapacke_dsytrs_aa_work.o \
lapacke_dsytrs_aa_2stage_work.o \
lapacke_dsytrs_3.o \
lapacke_dsytrs_3_work.o \
lapacke_dsytrs_aa.o \
lapacke_dsytrs_aa_work.o \
lapacke_dsytrs_aa_2stage.o \
lapacke_dsytrs_aa_2stage_work.o \
lapacke_dsytrs_rook.o \
lapacke_dsytrs_rook_work.o \
lapacke_dtbcon.o \
lapacke_dtbcon_work.o \
lapacke_dtbrfs.o \
@ -1165,9 +1180,9 @@ lapacke_dtpcon_work.o \
lapacke_dtpmqrt.o \
lapacke_dtpmqrt_work.o \
lapacke_dtpqrt.o \
lapacke_dtpqrt_work.o \
lapacke_dtpqrt2.o \
lapacke_dtpqrt2_work.o \
lapacke_dtpqrt_work.o \
lapacke_dtprfb.o \
lapacke_dtprfb_work.o \
lapacke_dtprfs.o \
@ -1203,16 +1218,17 @@ lapacke_dtrttf_work.o \
lapacke_dtrttp.o \
lapacke_dtrttp_work.o \
lapacke_dtzrzf.o \
lapacke_dtzrzf_work.o \
lapacke_nancheck.o \
lapacke_dtzrzf_work.o
OBJ_S = \
lapacke_sbbcsd.o \
lapacke_sbbcsd_work.o \
lapacke_sbdsdc.o \
lapacke_sbdsdc_work.o \
lapacke_sbdsvdx.o \
lapacke_sbdsvdx_work.o \
lapacke_sbdsqr.o \
lapacke_sbdsqr_work.o \
lapacke_sbdsvdx.o \
lapacke_sbdsvdx_work.o \
lapacke_sdisna.o \
lapacke_sdisna_work.o \
lapacke_sgbbrd.o \
@ -1290,11 +1306,11 @@ lapacke_sgeqrf_work.o \
lapacke_sgeqrfp.o \
lapacke_sgeqrfp_work.o \
lapacke_sgeqrt.o \
lapacke_sgeqrt_work.o \
lapacke_sgeqrt2.o \
lapacke_sgeqrt2_work.o \
lapacke_sgeqrt3.o \
lapacke_sgeqrt3_work.o \
lapacke_sgeqrt_work.o \
lapacke_sgerfs.o \
lapacke_sgerfs_work.o \
lapacke_sgerqf.o \
@ -1305,6 +1321,8 @@ lapacke_sgesv.o \
lapacke_sgesv_work.o \
lapacke_sgesvd.o \
lapacke_sgesvd_work.o \
lapacke_sgesvdq.o \
lapacke_sgesvdq_work.o \
lapacke_sgesvdx.o \
lapacke_sgesvdx_work.o \
lapacke_sgesvj.o \
@ -1341,10 +1359,10 @@ lapacke_sggevx.o \
lapacke_sggevx_work.o \
lapacke_sggglm.o \
lapacke_sggglm_work.o \
lapacke_sgghrd.o \
lapacke_sgghrd_work.o \
lapacke_sgghd3.o \
lapacke_sgghd3_work.o \
lapacke_sgghrd.o \
lapacke_sgghrd_work.o \
lapacke_sgglse.o \
lapacke_sgglse_work.o \
lapacke_sggqrf.o \
@ -1537,14 +1555,14 @@ lapacke_spttrs.o \
lapacke_spttrs_work.o \
lapacke_ssbev.o \
lapacke_ssbev_work.o \
lapacke_ssbevd.o \
lapacke_ssbevd_work.o \
lapacke_ssbevx.o \
lapacke_ssbevx_work.o \
lapacke_ssbev_2stage.o \
lapacke_ssbev_2stage_work.o \
lapacke_ssbevd.o \
lapacke_ssbevd_work.o \
lapacke_ssbevd_2stage.o \
lapacke_ssbevd_2stage_work.o \
lapacke_ssbevx.o \
lapacke_ssbevx_work.o \
lapacke_ssbevx_2stage.o \
lapacke_ssbevx_2stage_work.o \
lapacke_ssbgst.o \
@ -1621,18 +1639,18 @@ lapacke_ssyequb.o \
lapacke_ssyequb_work.o \
lapacke_ssyev.o \
lapacke_ssyev_work.o \
lapacke_ssyevd.o \
lapacke_ssyevd_work.o \
lapacke_ssyevr.o \
lapacke_ssyevr_work.o \
lapacke_ssyevx.o \
lapacke_ssyevx_work.o \
lapacke_ssyev_2stage.o \
lapacke_ssyev_2stage_work.o \
lapacke_ssyevd.o \
lapacke_ssyevd_work.o \
lapacke_ssyevd_2stage.o \
lapacke_ssyevd_2stage_work.o \
lapacke_ssyevr.o \
lapacke_ssyevr_work.o \
lapacke_ssyevr_2stage.o \
lapacke_ssyevr_2stage_work.o \
lapacke_ssyevx.o \
lapacke_ssyevx_work.o \
lapacke_ssyevx_2stage.o \
lapacke_ssyevx_2stage_work.o \
lapacke_ssygst.o \
@ -1648,8 +1666,6 @@ lapacke_ssygvx_work.o \
lapacke_ssyrfs.o \
lapacke_ssyrfs_work.o \
lapacke_ssysv.o \
lapacke_ssysv_rook.o \
lapacke_ssysv_rook_work.o \
lapacke_ssysv_work.o \
lapacke_ssysv_aa.o \
lapacke_ssysv_aa_work.o \
@ -1657,6 +1673,8 @@ lapacke_ssysv_aa_2stage.o \
lapacke_ssysv_aa_2stage_work.o \
lapacke_ssysv_rk.o \
lapacke_ssysv_rk_work.o \
lapacke_ssysv_rook.o \
lapacke_ssysv_rook_work.o \
lapacke_ssysvx.o \
lapacke_ssysvx_work.o \
lapacke_ssyswapr.o \
@ -1665,34 +1683,34 @@ lapacke_ssytrd.o \
lapacke_ssytrd_work.o \
lapacke_ssytrf.o \
lapacke_ssytrf_work.o \
lapacke_ssytrf_rook.o \
lapacke_ssytrf_rook_work.o \
lapacke_ssytrf_aa.o \
lapacke_ssytrf_aa_work.o \
lapacke_ssytrf_aa_2stage.o \
lapacke_ssytrf_aa_2stage_work.o \
lapacke_ssytrf_rk.o \
lapacke_ssytrf_rk_work.o \
lapacke_ssytrf_rook.o \
lapacke_ssytrf_rook_work.o \
lapacke_ssytri.o \
lapacke_ssytri_work.o \
lapacke_ssytri2.o \
lapacke_ssytri2_work.o \
lapacke_ssytri_3.o \
lapacke_ssytri_3_work.o \
lapacke_ssytri2x.o \
lapacke_ssytri2x_work.o \
lapacke_ssytri_work.o \
lapacke_ssytri_3.o \
lapacke_ssytri_3_work.o \
lapacke_ssytrs.o \
lapacke_ssytrs_rook.o \
lapacke_ssytrs_work.o \
lapacke_ssytrs2.o \
lapacke_ssytrs2_work.o \
lapacke_ssytrs_work.o \
lapacke_ssytrs_rook_work.o \
lapacke_ssytrs_aa.o \
lapacke_ssytrs_aa_2stage.o \
lapacke_ssytrs_aa_work.o \
lapacke_ssytrs_aa_2stage_work.o \
lapacke_ssytrs_3.o \
lapacke_ssytrs_3_work.o \
lapacke_ssytrs_aa.o \
lapacke_ssytrs_aa_work.o \
lapacke_ssytrs_aa_2stage.o \
lapacke_ssytrs_aa_2stage_work.o \
lapacke_ssytrs_rook.o \
lapacke_ssytrs_rook_work.o \
lapacke_stbcon.o \
lapacke_stbcon_work.o \
lapacke_stbrfs.o \
@ -1762,7 +1780,9 @@ lapacke_strttf_work.o \
lapacke_strttp.o \
lapacke_strttp_work.o \
lapacke_stzrzf.o \
lapacke_stzrzf_work.o \
lapacke_stzrzf_work.o
OBJ_Z = \
lapacke_zbbcsd.o \
lapacke_zbbcsd_work.o \
lapacke_zbdsqr.o \
@ -1846,11 +1866,11 @@ lapacke_zgeqrf_work.o \
lapacke_zgeqrfp.o \
lapacke_zgeqrfp_work.o \
lapacke_zgeqrt.o \
lapacke_zgeqrt_work.o \
lapacke_zgeqrt2.o \
lapacke_zgeqrt2_work.o \
lapacke_zgeqrt3.o \
lapacke_zgeqrt3_work.o \
lapacke_zgeqrt_work.o \
lapacke_zgerfs.o \
lapacke_zgerfs_work.o \
lapacke_zgerqf.o \
@ -1861,6 +1881,8 @@ lapacke_zgesv.o \
lapacke_zgesv_work.o \
lapacke_zgesvd.o \
lapacke_zgesvd_work.o \
lapacke_zgesvdq.o \
lapacke_zgesvdq_work.o \
lapacke_zgesvdx.o \
lapacke_zgesvdx_work.o \
lapacke_zgesvj.o \
@ -1897,10 +1919,10 @@ lapacke_zggevx.o \
lapacke_zggevx_work.o \
lapacke_zggglm.o \
lapacke_zggglm_work.o \
lapacke_zgghrd.o \
lapacke_zgghrd_work.o \
lapacke_zgghd3.o \
lapacke_zgghd3_work.o \
lapacke_zgghrd.o \
lapacke_zgghrd_work.o \
lapacke_zgglse.o \
lapacke_zgglse_work.o \
lapacke_zggqrf.o \
@ -1925,14 +1947,14 @@ lapacke_zgttrs.o \
lapacke_zgttrs_work.o \
lapacke_zhbev.o \
lapacke_zhbev_work.o \
lapacke_zhbevd.o \
lapacke_zhbevd_work.o \
lapacke_zhbevx.o \
lapacke_zhbevx_work.o \
lapacke_zhbev_2stage.o \
lapacke_zhbev_2stage_work.o \
lapacke_zhbevd.o \
lapacke_zhbevd_work.o \
lapacke_zhbevd_2stage.o \
lapacke_zhbevd_2stage_work.o \
lapacke_zhbevx.o \
lapacke_zhbevx_work.o \
lapacke_zhbevx_2stage.o \
lapacke_zhbevx_2stage_work.o \
lapacke_zhbgst.o \
@ -1953,18 +1975,18 @@ lapacke_zheequb.o \
lapacke_zheequb_work.o \
lapacke_zheev.o \
lapacke_zheev_work.o \
lapacke_zheevd.o \
lapacke_zheevd_work.o \
lapacke_zheevr.o \
lapacke_zheevr_work.o \
lapacke_zheevx.o \
lapacke_zheevx_work.o \
lapacke_zheev_2stage.o \
lapacke_zheev_2stage_work.o \
lapacke_zheevd.o \
lapacke_zheevd_work.o \
lapacke_zheevd_2stage.o \
lapacke_zheevd_2stage_work.o \
lapacke_zheevr.o \
lapacke_zheevr_work.o \
lapacke_zheevr_2stage.o \
lapacke_zheevr_2stage_work.o \
lapacke_zheevx.o \
lapacke_zheevx_work.o \
lapacke_zheevx_2stage.o \
lapacke_zheevx_2stage_work.o \
lapacke_zhegst.o \
@ -1994,35 +2016,35 @@ lapacke_zheswapr_work.o \
lapacke_zhetrd.o \
lapacke_zhetrd_work.o \
lapacke_zhetrf.o \
lapacke_zhetrf_rook.o \
lapacke_zhetrf_work.o \
lapacke_zhetrf_rook_work.o \
lapacke_zhetrf_aa.o \
lapacke_zhetrf_aa_2stage.o \
lapacke_zhetrf_aa_work.o \
lapacke_zhetrf_aa_2stage.o \
lapacke_zhetrf_aa_2stage_work.o \
lapacke_zhetrf_rk.o \
lapacke_zhetrf_rk_work.o \
lapacke_zhetrf_rook.o \
lapacke_zhetrf_rook_work.o \
lapacke_zhetri.o \
lapacke_zhetri_work.o \
lapacke_zhetri2.o \
lapacke_zhetri2_work.o \
lapacke_zhetri_3.o \
lapacke_zhetri_3_work.o \
lapacke_zhetri2x.o \
lapacke_zhetri2x_work.o \
lapacke_zhetri_work.o \
lapacke_zhetri_3.o \
lapacke_zhetri_3_work.o \
lapacke_zhetrs.o \
lapacke_zhetrs_rook.o \
lapacke_zhetrs_work.o \
lapacke_zhetrs2.o \
lapacke_zhetrs2_work.o \
lapacke_zhetrs_work.o \
lapacke_zhetrs_rook_work.o \
lapacke_zhetrs_aa.o \
lapacke_zhetrs_aa_2stage.o \
lapacke_zhetrs_aa_work.o \
lapacke_zhetrs_aa_2stage_work.o \
lapacke_zhetrs_3.o \
lapacke_zhetrs_3_work.o \
lapacke_zhetrs_aa.o \
lapacke_zhetrs_aa_work.o \
lapacke_zhetrs_aa_2stage.o \
lapacke_zhetrs_aa_2stage_work.o \
lapacke_zhetrs_rook.o \
lapacke_zhetrs_rook_work.o \
lapacke_zhfrk.o \
lapacke_zhfrk_work.o \
lapacke_zhgeqz.o \
@ -2213,11 +2235,11 @@ lapacke_zsyconv.o \
lapacke_zsyconv_work.o \
lapacke_zsyequb.o \
lapacke_zsyequb_work.o \
lapacke_zsyr.o \
lapacke_zsyr_work.o \
lapacke_zsyrfs.o \
lapacke_zsyrfs_work.o \
lapacke_zsysv.o \
lapacke_zsysv_rook.o \
lapacke_zsysv_rook_work.o \
lapacke_zsysv_work.o \
lapacke_zsysv_aa.o \
lapacke_zsysv_aa_work.o \
@ -2225,40 +2247,42 @@ lapacke_zsysv_aa_2stage.o \
lapacke_zsysv_aa_2stage_work.o \
lapacke_zsysv_rk.o \
lapacke_zsysv_rk_work.o \
lapacke_zsysv_rook.o \
lapacke_zsysv_rook_work.o \
lapacke_zsysvx.o \
lapacke_zsysvx_work.o \
lapacke_zsyswapr.o \
lapacke_zsyswapr_work.o \
lapacke_zsytrf.o \
lapacke_zsytrf_work.o \
lapacke_zsytrf_rook.o \
lapacke_zsytrf_rook_work.o \
lapacke_zsytrf_aa.o \
lapacke_zsytrf_aa_2stage.o \
lapacke_zsytrf_aa_work.o \
lapacke_zsytrf_aa_2stage.o \
lapacke_zsytrf_aa_2stage_work.o \
lapacke_zsytrf_rk.o \
lapacke_zsytrf_rk_work.o \
lapacke_zsytrf_rook.o \
lapacke_zsytrf_rook_work.o \
lapacke_zsytri.o \
lapacke_zsytri_work.o \
lapacke_zsytri2.o \
lapacke_zsytri2_work.o \
lapacke_zsytri_3.o \
lapacke_zsytri_3_work.o \
lapacke_zsytri2x.o \
lapacke_zsytri2x_work.o \
lapacke_zsytri_work.o \
lapacke_zsytri_3.o \
lapacke_zsytri_3_work.o \
lapacke_zsytrs.o \
lapacke_zsytrs_rook.o \
lapacke_zsytrs_work.o \
lapacke_zsytrs2.o \
lapacke_zsytrs2_work.o \
lapacke_zsytrs_work.o \
lapacke_zsytrs_rook_work.o \
lapacke_zsytrs_aa.o \
lapacke_zsytrs_aa_2stage.o \
lapacke_zsytrs_aa_work.o \
lapacke_zsytrs_aa_2stage_work.o \
lapacke_zsytrs_3.o \
lapacke_zsytrs_3_work.o \
lapacke_zsytrs_aa.o \
lapacke_zsytrs_aa_work.o \
lapacke_zsytrs_aa_2stage.o \
lapacke_zsytrs_aa_2stage_work.o \
lapacke_zsytrs_rook.o \
lapacke_zsytrs_rook_work.o \
lapacke_ztbcon.o \
lapacke_ztbcon_work.o \
lapacke_ztbrfs.o \
@ -2290,9 +2314,9 @@ lapacke_ztpcon_work.o \
lapacke_ztpmqrt.o \
lapacke_ztpmqrt_work.o \
lapacke_ztpqrt.o \
lapacke_ztpqrt_work.o \
lapacke_ztpqrt2.o \
lapacke_ztpqrt2_work.o \
lapacke_ztpqrt_work.o \
lapacke_ztprfb.o \
lapacke_ztprfb_work.o \
lapacke_ztprfs.o \
@ -2368,12 +2392,7 @@ lapacke_zunmtr_work.o \
lapacke_zupgtr.o \
lapacke_zupgtr_work.o \
lapacke_zupmtr.o \
lapacke_zupmtr_work.o \
lapacke_zsyr.o \
lapacke_csyr.o \
lapacke_zsyr_work.o \
lapacke_csyr_work.o \
lapacke_ilaver.o
lapacke_zupmtr_work.o
ifdef BUILD_DEPRECATED
DEPRECATED = \
@ -2452,27 +2471,29 @@ lapacke_zlagsy.o \
lapacke_zlagsy_work.o
endif
all: ../../$(LAPACKELIB)
.PHONY: all
all: $(LAPACKELIB)
.PHONY: ../../$(LAPACKELIB)
../../$(LAPACKELIB): $(OBJ_A) $(OBJ_B) $(DEPRECATED) $(EXTENDED) $(MATGEN)
$(ARCH) $(ARCHFLAGS) $@ $(OBJ_A)
$(ARCH) $(ARCHFLAGS) $@ $(OBJ_B)
$(LAPACKELIB): $(OBJ) $(OBJ_S) $(OBJ_C) $(OBJ_D) $(OBJ_Z) $(DEPRECATED) $(EXTENDED) $(MATGEN)
$(AR) $(ARFLAGS) $@ $(OBJ)
$(AR) $(ARFLAGS) $@ $(OBJ_S)
$(AR) $(ARFLAGS) $@ $(OBJ_C)
$(AR) $(ARFLAGS) $@ $(OBJ_D)
$(AR) $(ARFLAGS) $@ $(OBJ_Z)
ifdef BUILD_DEPRECATED
$(ARCH) $(ARCHFLAGS) $@ $(DEPRECATED)
$(AR) $(ARFLAGS) $@ $(DEPRECATED)
endif
# Add the $(EXTENDED) objects to the archive only when USEXBLAS is set.
# `ifdef` takes a bare variable name; the previous `ifdef (USEXBLAS)` tested a
# variable literally named "(USEXBLAS)", which is not the USEXBLAS switch, so
# setting USEXBLAS did not enable this step.
ifdef USEXBLAS
	$(ARCH) $(ARCHFLAGS) $@ $(EXTENDED)
	$(AR) $(ARFLAGS) $@ $(EXTENDED)
endif
ifdef LAPACKE_WITH_TMG
$(ARCH) $(ARCHFLAGS) $@ $(MATGEN)
$(AR) $(ARFLAGS) $@ $(MATGEN)
endif
$(RANLIB) $@
clean: cleanobj
.PHONY: clean cleanobj cleanlib
clean: cleanobj cleanlib
cleanobj:
rm -f *.o
.c.o:
$(CC) $(CFLAGS) -I../include -c -o $@ $<
cleanlib:
rm -f $(LAPACKELIB)

View File

@ -124,7 +124,6 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv,
float* rwork = NULL;
lapack_complex_float* cwork = NULL;
lapack_int i;
lapack_int nu, nv;
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
LAPACKE_xerbla( "LAPACKE_cgejsv", -1 );
return -1;
@ -132,8 +131,6 @@ lapack_int LAPACKE_cgejsv( int matrix_layout, char joba, char jobu, char jobv,
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
nu = LAPACKE_lsame( jobu, 'n' ) ? 1 : m;
nv = LAPACKE_lsame( jobv, 'n' ) ? 1 : n;
if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
return -10;
}

View File

@ -75,7 +75,7 @@ lapack_int LAPACKE_cgelsd( int matrix_layout, lapack_int m, lapack_int n,
if( info != 0 ) {
goto exit_level_0;
}
liwork = (lapack_int)iwork_query;
liwork = iwork_query;
lrwork = (lapack_int)rwork_query;
lwork = LAPACK_C2INT( work_query );
/* Allocate memory for work arrays */

View File

@ -0,0 +1,106 @@
/*****************************************************************************
Copyright (c) 2014, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function cgesvdq
* Author: Intel Corporation
* Generated November 2018
*****************************************************************************/
#include "lapacke_utils.h"
/*
 * High-level C interface to cgesvdq.
 *
 * Validates matrix_layout, optionally checks `a` for NaNs, asks
 * LAPACKE_cgesvdq_work for the required workspace sizes (all three length
 * arguments set to -1), allocates the integer, complex and real work arrays,
 * runs the computation through LAPACKE_cgesvdq_work and releases the work
 * arrays again.
 *
 * Returns:
 *   -1                        matrix_layout is neither LAPACK_COL_MAJOR nor
 *                             LAPACK_ROW_MAJOR (reported via LAPACKE_xerbla)
 *   -6                        the NaN check on `a` failed
 *   LAPACK_WORK_MEMORY_ERROR  a work array could not be allocated (reported
 *                             via LAPACKE_xerbla)
 *   otherwise                 the info value produced by LAPACKE_cgesvdq_work
 */
lapack_int LAPACKE_cgesvdq( int matrix_layout, char joba, char jobp,
                            char jobr, char jobu, char jobv,
                            lapack_int m, lapack_int n, lapack_complex_float* a,
                            lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu,
                            lapack_complex_float* v, lapack_int ldv, lapack_int* numrank)
{
    lapack_int info = 0;
    lapack_int liwork = -1;
    lapack_int* iwork = NULL;
    lapack_int iwork_query;
    lapack_int lcwork = -1;
    lapack_complex_float* cwork = NULL;
    lapack_complex_float cwork_query;
    lapack_int lrwork = -1;
    /* The real workspace is single precision: LAPACKE_cgesvdq_work takes
     * `float* rwork`, so both the query slot and the buffer must be float. */
    float* rwork = NULL;
    float rwork_query;
    if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
        LAPACKE_xerbla( "LAPACKE_cgesvdq", -1 );
        return -1;
    }
#ifndef LAPACK_DISABLE_NAN_CHECK
    if( LAPACKE_get_nancheck() ) {
        /* Optionally check input matrices for NaNs */
        if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
            /* NOTE(review): `a` is the 9th argument of this function, so -6
             * looks inconsistent with the usual "negative argument index"
             * convention; left unchanged because callers may rely on it. */
            return -6;
        }
    }
#endif
    /* Query optimal working array(s) size */
    info = LAPACKE_cgesvdq_work( matrix_layout, joba, jobp, jobr, jobu, jobv,
                                 m, n, a, lda, s, u, ldu, v, ldv, numrank,
                                 &iwork_query, liwork, &cwork_query, lcwork,
                                 &rwork_query, lrwork );
    if( info != 0 ) {
        goto exit_level_0;
    }
    liwork = iwork_query;
    lcwork = LAPACK_C2INT(cwork_query);
    lrwork = (lapack_int)rwork_query;
    /* Allocate memory for work arrays */
    iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );
    if( iwork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_0;
    }
    cwork = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lcwork );
    if( cwork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_1;
    }
    rwork = (float*)LAPACKE_malloc( sizeof(float) * lrwork );
    if( rwork == NULL ) {
        info = LAPACK_WORK_MEMORY_ERROR;
        goto exit_level_2;
    }
    /* Call middle-level interface */
    info = LAPACKE_cgesvdq_work( matrix_layout, joba, jobp, jobr, jobu, jobv,
                                 m, n, a, lda, s, u, ldu, v, ldv, numrank,
                                 iwork, liwork, cwork, lcwork, rwork, lrwork );
    /* Release memory and exit; each label frees only what was allocated
     * before the corresponding failure point, so nothing leaks. */
    LAPACKE_free( rwork );
exit_level_2:
    LAPACKE_free( cwork );
exit_level_1:
    LAPACKE_free( iwork );
exit_level_0:
    if( info == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_cgesvdq", info );
    }
    return info;
}

View File

@ -0,0 +1,149 @@
/*****************************************************************************
Copyright (c) 2014, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function cgesvdq
* Author: Intel Corporation
* Generated November 2015
*****************************************************************************/
#include "lapacke_utils.h"
/*
 * Middle-level C interface to cgesvdq.
 *
 * LAPACK_COL_MAJOR: forwards all arguments directly to LAPACK_cgesvdq.
 * LAPACK_ROW_MAJOR: checks the leading dimensions, answers a workspace query
 *   when lcwork == -1, otherwise transposes `a` into a temporary column-major
 *   copy, runs LAPACK_cgesvdq on the temporaries and transposes `a` (and `u`,
 *   `v` when jobu / jobv is 'a' or 's') back into the caller's arrays.
 *
 * The caller supplies all work arrays (iwork, cwork, rwork) and their lengths.
 *
 * Returns the LAPACK info value, with negative values shifted by -1 to account
 * for the extra leading matrix_layout argument; -1 for an unknown
 * matrix_layout; LAPACK_TRANSPOSE_MEMORY_ERROR when a temporary could not be
 * allocated (reported via LAPACKE_xerbla).
 */
lapack_int LAPACKE_cgesvdq_work( int matrix_layout, char joba, char jobp,
                                 char jobr, char jobu, char jobv,
                                 lapack_int m, lapack_int n, lapack_complex_float* a,
                                 lapack_int lda, float* s, lapack_complex_float* u, lapack_int ldu,
                                 lapack_complex_float* v, lapack_int ldv, lapack_int* numrank,
                                 lapack_int* iwork, lapack_int liwork,
                                 lapack_complex_float* cwork, lapack_int lcwork,
                                 float* rwork, lapack_int lrwork )
{
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda, s, u, &ldu, v, &ldv,
                        numrank, iwork, &liwork, cwork, &lcwork, rwork, &lrwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        lapack_int nrows_u = ( LAPACKE_lsame( jobu, 'a' ) ||
                             LAPACKE_lsame( jobu, 's' ) ) ? m : 1;
        lapack_int ncols_u = LAPACKE_lsame( jobu, 'a' ) ? m :
                             (LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1);
        lapack_int nrows_v = LAPACKE_lsame( jobv, 'a' ) ? n :
                             ( LAPACKE_lsame( jobv, 's' ) ? MIN(m,n) : 1);
        lapack_int lda_t = MAX(1,m);
        lapack_int ldu_t = MAX(1,nrows_u);
        lapack_int ldv_t = MAX(1,nrows_v);
        lapack_complex_float* a_t = NULL;
        lapack_complex_float* u_t = NULL;
        lapack_complex_float* v_t = NULL;
        /* Check leading dimension(s) */
        /* NOTE(review): lda, ldu and ldv are the 10th, 13th and 15th
         * arguments of this function; the codes -9 / -12 / -14 below look
         * one short of the usual convention. Left unchanged - confirm. */
        if( lda < n ) {
            info = -9;
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
            return info;
        }
        if( ldu < ncols_u ) {
            info = -12;
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
            return info;
        }
        if( ldv < n ) {
            info = -14;
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
            return info;
        }
        /* Query optimal working array(s) size if requested */
        if( lcwork == -1 ) {
            LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a, &lda_t,
                            s, u, &ldu_t, v, &ldv_t, numrank, iwork, &liwork,
                            cwork, &lcwork, rwork, &lrwork, &info );
            return (info < 0) ? (info - 1) : info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (lapack_complex_float*)LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) {
            u_t = (lapack_complex_float*)
                LAPACKE_malloc( sizeof(lapack_complex_float) * ldu_t * MAX(1,ncols_u) );
            if( u_t == NULL ) {
                info = LAPACK_TRANSPOSE_MEMORY_ERROR;
                goto exit_level_1;
            }
        }
        if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) {
            v_t = (lapack_complex_float*)
                LAPACKE_malloc( sizeof(lapack_complex_float) * ldv_t * MAX(1,n) );
            if( v_t == NULL ) {
                info = LAPACK_TRANSPOSE_MEMORY_ERROR;
                goto exit_level_2;
            }
        }
        /* Transpose input matrices */
        LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
        /* Call LAPACK function and adjust info.
         * LAPACK must operate on the column-major temporaries: lda_t, ldu_t
         * and ldv_t describe a_t, u_t and v_t, not the caller's row-major
         * arrays. u_t / v_t exist only for job codes 'a' and 's'; for any
         * other code the caller's pointer is passed through as before. */
        LAPACK_cgesvdq( &joba, &jobp, &jobr, &jobu, &jobv, &m, &n, a_t, &lda_t,
                        s, ( u_t != NULL ) ? u_t : u, &ldu_t,
                        ( v_t != NULL ) ? v_t : v, &ldv_t, numrank, iwork, &liwork,
                        cwork, &lcwork, rwork, &lrwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices */
        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
        if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) {
            LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_u, ncols_u, u_t, ldu_t,
                               u, ldu );
        }
        if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) {
            LAPACKE_cge_trans( LAPACK_COL_MAJOR, nrows_v, n, v_t, ldv_t, v,
                               ldv );
        }
        /* Release memory and exit */
        if( LAPACKE_lsame( jobv, 'a' ) || LAPACKE_lsame( jobv, 's' ) ) {
            LAPACKE_free( v_t );
        }
exit_level_2:
        if( LAPACKE_lsame( jobu, 'a' ) || LAPACKE_lsame( jobu, 's' ) ) {
            LAPACKE_free( u_t );
        }
exit_level_1:
        LAPACKE_free( a_t );
exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_cgesvdq_work", info );
    }
    return info;
}

View File

@ -91,7 +91,7 @@ lapack_int LAPACKE_cggesx( int matrix_layout, char jobvsl, char jobvsr,
if( info != 0 ) {
goto exit_level_2;
}
liwork = (lapack_int)iwork_query;
liwork = iwork_query;
lwork = LAPACK_C2INT( work_query );
/* Allocate memory for work arrays */
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );

View File

@ -67,7 +67,7 @@ lapack_int LAPACKE_chbevd( int matrix_layout, char jobz, char uplo, lapack_int n
if( info != 0 ) {
goto exit_level_0;
}
liwork = (lapack_int)iwork_query;
liwork = iwork_query;
lrwork = (lapack_int)rwork_query;
lwork = LAPACK_C2INT( work_query );
/* Allocate memory for work arrays */

View File

@ -67,7 +67,7 @@ lapack_int LAPACKE_chbevd_2stage( int matrix_layout, char jobz, char uplo, lapac
if( info != 0 ) {
goto exit_level_0;
}
liwork = (lapack_int)iwork_query;
liwork = iwork_query;
lrwork = (lapack_int)rwork_query;
lwork = LAPACK_C2INT( work_query );
/* Allocate memory for work arrays */

View File

@ -71,7 +71,7 @@ lapack_int LAPACKE_chbgvd( int matrix_layout, char jobz, char uplo, lapack_int n
if( info != 0 ) {
goto exit_level_0;
}
liwork = (lapack_int)iwork_query;
liwork = iwork_query;
lrwork = (lapack_int)rwork_query;
lwork = LAPACK_C2INT( work_query );
/* Allocate memory for work arrays */

View File

@ -70,7 +70,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo,
goto exit_level_0;
}
/* Transpose input matrices */
LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t );
LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t );
/* Call LAPACK function and adjust info */
LAPACK_cheev( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork,
&info );
@ -78,7 +78,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo,
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
/* Release memory and exit */
LAPACKE_free( a_t );
exit_level_0:

View File

@ -53,7 +53,7 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -5;
}
}
@ -65,7 +65,7 @@ lapack_int LAPACKE_cheevd( int matrix_layout, char jobz, char uplo, lapack_int n
if( info != 0 ) {
goto exit_level_0;
}
liwork = (lapack_int)iwork_query;
liwork = iwork_query;
lrwork = (lapack_int)rwork_query;
lwork = LAPACK_C2INT( work_query );
/* Allocate memory for work arrays */

View File

@ -53,7 +53,7 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -5;
}
}
@ -65,7 +65,7 @@ lapack_int LAPACKE_cheevd_2stage( int matrix_layout, char jobz, char uplo, lapac
if( info != 0 ) {
goto exit_level_0;
}
liwork = (lapack_int)iwork_query;
liwork = iwork_query;
lrwork = (lapack_int)rwork_query;
lwork = LAPACK_C2INT( work_query );
/* Allocate memory for work arrays */

View File

@ -71,7 +71,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo,
goto exit_level_0;
}
/* Transpose input matrices */
LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t );
LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t );
/* Call LAPACK function and adjust info */
LAPACK_cheevd_2stage( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork,
&lrwork, iwork, &liwork, &info );
@ -79,7 +79,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo,
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
/* Release memory and exit */
LAPACKE_free( a_t );
exit_level_0:

View File

@ -71,7 +71,7 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo,
goto exit_level_0;
}
/* Transpose input matrices */
LAPACKE_cge_trans( matrix_layout, n, n, a, lda, a_t, lda_t );
LAPACKE_che_trans( matrix_layout, uplo, n, a, lda, a_t, lda_t );
/* Call LAPACK function and adjust info */
LAPACK_cheevd( &jobz, &uplo, &n, a_t, &lda_t, w, work, &lwork, rwork,
&lrwork, iwork, &liwork, &info );
@ -79,7 +79,8 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo,
info = info - 1;
}
/* Transpose output matrices */
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
/* Release memory and exit */
LAPACKE_free( a_t );
exit_level_0:

Some files were not shown because too many files have changed in this diff Show More