Merge pull request #3216 from xianyi/develop
Update from develop for 0.3.15 release
This commit is contained in:
commit
65502c6af6
11
.travis.yml
11
.travis.yml
|
@ -224,7 +224,16 @@ matrix:
|
|||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
- brew update
|
||||
- brew install gcc@10
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10"
|
||||
|
||||
- <<: *test-macos
|
||||
osx_image: xcode12
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
- brew update
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
|
|
|
@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
|
|||
project(OpenBLAS C ASM)
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 14)
|
||||
set(OpenBLAS_PATCH_VERSION 14.dev)
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
# Adhere to GNU filesystem layout conventions
|
||||
|
|
|
@ -1,4 +1,54 @@
|
|||
OpenBLAS ChangeLog
|
||||
====================================================================
|
||||
Version 0.3.15
|
||||
2-May-2021
|
||||
|
||||
common:
|
||||
- imported improvements and bugfixes from Reference-LAPACK 3.9.1
|
||||
- imported LAPACKE interface fixes from Reference-LAPACK PRs 534 + 537
|
||||
- fixed a problem in the cpu detection of 0.3.14 that prevented cross-compilation
|
||||
- fixed a sequence problem in the generation of softlinks to the library in GMAKE
|
||||
|
||||
RISC-V:
|
||||
- fixed compilation on RISCV (missing entry in getarch)
|
||||
- fixed a potential division by zero in CROTG and ZROTG
|
||||
|
||||
POWER:
|
||||
- fixed LAPACK testsuite failures seen with the NVIDIA HPC compiler
|
||||
- improved CGEMM, DGEMM and ZGEMM performance on POWER10
|
||||
- added an optimized ZGEMV kernel for POWER10
|
||||
- fixed a potential division by zero in CROTG and ZROTG
|
||||
|
||||
x86_64:
|
||||
- added support for Intel Control-flow Enforcement Technology (CET)
|
||||
- reverted the DOMATCOPY_RT code to the generic C version
|
||||
- fixed a bug in the AVX512 SGEMM kernel introduced in 0.3.14
|
||||
- fixed misapplication of -msse flag to non-SSE cpus in DYNAMIC_ARCH
|
||||
- added support for compilation of the benchmarks on older OSX versions
|
||||
- fixed propagation of the NO_AVX512 option in CMAKE builds
|
||||
- fixed compilation of the AVX512 SGEMM kernel with clang-cl on Windows
|
||||
- fixed compilation of the CTESTs with INTERFACE64=1 (random faults on OSX)
|
||||
- corrected the Haswell DROT kernel to require AVX2/FMA3 rather than AVX512
|
||||
|
||||
ARM:
|
||||
- fixed a potential division by zero in CROTG and ZROTG
|
||||
- fixed a potential overflow in IMATCOPY/ZIMATCOPY and the CTESTs
|
||||
|
||||
ARM64:
|
||||
- fixed spurious reads outside the array in the SGEMM tcopy macro
|
||||
- fixed a potential division by zero in CROTG and ZROTG
|
||||
- fixed a segmentation fault in DYNAMIC_ARCH builds (reappeared in 0.3.14)
|
||||
|
||||
MIPS:
|
||||
- fixed a potential division by zero in CROTG and ZROTG
|
||||
- fixed a potential overflow in IMATCOPY/ZIMATCOPY and the CTESTs
|
||||
|
||||
MIPS64:
|
||||
- fixed a potential division by zero in CROTG and ZROTG
|
||||
|
||||
SPARC:
|
||||
- fixed a potential division by zero in CROTG and ZROTG
|
||||
|
||||
====================================================================
|
||||
Version 0.3.14
|
||||
17-Mar-2021
|
||||
|
|
2
Makefile
2
Makefile
|
@ -167,7 +167,6 @@ ifeq ($(NO_SHARED), 1)
|
|||
$(error OpenBLAS: neither static nor shared are enabled.)
|
||||
endif
|
||||
endif
|
||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
|
@ -196,6 +195,7 @@ endif
|
|||
ifdef USE_THREAD
|
||||
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
endif
|
||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@touch lib.grd
|
||||
|
||||
prof : prof_blas prof_lapack
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.14
|
||||
VERSION = 0.3.14.dev
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
|
17
Makefile.x86
17
Makefile.x86
|
@ -1,10 +1,21 @@
|
|||
# COMPILER_PREFIX = mingw32-
|
||||
|
||||
ifdef HAVE_SSE
|
||||
CCOMMON_OPT += -msse
|
||||
FCOMMON_OPT += -msse
|
||||
ifndef DYNAMIC_ARCH
|
||||
ADD_CPUFLAGS = 1
|
||||
else
|
||||
ifdef TARGET_CORE
|
||||
ADD_CPUFLAGS = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef ADD_CPUFLAGS
|
||||
ifdef HAVE_SSE
|
||||
CCOMMON_OPT += -msse
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -msse
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x86
|
||||
|
|
|
@ -8,6 +8,16 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifndef DYNAMIC_ARCH
|
||||
ADD_CPUFLAGS = 1
|
||||
else
|
||||
ifdef TARGET_CORE
|
||||
ADD_CPUFLAGS = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef ADD_CPUFLAGS
|
||||
ifdef HAVE_SSE3
|
||||
CCOMMON_OPT += -msse3
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
|
@ -44,7 +54,6 @@ endif
|
|||
endif
|
||||
|
||||
ifeq ($(CORE), SKYLAKEX)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
|
@ -62,10 +71,8 @@ endif
|
|||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), COOPERLAKE)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX512
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# cooperlake support was added in 10.1
|
||||
|
@ -88,7 +95,6 @@ endif
|
|||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef HAVE_AVX2
|
||||
ifndef NO_AVX2
|
||||
|
@ -120,6 +126,7 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
|
|
|
@ -4,6 +4,14 @@ trigger:
|
|||
branches:
|
||||
include:
|
||||
- develop
|
||||
resources:
|
||||
containers:
|
||||
- container: oneapi-hpckit
|
||||
image: intel/oneapi-hpckit:latest
|
||||
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
|
||||
- container: oneapi-basekit
|
||||
image: intel/oneapi-basekit:latest
|
||||
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
|
||||
|
||||
jobs:
|
||||
# manylinux1 is useful to test because the
|
||||
|
@ -68,4 +76,64 @@ jobs:
|
|||
dir
|
||||
openblas_utest.exe
|
||||
|
||||
- job: OSX_OpenMP
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-10 FC=gfortran-10
|
||||
|
||||
# Single-threaded GCC build on macOS.
# The build option is spelled USE_THREAD (singular); make accepts any
# command-line variable, so a misspelled USE_THREADS=0 is silently ignored
# and the job would build the threaded library instead.
- job: OSX_GCC_Nothreads
  pool:
    vmImage: 'macOS-10.15'
  steps:
  - script: |
      brew update
      make USE_THREAD=0 CC=gcc-10 FC=gfortran-10
|
||||
|
||||
- job: OSX_OpenMP_Clang
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
brew install llvm libomp
|
||||
make TARGET=CORE2 USE_OPENMP=1 INTERFACE64=1 DYNAMIC_ARCH=1 CC=/usr/local/opt/llvm/bin/clang FC=gfortran-10
|
||||
|
||||
- job: OSX_Ifort_Clang
|
||||
pool:
|
||||
vmImage: 'macOS-10.15'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
MACOS_FORTRAN_COMPONENTS: intel.oneapi.mac.ifort-compiler
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
brew install llvm libomp
|
||||
sudo mkdir -p /opt/intel
|
||||
sudo chown $USER /opt/intel
|
||||
displayName: prepare for cache restore
|
||||
- task: Cache@2
|
||||
inputs:
|
||||
path: /opt/intel/oneapi
|
||||
key: '"install" | "$(MACOS_HPCKIT_URL)" | "$(MACOS_FORTRAN_COMPONENTS)"'
|
||||
cacheHitVar: CACHE_RESTORED
|
||||
# Download the Intel oneAPI HPC kit disk image and install the components
# named in MACOS_FORTRAN_COMPONENTS. The step is skipped when the Cache@2
# task set CACHE_RESTORED to 'true'.
- script: |
    curl --output webimage.dmg --url $(MACOS_HPCKIT_URL) --retry 5 --retry-delay 5
    hdiutil attach webimage.dmg
    sudo /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)"/bootstrapper.app/Contents/MacOS/bootstrapper -s --action install --components="$(MACOS_FORTRAN_COMPONENTS)" --eula=accept --continue-with-optional-error=yes --log-dir=.
    # Keep the installer status so the detach below cannot mask a failure.
    installer_exit_code=$?
    # Detach the volume mounted above; its name is the image file name
    # without the .dmg suffix, derived from the same pipeline variable.
    hdiutil detach /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)" -quiet
    exit $installer_exit_code
  displayName: install
  condition: ne(variables.CACHE_RESTORED, 'true')
|
||||
- script: |
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
make CC=/usr/local/opt/llvm/bin/clang FC=ifort
|
||||
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
#include <time.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <mach/mach_time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ set(SLASRC
|
|||
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
|
||||
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
|
||||
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
|
||||
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
|
||||
slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
|
||||
slarrv.f slartv.f
|
||||
slarz.f slarzb.f slarzt.f slasy2.f
|
||||
slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
|
||||
|
@ -112,14 +112,14 @@ set(SLASRC
|
|||
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
|
||||
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f
|
||||
sgelqt.f sgelqt3.f sgemlqt.f
|
||||
sgetsls.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f
|
||||
sgetsls.f sgetsqrhrt.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f
|
||||
sgelq.f slaswlq.f slamswlq.f sgemlq.f
|
||||
stplqt.f stplqt2.f stpmlqt.f
|
||||
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
|
||||
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
|
||||
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
|
||||
sgesvdq.f slaorhr_col_getrfnp.f
|
||||
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f )
|
||||
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f )
|
||||
|
||||
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
|
||||
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
|
||||
|
@ -171,7 +171,7 @@ set(CLASRC
|
|||
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
|
||||
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
|
||||
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
|
||||
clarf.f clarfb.f clarfg.f clarfgp.f clarft.f
|
||||
clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f
|
||||
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
|
||||
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
|
||||
clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f
|
||||
|
@ -209,14 +209,14 @@ set(CLASRC
|
|||
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
|
||||
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f
|
||||
cgelqt.f cgelqt3.f cgemlqt.f
|
||||
cgetsls.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f
|
||||
cgetsls.f cgetsqrhrt.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f
|
||||
cgelq.f claswlq.f clamswlq.f cgemlq.f
|
||||
ctplqt.f ctplqt2.f ctpmlqt.f
|
||||
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
|
||||
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
|
||||
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
|
||||
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
|
||||
cungtsqr.f cunhr_col.f )
|
||||
cungtsqr.f cungtsqr_row.f cunhr_col.f )
|
||||
|
||||
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
|
||||
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
|
||||
|
@ -253,7 +253,7 @@ set(DLASRC
|
|||
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
|
||||
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
|
||||
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
|
||||
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
|
||||
dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
|
||||
dlargv.f dlarrv.f dlartv.f
|
||||
dlarz.f dlarzb.f dlarzt.f dlasy2.f
|
||||
dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
|
||||
|
@ -300,14 +300,14 @@ set(DLASRC
|
|||
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
|
||||
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f
|
||||
dgelqt.f dgelqt3.f dgemlqt.f
|
||||
dgetsls.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f
|
||||
dgetsls.f dgetsqrhrt.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f
|
||||
dgelq.f dlaswlq.f dlamswlq.f dgemlq.f
|
||||
dtplqt.f dtplqt2.f dtpmlqt.f
|
||||
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
|
||||
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
|
||||
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
|
||||
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
|
||||
dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f )
|
||||
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f )
|
||||
|
||||
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
|
||||
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
|
||||
|
@ -360,7 +360,7 @@ set(ZLASRC
|
|||
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
|
||||
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
|
||||
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
|
||||
zlarcm.f zlarf.f zlarfb.f
|
||||
zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f
|
||||
zlarfg.f zlarfgp.f zlarft.f
|
||||
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
|
||||
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
|
||||
|
@ -402,13 +402,13 @@ set(ZLASRC
|
|||
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f
|
||||
ztplqt.f ztplqt2.f ztpmlqt.f
|
||||
zgelqt.f zgelqt3.f zgemlqt.f
|
||||
zgetsls.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f
|
||||
zgetsls.f zgetsqrhrt.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f
|
||||
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
|
||||
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
|
||||
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
|
||||
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
|
||||
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
|
||||
zungtsqr.f zunhr_col.f)
|
||||
zungtsqr.f zungtsqr_row.f zunhr_col.f)
|
||||
|
||||
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
|
||||
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
|
||||
|
|
|
@ -114,6 +114,8 @@ set(CSRC
|
|||
lapacke_cgetrs_work.c
|
||||
lapacke_cgetsls.c
|
||||
lapacke_cgetsls_work.c
|
||||
lapacke_cgetsqrhrt.c
|
||||
lapacke_cgetsqrhrt_work.c
|
||||
lapacke_cggbak.c
|
||||
lapacke_cggbak_work.c
|
||||
lapacke_cggbal.c
|
||||
|
@ -590,6 +592,8 @@ set(CSRC
|
|||
lapacke_cungrq_work.c
|
||||
lapacke_cungtr.c
|
||||
lapacke_cungtr_work.c
|
||||
lapacke_cungtsqr_row.c
|
||||
lapacke_cungtsqr_row_work.c
|
||||
lapacke_cunmbr.c
|
||||
lapacke_cunmbr_work.c
|
||||
lapacke_cunmhr.c
|
||||
|
@ -735,6 +739,8 @@ set(DSRC
|
|||
lapacke_dgetrs_work.c
|
||||
lapacke_dgetsls.c
|
||||
lapacke_dgetsls_work.c
|
||||
lapacke_dgetsqrhrt.c
|
||||
lapacke_dgetsqrhrt_work.c
|
||||
lapacke_dggbak.c
|
||||
lapacke_dggbak_work.c
|
||||
lapacke_dggbal.c
|
||||
|
@ -862,6 +868,8 @@ set(DSRC
|
|||
lapacke_dorgrq_work.c
|
||||
lapacke_dorgtr.c
|
||||
lapacke_dorgtr_work.c
|
||||
lapacke_dorgtsqr_row.c
|
||||
lapacke_dorgtsqr_row_work.c
|
||||
lapacke_dormbr.c
|
||||
lapacke_dormbr_work.c
|
||||
lapacke_dormhr.c
|
||||
|
@ -1309,6 +1317,8 @@ set(SSRC
|
|||
lapacke_sgetrs_work.c
|
||||
lapacke_sgetsls.c
|
||||
lapacke_sgetsls_work.c
|
||||
lapacke_sgetsqrhrt.c
|
||||
lapacke_sgetsqrhrt_work.c
|
||||
lapacke_sggbak.c
|
||||
lapacke_sggbak_work.c
|
||||
lapacke_sggbal.c
|
||||
|
@ -1435,6 +1445,8 @@ set(SSRC
|
|||
lapacke_sorgrq_work.c
|
||||
lapacke_sorgtr.c
|
||||
lapacke_sorgtr_work.c
|
||||
lapacke_sorgtsqr_row.c
|
||||
lapacke_sorgtsqr_row_work.c
|
||||
lapacke_sormbr.c
|
||||
lapacke_sormbr_work.c
|
||||
lapacke_sormhr.c
|
||||
|
@ -1877,6 +1889,8 @@ set(ZSRC
|
|||
lapacke_zgetrs_work.c
|
||||
lapacke_zgetsls.c
|
||||
lapacke_zgetsls_work.c
|
||||
lapacke_zgetsqrhrt.c
|
||||
lapacke_zgetsqrhrt_work.c
|
||||
lapacke_zggbak.c
|
||||
lapacke_zggbak_work.c
|
||||
lapacke_zggbal.c
|
||||
|
@ -2351,6 +2365,8 @@ set(ZSRC
|
|||
lapacke_zungrq_work.c
|
||||
lapacke_zungtr.c
|
||||
lapacke_zungtr_work.c
|
||||
lapacke_zungtsqr_row.c
|
||||
lapacke_zungtsqr_row_work.c
|
||||
lapacke_zunmbr.c
|
||||
lapacke_zunmbr_work.c
|
||||
lapacke_zunmhr.c
|
||||
|
|
|
@ -299,6 +299,10 @@ if (NO_AVX2)
|
|||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
|
||||
endif ()
|
||||
|
||||
# When the NO_AVX512 option is set, append -DNO_AVX512 to CCOMMON_OPT,
# mirroring the NO_AVX2 handling directly above.
if (NO_AVX512)
|
||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
|
||||
endif ()
|
||||
|
||||
if (USE_THREAD)
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
# NO_AFFINITY = 1
|
||||
|
|
9
common.h
9
common.h
|
@ -416,6 +416,15 @@ please https://github.com/xianyi/OpenBLAS/issues/246
|
|||
#include "common_alpha.h"
|
||||
#endif
|
||||
|
||||
/* On x86/x86_64 builds with __CET__ defined, pull in <cet.h> when the
   compiler supports __has_include and the header is available. */
#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && defined(__CET__) && defined(__has_include)
|
||||
#if __has_include(<cet.h>)
|
||||
#include <cet.h>
|
||||
#endif
|
||||
#endif
|
||||
/* Fallback: if nothing above defined _CET_ENDBR, define it as empty so
   that uses of the macro expand to nothing. */
#ifndef _CET_ENDBR
|
||||
#define _CET_ENDBR
|
||||
#endif
|
||||
|
||||
#ifdef ARCH_X86
|
||||
#include "common_x86.h"
|
||||
#endif
|
||||
|
|
|
@ -340,7 +340,8 @@ REALNAME:
|
|||
.align 16; \
|
||||
.globl REALNAME ;\
|
||||
.type REALNAME, @function; \
|
||||
REALNAME:
|
||||
REALNAME: \
|
||||
_CET_ENDBR
|
||||
|
||||
#ifdef PROFILE
|
||||
#define PROFCODE call mcount
|
||||
|
|
|
@ -451,7 +451,8 @@ REALNAME:
|
|||
.align 512; \
|
||||
.globl REALNAME ;\
|
||||
.type REALNAME, @function; \
|
||||
REALNAME:
|
||||
REALNAME: \
|
||||
_CET_ENDBR
|
||||
|
||||
#ifdef PROFILE
|
||||
#define PROFCODE call *mcount@GOTPCREL(%rip)
|
||||
|
|
|
@ -20,7 +20,7 @@ void F77_cgemv(int *order, char *transp, int *m, int *n,
|
|||
get_transpose_type(transp, &trans);
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = (CBLAS_TEST_COMPLEX *)malloc( (*m)*LDA*sizeof( CBLAS_TEST_COMPLEX) );
|
||||
A = (CBLAS_TEST_COMPLEX *)malloc( (*m)*(size_t)LDA*sizeof( CBLAS_TEST_COMPLEX) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -50,7 +50,7 @@ void F77_cgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
|
|||
get_transpose_type(transp, &trans);
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *ku+*kl+2;
|
||||
A=( CBLAS_TEST_COMPLEX* )malloc((*n+*kl)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
A=( CBLAS_TEST_COMPLEX* )malloc((*n+*kl)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*ku; i++ ){
|
||||
irow=*ku+*kl-i;
|
||||
jcol=(*ku)-i;
|
||||
|
@ -94,7 +94,7 @@ void F77_cgeru(int *order, int *m, int *n, CBLAS_TEST_COMPLEX *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -122,7 +122,7 @@ void F77_cgerc(int *order, int *m, int *n, CBLAS_TEST_COMPLEX *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
A=(CBLAS_TEST_COMPLEX* )malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -154,7 +154,7 @@ void F77_chemv(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = (CBLAS_TEST_COMPLEX *)malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
A = (CBLAS_TEST_COMPLEX *)malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -190,7 +190,7 @@ int i,irow,j,jcol,LDA;
|
|||
*incx, beta, y, *incy );
|
||||
else {
|
||||
LDA = *k+2;
|
||||
A =(CBLAS_TEST_COMPLEX*)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
A =(CBLAS_TEST_COMPLEX*)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -251,8 +251,8 @@ void F77_chpmv(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
|
|||
beta, y, *incy);
|
||||
else {
|
||||
LDA = *n;
|
||||
A = (CBLAS_TEST_COMPLEX* )malloc(LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ));
|
||||
AP = (CBLAS_TEST_COMPLEX* )malloc( (((LDA+1)*LDA)/2)*
|
||||
A = (CBLAS_TEST_COMPLEX* )malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ));
|
||||
AP = (CBLAS_TEST_COMPLEX* )malloc( ((((size_t)LDA+1)*LDA)/2)*
|
||||
sizeof( CBLAS_TEST_COMPLEX ));
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
|
@ -311,7 +311,7 @@ void F77_ctbmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
x, *incx);
|
||||
else {
|
||||
LDA = *k+2;
|
||||
A=(CBLAS_TEST_COMPLEX *)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
A=(CBLAS_TEST_COMPLEX *)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -375,7 +375,7 @@ void F77_ctbsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
*incx);
|
||||
else {
|
||||
LDA = *k+2;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_COMPLEX ));
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX ));
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -436,8 +436,8 @@ void F77_ctpmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
cblas_ctpmv( CblasRowMajor, UNDEFINED, trans, diag, *n, ap, x, *incx );
|
||||
else {
|
||||
LDA = *n;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc(LDA*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
AP=(CBLAS_TEST_COMPLEX*)malloc((((LDA+1)*LDA)/2)*
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
AP=(CBLAS_TEST_COMPLEX*)malloc(((((size_t)LDA+1)*LDA)/2)*
|
||||
sizeof(CBLAS_TEST_COMPLEX));
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
|
@ -491,8 +491,8 @@ void F77_ctpsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
cblas_ctpsv( CblasRowMajor, UNDEFINED, trans, diag, *n, ap, x, *incx );
|
||||
else {
|
||||
LDA = *n;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc(LDA*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
AP=(CBLAS_TEST_COMPLEX*)malloc((((LDA+1)*LDA)/2)*
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
AP=(CBLAS_TEST_COMPLEX*)malloc(((((size_t)LDA+1)*LDA)/2)*
|
||||
sizeof(CBLAS_TEST_COMPLEX));
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
|
@ -544,7 +544,7 @@ void F77_ctrmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA=*n+1;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -573,7 +573,7 @@ void F77_ctrsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A =(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
A =(CBLAS_TEST_COMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -601,8 +601,8 @@ void F77_chpr(int *order, char *uplow, int *n, float *alpha,
|
|||
cblas_chpr(CblasRowMajor, UNDEFINED, *n, *alpha, x, *incx, ap );
|
||||
else {
|
||||
LDA = *n;
|
||||
A = (CBLAS_TEST_COMPLEX* )malloc(LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
AP = ( CBLAS_TEST_COMPLEX* )malloc( (((LDA+1)*LDA)/2)*
|
||||
A = (CBLAS_TEST_COMPLEX* )malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
AP = ( CBLAS_TEST_COMPLEX* )malloc( ((((size_t)LDA+1)*LDA)/2)*
|
||||
sizeof( CBLAS_TEST_COMPLEX ));
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
|
@ -678,8 +678,8 @@ void F77_chpr2(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
|
|||
*incy, ap );
|
||||
else {
|
||||
LDA = *n;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc( LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
AP=(CBLAS_TEST_COMPLEX*)malloc( (((LDA+1)*LDA)/2)*
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc( (size_t)LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
AP=(CBLAS_TEST_COMPLEX*)malloc( ((((size_t)LDA+1)*LDA)/2)*
|
||||
sizeof( CBLAS_TEST_COMPLEX ));
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
|
@ -750,7 +750,7 @@ void F77_cher(int *order, char *uplow, int *n, float *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*n)*LDA*sizeof( CBLAS_TEST_COMPLEX ));
|
||||
A=(CBLAS_TEST_COMPLEX*)malloc((*n)*(size_t)LDA*sizeof( CBLAS_TEST_COMPLEX ));
|
||||
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
|
@ -784,7 +784,7 @@ void F77_cher2(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A= ( CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
A= ( CBLAS_TEST_COMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX ) );
|
||||
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
|
|
|
@ -19,7 +19,7 @@ void F77_dgemv(int *order, char *transp, int *m, int *n, double *alpha,
|
|||
get_transpose_type(transp, &trans);
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*m)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*m)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -43,7 +43,7 @@ void F77_dger(int *order, int *m, int *n, double *alpha, double *x, int *incx,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*m)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*m)*(size_t)LDA*sizeof( double ) );
|
||||
|
||||
for( i=0; i<*m; i++ ) {
|
||||
for( j=0; j<*n; j++ )
|
||||
|
@ -74,7 +74,7 @@ void F77_dtrmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -102,7 +102,7 @@ void F77_dtrsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -123,7 +123,7 @@ void F77_dsymv(int *order, char *uplow, int *n, double *alpha, double *a,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -146,7 +146,7 @@ void F77_dsyr(int *order, char *uplow, int *n, double *alpha, double *x,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -170,7 +170,7 @@ void F77_dsyr2(int *order, char *uplow, int *n, double *alpha, double *x,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -196,7 +196,7 @@ void F77_dgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *ku+*kl+2;
|
||||
A = ( double* )malloc( (*n+*kl)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n+*kl)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*ku; i++ ){
|
||||
irow=*ku+*kl-i;
|
||||
jcol=(*ku)-i;
|
||||
|
@ -236,7 +236,7 @@ void F77_dtbmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *k+1;
|
||||
A = ( double* )malloc( (*n+*k)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n+*k)*(size_t)LDA*sizeof( double ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -282,7 +282,7 @@ void F77_dtbsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *k+1;
|
||||
A = ( double* )malloc( (*n+*k)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n+*k)*(size_t)LDA*sizeof( double ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -325,7 +325,7 @@ void F77_dsbmv(int *order, char *uplow, int *n, int *k, double *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *k+1;
|
||||
A = ( double* )malloc( (*n+*k)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n+*k)*(size_t)LDA*sizeof( double ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -369,8 +369,8 @@ void F77_dspmv(int *order, char *uplow, int *n, double *alpha, double *ap,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
A = ( double* )malloc( LDA*LDA*sizeof( double ) );
|
||||
AP = ( double* )malloc( (((LDA+1)*LDA)/2)*sizeof( double ) );
|
||||
A = ( double* )malloc( (size_t)LDA*LDA*sizeof( double ) );
|
||||
AP = ( double* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( double ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
for( i=0; i<j+1; i++, k++ )
|
||||
|
@ -411,8 +411,8 @@ void F77_dtpmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
A = ( double* )malloc( LDA*LDA*sizeof( double ) );
|
||||
AP = ( double* )malloc( (((LDA+1)*LDA)/2)*sizeof( double ) );
|
||||
A = ( double* )malloc( (size_t)LDA*LDA*sizeof( double ) );
|
||||
AP = ( double* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( double ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
for( i=0; i<j+1; i++, k++ )
|
||||
|
@ -451,8 +451,8 @@ void F77_dtpsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
A = ( double* )malloc( LDA*LDA*sizeof( double ) );
|
||||
AP = ( double* )malloc( (((LDA+1)*LDA)/2)*sizeof( double ) );
|
||||
A = ( double* )malloc( (size_t)LDA*LDA*sizeof( double ) );
|
||||
AP = ( double* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( double ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
for( i=0; i<j+1; i++, k++ )
|
||||
|
@ -488,8 +488,8 @@ void F77_dspr(int *order, char *uplow, int *n, double *alpha, double *x,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
A = ( double* )malloc( LDA*LDA*sizeof( double ) );
|
||||
AP = ( double* )malloc( (((LDA+1)*LDA)/2)*sizeof( double ) );
|
||||
A = ( double* )malloc( (size_t)LDA*LDA*sizeof( double ) );
|
||||
AP = ( double* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( double ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
for( i=0; i<j+1; i++, k++ )
|
||||
|
@ -540,8 +540,8 @@ void F77_dspr2(int *order, char *uplow, int *n, double *alpha, double *x,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
A = ( double* )malloc( LDA*LDA*sizeof( double ) );
|
||||
AP = ( double* )malloc( (((LDA+1)*LDA)/2)*sizeof( double ) );
|
||||
A = ( double* )malloc( (size_t)LDA*LDA*sizeof( double ) );
|
||||
AP = ( double* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( double ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
for( i=0; i<j+1; i++, k++ )
|
||||
|
|
|
@ -26,34 +26,34 @@ void F77_dgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (transa == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A = (double *)malloc( (*m)*LDA*sizeof( double ) );
|
||||
A = (double *)malloc( (*m)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*k; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else {
|
||||
LDA = *m+1;
|
||||
A = ( double* )malloc( LDA*(*k)*sizeof( double ) );
|
||||
A = ( double* )malloc( (size_t)LDA*(*k)*sizeof( double ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*m; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
if (transb == CblasNoTrans) {
|
||||
LDB = *n+1;
|
||||
B = ( double* )malloc( (*k)*LDB*sizeof( double ) );
|
||||
B = ( double* )malloc( (*k)*(size_t)LDB*sizeof( double ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
}
|
||||
else {
|
||||
LDB = *k+1;
|
||||
B = ( double* )malloc( LDB*(*n)*sizeof( double ) );
|
||||
B = ( double* )malloc( (size_t)LDB*(*n)*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
}
|
||||
LDC = *n+1;
|
||||
C = ( double* )malloc( (*m)*LDC*sizeof( double ) );
|
||||
C = ( double* )malloc( (*m)*(size_t)LDC*sizeof( double ) );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
|
@ -89,25 +89,25 @@ void F77_dsymm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A = ( double* )malloc( (*m)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*m)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
LDB = *n+1;
|
||||
B = ( double* )malloc( (*m)*LDB*sizeof( double ) );
|
||||
B = ( double* )malloc( (*m)*(size_t)LDB*sizeof( double ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
LDC = *n+1;
|
||||
C = ( double* )malloc( (*m)*LDC*sizeof( double ) );
|
||||
C = ( double* )malloc( (*m)*(size_t)LDC*sizeof( double ) );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
|
@ -143,20 +143,20 @@ void F77_dsyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*k)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*k)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
LDC = *n+1;
|
||||
C = ( double* )malloc( (*n)*LDC*sizeof( double ) );
|
||||
C = ( double* )malloc( (*n)*(size_t)LDC*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
|
@ -191,8 +191,8 @@ void F77_dsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
LDB = *k+1;
|
||||
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
|
||||
B = ( double* )malloc( (*n)*LDB*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
|
||||
B = ( double* )malloc( (*n)*(size_t)LDB*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
|
@ -202,8 +202,8 @@ void F77_dsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
else {
|
||||
LDA = *n+1;
|
||||
LDB = *n+1;
|
||||
A = ( double* )malloc( LDA*(*k)*sizeof( double ) );
|
||||
B = ( double* )malloc( LDB*(*k)*sizeof( double ) );
|
||||
A = ( double* )malloc( (size_t)LDA*(*k)*sizeof( double ) );
|
||||
B = ( double* )malloc( (size_t)LDB*(*k)*sizeof( double ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
|
@ -211,7 +211,7 @@ void F77_dsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
}
|
||||
}
|
||||
LDC = *n+1;
|
||||
C = ( double* )malloc( (*n)*LDC*sizeof( double ) );
|
||||
C = ( double* )malloc( (*n)*(size_t)LDC*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
|
@ -249,20 +249,20 @@ void F77_dtrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A = ( double* )malloc( (*m)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*m)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
LDB = *n+1;
|
||||
B = ( double* )malloc( (*m)*LDB*sizeof( double ) );
|
||||
B = ( double* )malloc( (*m)*(size_t)LDB*sizeof( double ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
|
@ -300,20 +300,20 @@ void F77_dtrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A = ( double* )malloc( (*m)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*m)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
|
||||
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
LDB = *n+1;
|
||||
B = ( double* )malloc( (*m)*LDB*sizeof( double ) );
|
||||
B = ( double* )malloc( (*m)*(size_t)LDB*sizeof( double ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
|
|
|
@ -19,7 +19,7 @@ void F77_sgemv(int *order, char *transp, int *m, int *n, float *alpha,
|
|||
get_transpose_type(transp, &trans);
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*m)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*m)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -43,7 +43,7 @@ void F77_sger(int *order, int *m, int *n, float *alpha, float *x, int *incx,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*m)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*m)*(size_t)LDA*sizeof( float ) );
|
||||
|
||||
for( i=0; i<*m; i++ ) {
|
||||
for( j=0; j<*n; j++ )
|
||||
|
@ -74,7 +74,7 @@ void F77_strmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -102,7 +102,7 @@ void F77_strsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -123,7 +123,7 @@ void F77_ssymv(int *order, char *uplow, int *n, float *alpha, float *a,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -146,7 +146,7 @@ void F77_ssyr(int *order, char *uplow, int *n, float *alpha, float *x,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -170,7 +170,7 @@ void F77_ssyr2(int *order, char *uplow, int *n, float *alpha, float *x,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[ LDA*i+j ]=a[ (*lda)*j+i ];
|
||||
|
@ -196,7 +196,7 @@ void F77_sgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *ku+*kl+2;
|
||||
A = ( float* )malloc( (*n+*kl)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n+*kl)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*ku; i++ ){
|
||||
irow=*ku+*kl-i;
|
||||
jcol=(*ku)-i;
|
||||
|
@ -236,7 +236,7 @@ void F77_stbmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *k+1;
|
||||
A = ( float* )malloc( (*n+*k)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n+*k)*(size_t)LDA*sizeof( float ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -282,7 +282,7 @@ void F77_stbsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *k+1;
|
||||
A = ( float* )malloc( (*n+*k)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n+*k)*(size_t)LDA*sizeof( float ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -325,7 +325,7 @@ void F77_ssbmv(int *order, char *uplow, int *n, int *k, float *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *k+1;
|
||||
A = ( float* )malloc( (*n+*k)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n+*k)*(size_t)LDA*sizeof( float ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -369,8 +369,8 @@ void F77_sspmv(int *order, char *uplow, int *n, float *alpha, float *ap,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
A = ( float* )malloc( LDA*LDA*sizeof( float ) );
|
||||
AP = ( float* )malloc( (((LDA+1)*LDA)/2)*sizeof( float ) );
|
||||
A = ( float* )malloc( (size_t)LDA*LDA*sizeof( float ) );
|
||||
AP = ( float* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( float ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
for( i=0; i<j+1; i++, k++ )
|
||||
|
@ -410,8 +410,8 @@ void F77_stpmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
A = ( float* )malloc( LDA*LDA*sizeof( float ) );
|
||||
AP = ( float* )malloc( (((LDA+1)*LDA)/2)*sizeof( float ) );
|
||||
A = ( float* )malloc( (size_t)LDA*LDA*sizeof( float ) );
|
||||
AP = ( float* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( float ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
for( i=0; i<j+1; i++, k++ )
|
||||
|
@ -449,8 +449,8 @@ void F77_stpsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
A = ( float* )malloc( LDA*LDA*sizeof( float ) );
|
||||
AP = ( float* )malloc( (((LDA+1)*LDA)/2)*sizeof( float ) );
|
||||
A = ( float* )malloc( (size_t)LDA*LDA*sizeof( float ) );
|
||||
AP = ( float* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( float ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
for( i=0; i<j+1; i++, k++ )
|
||||
|
@ -485,8 +485,8 @@ void F77_sspr(int *order, char *uplow, int *n, float *alpha, float *x,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
A = ( float* )malloc( LDA*LDA*sizeof( float ) );
|
||||
AP = ( float* )malloc( (((LDA+1)*LDA)/2)*sizeof( float ) );
|
||||
A = ( float* )malloc( (size_t)LDA*LDA*sizeof( float ) );
|
||||
AP = ( float* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( float ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
for( i=0; i<j+1; i++, k++ )
|
||||
|
@ -536,8 +536,8 @@ void F77_sspr2(int *order, char *uplow, int *n, float *alpha, float *x,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n;
|
||||
A = ( float* )malloc( LDA*LDA*sizeof( float ) );
|
||||
AP = ( float* )malloc( (((LDA+1)*LDA)/2)*sizeof( float ) );
|
||||
A = ( float* )malloc( (size_t)LDA*LDA*sizeof( float ) );
|
||||
AP = ( float* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( float ) );
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
for( i=0; i<j+1; i++, k++ )
|
||||
|
|
|
@ -23,34 +23,34 @@ void F77_sgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (transa == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A = (float *)malloc( (*m)*LDA*sizeof( float ) );
|
||||
A = (float *)malloc( (*m)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*k; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else {
|
||||
LDA = *m+1;
|
||||
A = ( float* )malloc( LDA*(*k)*sizeof( float ) );
|
||||
A = ( float* )malloc( (size_t)LDA*(*k)*sizeof( float ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*m; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
if (transb == CblasNoTrans) {
|
||||
LDB = *n+1;
|
||||
B = ( float* )malloc( (*k)*LDB*sizeof( float ) );
|
||||
B = ( float* )malloc( (*k)*(size_t)LDB*sizeof( float ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
}
|
||||
else {
|
||||
LDB = *k+1;
|
||||
B = ( float* )malloc( LDB*(*n)*sizeof( float ) );
|
||||
B = ( float* )malloc( (size_t)LDB*(*n)*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
}
|
||||
LDC = *n+1;
|
||||
C = ( float* )malloc( (*m)*LDC*sizeof( float ) );
|
||||
C = ( float* )malloc( (*m)*(size_t)LDC*sizeof( float ) );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
|
@ -85,25 +85,25 @@ void F77_ssymm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A = ( float* )malloc( (*m)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*m)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
LDB = *n+1;
|
||||
B = ( float* )malloc( (*m)*LDB*sizeof( float ) );
|
||||
B = ( float* )malloc( (*m)*(size_t)LDB*sizeof( float ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
LDC = *n+1;
|
||||
C = ( float* )malloc( (*m)*LDC*sizeof( float ) );
|
||||
C = ( float* )malloc( (*m)*(size_t)LDC*sizeof( float ) );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
|
@ -139,20 +139,20 @@ void F77_ssyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*k)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*k)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
LDC = *n+1;
|
||||
C = ( float* )malloc( (*n)*LDC*sizeof( float ) );
|
||||
C = ( float* )malloc( (*n)*(size_t)LDC*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
|
@ -187,8 +187,8 @@ void F77_ssyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
LDB = *k+1;
|
||||
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
|
||||
B = ( float* )malloc( (*n)*LDB*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
|
||||
B = ( float* )malloc( (*n)*(size_t)LDB*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
|
@ -198,8 +198,8 @@ void F77_ssyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
else {
|
||||
LDA = *n+1;
|
||||
LDB = *n+1;
|
||||
A = ( float* )malloc( LDA*(*k)*sizeof( float ) );
|
||||
B = ( float* )malloc( LDB*(*k)*sizeof( float ) );
|
||||
A = ( float* )malloc( (size_t)LDA*(*k)*sizeof( float ) );
|
||||
B = ( float* )malloc( (size_t)LDB*(*k)*sizeof( float ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
|
@ -207,7 +207,7 @@ void F77_ssyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
}
|
||||
}
|
||||
LDC = *n+1;
|
||||
C = ( float* )malloc( (*n)*LDC*sizeof( float ) );
|
||||
C = ( float* )malloc( (*n)*(size_t)LDC*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
|
@ -245,20 +245,20 @@ void F77_strmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A = ( float* )malloc( (*m)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*m)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
LDB = *n+1;
|
||||
B = ( float* )malloc( (*m)*LDB*sizeof( float ) );
|
||||
B = ( float* )malloc( (*m)*(size_t)LDB*sizeof( float ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
|
@ -296,20 +296,20 @@ void F77_strsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A = ( float* )malloc( (*m)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*m)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
|
||||
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
LDB = *n+1;
|
||||
B = ( float* )malloc( (*m)*LDB*sizeof( float ) );
|
||||
B = ( float* )malloc( (*m)*(size_t)LDB*sizeof( float ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
|
|
|
@ -20,7 +20,7 @@ void F77_zgemv(int *order, char *transp, int *m, int *n,
|
|||
get_transpose_type(transp, &trans);
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = (CBLAS_TEST_ZOMPLEX *)malloc( (*m)*LDA*sizeof( CBLAS_TEST_ZOMPLEX) );
|
||||
A = (CBLAS_TEST_ZOMPLEX *)malloc( (*m)*(size_t)LDA*sizeof( CBLAS_TEST_ZOMPLEX) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -50,7 +50,7 @@ void F77_zgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
|
|||
get_transpose_type(transp, &trans);
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *ku+*kl+2;
|
||||
A=( CBLAS_TEST_ZOMPLEX* )malloc((*n+*kl)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=( CBLAS_TEST_ZOMPLEX* )malloc((*n+*kl)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*ku; i++ ){
|
||||
irow=*ku+*kl-i;
|
||||
jcol=(*ku)-i;
|
||||
|
@ -94,7 +94,7 @@ void F77_zgeru(int *order, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -122,7 +122,7 @@ void F77_zgerc(int *order, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -154,7 +154,7 @@ void F77_zhemv(int *order, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A = (CBLAS_TEST_ZOMPLEX *)malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A = (CBLAS_TEST_ZOMPLEX *)malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -190,7 +190,7 @@ int i,irow,j,jcol,LDA;
|
|||
*incx, beta, y, *incy );
|
||||
else {
|
||||
LDA = *k+2;
|
||||
A =(CBLAS_TEST_ZOMPLEX*)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A =(CBLAS_TEST_ZOMPLEX*)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -251,8 +251,8 @@ void F77_zhpmv(int *order, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha,
|
|||
beta, y, *incy);
|
||||
else {
|
||||
LDA = *n;
|
||||
A = (CBLAS_TEST_ZOMPLEX* )malloc(LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
|
||||
AP = (CBLAS_TEST_ZOMPLEX* )malloc( (((LDA+1)*LDA)/2)*
|
||||
A = (CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
|
||||
AP = (CBLAS_TEST_ZOMPLEX* )malloc( ((((size_t)LDA+1)*LDA)/2)*
|
||||
sizeof( CBLAS_TEST_ZOMPLEX ));
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
|
@ -311,7 +311,7 @@ void F77_ztbmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
x, *incx);
|
||||
else {
|
||||
LDA = *k+2;
|
||||
A=(CBLAS_TEST_ZOMPLEX *)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX *)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -375,7 +375,7 @@ void F77_ztbsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
*incx);
|
||||
else {
|
||||
LDA = *k+2;
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
|
||||
if (uplo == CblasUpper) {
|
||||
for( i=0; i<*k; i++ ){
|
||||
irow=*k-i;
|
||||
|
@ -436,8 +436,8 @@ void F77_ztpmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
cblas_ztpmv( CblasRowMajor, UNDEFINED, trans, diag, *n, ap, x, *incx );
|
||||
else {
|
||||
LDA = *n;
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc(LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
AP=(CBLAS_TEST_ZOMPLEX*)malloc((((LDA+1)*LDA)/2)*
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
AP=(CBLAS_TEST_ZOMPLEX*)malloc(((((size_t)LDA+1)*LDA)/2)*
|
||||
sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
|
@ -491,8 +491,8 @@ void F77_ztpsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
cblas_ztpsv( CblasRowMajor, UNDEFINED, trans, diag, *n, ap, x, *incx );
|
||||
else {
|
||||
LDA = *n;
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc(LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
AP=(CBLAS_TEST_ZOMPLEX*)malloc((((LDA+1)*LDA)/2)*
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
AP=(CBLAS_TEST_ZOMPLEX*)malloc(((((size_t)LDA+1)*LDA)/2)*
|
||||
sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
|
@ -544,7 +544,7 @@ void F77_ztrmv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA=*n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -573,7 +573,7 @@ void F77_ztrsv(int *order, char *uplow, char *transp, char *diagn,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A =(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
A =(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
|
||||
|
@ -601,8 +601,8 @@ void F77_zhpr(int *order, char *uplow, int *n, double *alpha,
|
|||
cblas_zhpr(CblasRowMajor, UNDEFINED, *n, *alpha, x, *incx, ap );
|
||||
else {
|
||||
LDA = *n;
|
||||
A = (CBLAS_TEST_ZOMPLEX* )malloc(LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
AP = ( CBLAS_TEST_ZOMPLEX* )malloc( (((LDA+1)*LDA)/2)*
|
||||
A = (CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
AP = ( CBLAS_TEST_ZOMPLEX* )malloc( ((((size_t)LDA+1)*LDA)/2)*
|
||||
sizeof( CBLAS_TEST_ZOMPLEX ));
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
|
@ -678,8 +678,8 @@ void F77_zhpr2(int *order, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha,
|
|||
*incy, ap );
|
||||
else {
|
||||
LDA = *n;
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc( LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
AP=(CBLAS_TEST_ZOMPLEX*)malloc( (((LDA+1)*LDA)/2)*
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc( (size_t)LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
AP=(CBLAS_TEST_ZOMPLEX*)malloc( ((((size_t)LDA+1)*LDA)/2)*
|
||||
sizeof( CBLAS_TEST_ZOMPLEX ));
|
||||
if (uplo == CblasUpper) {
|
||||
for( j=0, k=0; j<*n; j++ )
|
||||
|
@ -750,7 +750,7 @@ void F77_zher(int *order, char *uplow, int *n, double *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n)*LDA*sizeof( CBLAS_TEST_ZOMPLEX ));
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n)*(size_t)LDA*sizeof( CBLAS_TEST_ZOMPLEX ));
|
||||
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
|
@ -784,7 +784,7 @@ void F77_zher2(int *order, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha,
|
|||
|
||||
if (*order == TEST_ROW_MJR) {
|
||||
LDA = *n+1;
|
||||
A= ( CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
A= ( CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
|
|
|
@ -26,7 +26,7 @@ void F77_zgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (transa == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX*)malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -35,7 +35,7 @@ void F77_zgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
|||
}
|
||||
else {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -45,7 +45,7 @@ void F77_zgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
|||
|
||||
if (transb == CblasNoTrans) {
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_ZOMPLEX) );
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
|
@ -54,7 +54,7 @@ void F77_zgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
|||
}
|
||||
else {
|
||||
LDB = *k+1;
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDB*(*n)*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
|
@ -63,7 +63,7 @@ void F77_zgemm(int *order, char *transpa, char *transpb, int *m, int *n,
|
|||
}
|
||||
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
|
@ -103,7 +103,7 @@ void F77_zhemm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A= (CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A= (CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -112,7 +112,7 @@ void F77_zhemm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
|||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -120,14 +120,14 @@ void F77_zhemm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
|||
}
|
||||
}
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc( (*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc( (*m)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
|
||||
}
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
|
@ -167,25 +167,25 @@ void F77_zsymm(int *order, char *rtlf, char *uplow, int *m, int *n,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
A[i*LDA+j]=a[j*(*lda)+i];
|
||||
}
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX ));
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX ));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ )
|
||||
B[i*LDB+j]=b[j*(*ldb)+i];
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( j=0; j<*n; j++ )
|
||||
for( i=0; i<*m; i++ )
|
||||
C[i*LDC+j]=c[j*(*ldc)+i];
|
||||
|
@ -221,7 +221,7 @@ void F77_zherk(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -230,7 +230,7 @@ void F77_zherk(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -238,7 +238,7 @@ void F77_zherk(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
}
|
||||
}
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
|
@ -277,7 +277,7 @@ void F77_zsyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -286,7 +286,7 @@ void F77_zsyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -294,7 +294,7 @@ void F77_zsyrk(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
}
|
||||
}
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
|
@ -333,8 +333,8 @@ void F77_zher2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
LDB = *k+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDB*sizeof(CBLAS_TEST_ZOMPLEX ));
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX ));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -346,8 +346,8 @@ void F77_zher2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
else {
|
||||
LDA = *n+1;
|
||||
LDB = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc( LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc( LDB*(*k)*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc( (size_t)LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc( (size_t)LDB*(*k)*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -357,7 +357,7 @@ void F77_zher2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
}
|
||||
}
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc( (*n)*LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc( (*n)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
|
@ -397,8 +397,8 @@ void F77_zsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
if (trans == CblasNoTrans) {
|
||||
LDA = *k+1;
|
||||
LDB = *k+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDB*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*k; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -410,8 +410,8 @@ void F77_zsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
else {
|
||||
LDA = *n+1;
|
||||
LDB = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc(LDB*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDB*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*k; i++ )
|
||||
for( j=0; j<*n; j++ ){
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -421,7 +421,7 @@ void F77_zsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
|
|||
}
|
||||
}
|
||||
LDC = *n+1;
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc( (*n)*LDC*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
C=(CBLAS_TEST_ZOMPLEX* )malloc( (*n)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
C[i*LDC+j].real=c[j*(*ldc)+i].real;
|
||||
|
@ -463,7 +463,7 @@ void F77_ztrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -472,7 +472,7 @@ void F77_ztrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
|||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -480,7 +480,7 @@ void F77_ztrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
|||
}
|
||||
}
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
|
@ -522,7 +522,7 @@ void F77_ztrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
|||
if (*order == TEST_ROW_MJR) {
|
||||
if (side == CblasLeft) {
|
||||
LDA = *m+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc( (*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc( (*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*m; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -531,7 +531,7 @@ void F77_ztrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
|||
}
|
||||
else{
|
||||
LDA = *n+1;
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*n; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
A[i*LDA+j].real=a[j*(*lda)+i].real;
|
||||
|
@ -539,7 +539,7 @@ void F77_ztrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
|
|||
}
|
||||
}
|
||||
LDB = *n+1;
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX));
|
||||
for( i=0; i<*m; i++ )
|
||||
for( j=0; j<*n; j++ ) {
|
||||
B[i*LDB+j].real=b[j*(*ldb)+i].real;
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
#include "cblas_test.h"
|
||||
int CBLAS_CallFromC;
|
||||
int RowMajorStrg;
|
||||
|
||||
|
|
|
@ -126,7 +126,7 @@ extern void openblas_warning(int verbose, const char * msg);
|
|||
#endif
|
||||
|
||||
#define get_cpu_ftr(id, var) ({ \
|
||||
__asm__ ("mrs %0, "#id : "=r" (var)); \
|
||||
__asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \
|
||||
})
|
||||
|
||||
static char *corename[] = {
|
||||
|
|
|
@ -139,9 +139,13 @@ endif
|
|||
ifneq (,$(filter 1 2,$(NOFORTRAN)))
|
||||
#only build without Fortran
|
||||
$(CC) $(CFLAGS) $(LDFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(INTERNALNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
else
|
||||
ifeq ($(F_COMPILER), INTEL)
|
||||
$(FC) $(FFLAGS) $(LDFLAGS) -all-load -headerpad_max_install_names -install_name "$(CURDIR)/../$(INTERNALNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def
|
||||
else
|
||||
$(FC) $(FFLAGS) $(LDFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(INTERNALNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
|
||||
endif
|
||||
endif
|
||||
|
||||
dllinit.$(SUFFIX) : dllinit.c
|
||||
$(CC) $(CFLAGS) -c -o $(@F) -s $<
|
||||
|
|
4
f_check
4
f_check
|
@ -391,10 +391,6 @@ if ($link ne "") {
|
|||
|
||||
}
|
||||
|
||||
if ($vendor eq "INTEL"){
|
||||
$linker_a .= "-lgfortran"
|
||||
}
|
||||
|
||||
if ($vendor eq "FLANG"){
|
||||
$linker_a .= "-lflang"
|
||||
}
|
||||
|
|
|
@ -4,7 +4,15 @@
|
|||
#else
|
||||
#include "config_kernel.h"
|
||||
#endif
|
||||
#include "common.h"
|
||||
#if (defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64)) && defined(__64BIT__)
|
||||
typedef long long BLASLONG;
|
||||
typedef unsigned long long BLASULONG;
|
||||
#else
|
||||
typedef long BLASLONG;
|
||||
typedef unsigned long BLASULONG;
|
||||
#endif
|
||||
|
||||
#include "param.h"
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
|
|
|
@ -150,9 +150,9 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
|||
#endif
|
||||
|
||||
if ( *lda > *ldb )
|
||||
msize = (*lda) * (*ldb) * sizeof(FLOAT);
|
||||
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT);
|
||||
else
|
||||
msize = (*ldb) * (*ldb) * sizeof(FLOAT);
|
||||
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT);
|
||||
|
||||
b = malloc(msize);
|
||||
if ( b == NULL )
|
||||
|
|
|
@ -172,9 +172,9 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
|
|||
#endif
|
||||
|
||||
if ( *lda > *ldb )
|
||||
msize = (*lda) * (*ldb) * sizeof(FLOAT) * 2;
|
||||
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2;
|
||||
else
|
||||
msize = (*ldb) * (*ldb) * sizeof(FLOAT) * 2;
|
||||
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2;
|
||||
|
||||
b = malloc(msize);
|
||||
if ( b == NULL )
|
||||
|
|
|
@ -79,8 +79,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
|
|||
aa_i = fabs(da_r);
|
||||
}
|
||||
|
||||
if (aa_r == ZERO) {
|
||||
ada = 0.;
|
||||
} else {
|
||||
scale = (aa_i / aa_r);
|
||||
ada = aa_r * sqrt(ONE + scale * scale);
|
||||
}
|
||||
|
||||
bb_r = fabs(db_r);
|
||||
bb_i = fabs(db_i);
|
||||
|
@ -90,9 +94,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
|
|||
bb_i = fabs(bb_r);
|
||||
}
|
||||
|
||||
if (bb_r == ZERO) {
|
||||
adb = 0.;
|
||||
} else {
|
||||
scale = (bb_i / bb_r);
|
||||
adb = bb_r * sqrt(ONE + scale * scale);
|
||||
|
||||
}
|
||||
scale = ada + adb;
|
||||
|
||||
aa_r = da_r / scale;
|
||||
|
|
|
@ -271,11 +271,6 @@ All rights reserved.
|
|||
ldr s2, [A03]
|
||||
ldr s3, [A04]
|
||||
|
||||
add A01, A01, #4
|
||||
add A02, A02, #4
|
||||
add A03, A03, #4
|
||||
add A04, A04, #4
|
||||
|
||||
stp s0, s1, [B04]
|
||||
add B04, B04, #8
|
||||
stp s2, s3, [B04]
|
||||
|
@ -286,11 +281,6 @@ All rights reserved.
|
|||
ldr s6, [A07]
|
||||
ldr s7, [A08]
|
||||
|
||||
ldr d4, [A05], #8
|
||||
ldr d5, [A06], #8
|
||||
ldr d6, [A07], #8
|
||||
ldr d7, [A08], #8
|
||||
|
||||
stp s4, s5, [B04]
|
||||
add B04, B04, #8
|
||||
stp s6, s7, [B04]
|
||||
|
|
|
@ -169,8 +169,13 @@ ZROTKERNEL = zrot.c
|
|||
#
|
||||
SSCALKERNEL = sscal.c
|
||||
DSCALKERNEL = dscal.c
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
CSCALKERNEL = ../arm/zscal.c
|
||||
ZSCALKERNEL = ../arm/zscal.c
|
||||
else
|
||||
CSCALKERNEL = zscal.c
|
||||
ZSCALKERNEL = zscal.c
|
||||
endif
|
||||
#
|
||||
SSWAPKERNEL = sswap.c
|
||||
DSWAPKERNEL = dswap.c
|
||||
|
@ -181,7 +186,7 @@ ZSWAPKERNEL = zswap.c
|
|||
SGEMVNKERNEL = sgemv_n.c
|
||||
DGEMVNKERNEL = dgemv_n_power10.c
|
||||
CGEMVNKERNEL = cgemv_n.c
|
||||
ZGEMVNKERNEL = zgemv_n_4.c
|
||||
ZGEMVNKERNEL = zgemv_n_power10.c
|
||||
#
|
||||
SGEMVTKERNEL = sgemv_t.c
|
||||
DGEMVTKERNEL = dgemv_t_power10.c
|
||||
|
|
|
@ -242,8 +242,13 @@ ZROTKERNEL = zrot.c
|
|||
#
|
||||
SSCALKERNEL = sscal.c
|
||||
DSCALKERNEL = dscal.c
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
CSCALKERNEL = ../arm/zscal.c
|
||||
ZSCALKERNEL = ../arm/zscal.c
|
||||
else
|
||||
CSCALKERNEL = zscal.c
|
||||
ZSCALKERNEL = zscal.c
|
||||
endif
|
||||
#
|
||||
SSWAPKERNEL = sswap.c
|
||||
DSWAPKERNEL = dswap.c
|
||||
|
|
|
@ -166,8 +166,13 @@ ZROTKERNEL = zrot.c
|
|||
#
|
||||
SSCALKERNEL = sscal.c
|
||||
DSCALKERNEL = dscal.c
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
CSCALKERNEL = ../arm/zscal.c
|
||||
ZSCALKERNEL = ../arm/zscal.c
|
||||
else
|
||||
CSCALKERNEL = zscal.c
|
||||
ZSCALKERNEL = zscal.c
|
||||
endif
|
||||
#
|
||||
SSWAPKERNEL = sswap.c
|
||||
DSWAPKERNEL = dswap.c
|
||||
|
|
|
@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#else
|
||||
|
||||
#include "common.h"
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "cdot_microk_power10.c"
|
||||
#else
|
||||
#ifndef HAVE_KERNEL_8
|
||||
|
@ -120,7 +120,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
|
|||
|
||||
if ((inc_x == 1) && (inc_y == 1)) {
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
BLASLONG n1 = n & -16;
|
||||
#else
|
||||
BLASLONG n1 = n & -8;
|
||||
|
|
|
@ -39,8 +39,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "cswap_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "cswap_microk_power10.c"
|
||||
#elif defined(POWER10)
|
||||
#include "cswap_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -49,8 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "dasum_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "dasum_microk_power10.c"
|
||||
#elif defined(POWER10)
|
||||
#include "dasum_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -112,7 +114,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
|||
if ( inc_x == 1 )
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
if ( n >= 16 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;
|
||||
|
|
|
@ -190,10 +190,9 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
__vector_quad acc0, acc1, acc2, acc3, acc4,acc5,acc6,acc7;
|
||||
BLASLONG l = 0;
|
||||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__vector_pair rowB, rowB1;
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[0]));
|
||||
rowB1 = *((__vector_pair *)((void *)&BO[4]));
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
|
||||
|
@ -205,9 +204,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
rowA = (vec_t *) & AO[l << 3];
|
||||
rb = (vec_t *) & BO[l << 3];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[l << 3]));
|
||||
rowB1 = *((__vector_pair *)((void *)&BO[(l << 3) + 4]));
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
|
||||
|
@ -247,9 +245,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
BLASLONG l = 0;
|
||||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__vector_pair rowB, rowB1;
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[0]));
|
||||
rowB1 = *((__vector_pair *)((void *)&BO[4]));
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
|
||||
|
@ -257,9 +254,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
rowA = (vec_t *) & AO[l << 2];
|
||||
rb = (vec_t *) & BO[l << 3];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[l << 3]));
|
||||
rowB1 = *((__vector_pair *)((void *)&BO[(l << 3) + 4]));
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
|
||||
|
@ -291,17 +287,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
BLASLONG l = 0;
|
||||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__vector_pair rowB, rowB1;
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[0]));
|
||||
rowB1 = *((__vector_pair *)((void *)&BO[4]));
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
|
||||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
rowA = (vec_t *) & AO[l << 1];
|
||||
rb = (vec_t *) & BO[l << 3];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[l << 3]));
|
||||
rowB1 = *((__vector_pair *)((void *)&BO[(l << 3) + 4]));
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
|
||||
}
|
||||
|
@ -403,8 +397,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
BLASLONG l = 0;
|
||||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[0]));
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
||||
__builtin_mma_xvf64ger (&acc2, rowB, rowA[2]);
|
||||
|
@ -412,8 +405,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
rowA = (vec_t *) & AO[l << 3];
|
||||
rb = (vec_t *) & BO[l << 2];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[l << 2]));
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
|
||||
|
@ -445,15 +437,13 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
BLASLONG l = 0;
|
||||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[0]));
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
|
||||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
rowA = (vec_t *) & AO[l << 2];
|
||||
rb = (vec_t *) & BO[l << 2];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[l << 2]));
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
|
||||
}
|
||||
|
@ -481,14 +471,12 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
|
|||
BLASLONG l = 0;
|
||||
vec_t *rowA = (vec_t *) & AO[0];
|
||||
__vector_pair rowB;
|
||||
vec_t *rb = (vec_t *) & BO[0];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[0]));
|
||||
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
|
||||
for (l = 1; l < temp; l++)
|
||||
{
|
||||
rowA = (vec_t *) & AO[l << 1];
|
||||
rb = (vec_t *) & BO[l << 2];
|
||||
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
|
||||
rowB = *((__vector_pair *)((void *)&BO[l << 2]));
|
||||
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
|
||||
}
|
||||
SAVE_ACC (&acc0, 0);
|
||||
|
|
|
@ -42,8 +42,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "drot_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "drot_microk_power10.c"
|
||||
#elif defined(POWER10)
|
||||
#include "drot_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -117,7 +119,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
|
|||
if ( (inc_x == 1) && (inc_y == 1) )
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
if ( n >= 16 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3;
|
||||
|
|
|
@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "dscal_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "dscal_microk_power10.c"
|
||||
#elif defined(POWER10)
|
||||
#include "dscal_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -102,7 +104,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
|
|||
if ( da == 0.0 )
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
if ( n >= 16 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;
|
||||
|
@ -136,7 +138,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
|
|||
else
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
if ( n >= 16 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;
|
||||
|
|
|
@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "dswap_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "swap_microk_power10.c"
|
||||
#elif defined(POWER10)
|
||||
#include "dswap_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -117,7 +119,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
|
|||
if ( (inc_x == 1) && (inc_y == 1 ))
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
if ( n >= 32 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3;
|
||||
|
|
|
@ -49,8 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "sasum_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "sasum_microk_power10.c"
|
||||
#elif defined(POWER10)
|
||||
#include "sasum_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -112,7 +114,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
|
|||
if ( inc_x == 1 )
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
if ( n >= 32 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;
|
||||
|
|
|
@ -42,8 +42,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "srot_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "srot_microk_power10.c"
|
||||
#elif defined(POWER10)
|
||||
#include "srot_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -117,7 +119,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
|
|||
if ( (inc_x == 1) && (inc_y == 1) )
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
if ( n >= 16 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7;
|
||||
|
|
|
@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "sscal_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "sscal_microk_power10.c"
|
||||
#elif defined(POWER10)
|
||||
#include "sscal_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -104,7 +106,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
|
|||
if ( da == 0.0 )
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
if ( n >= 32 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;
|
||||
|
@ -138,7 +140,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
|
|||
else
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
if ( n >= 32 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;
|
||||
|
|
|
@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "sswap_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "swap_microk_power10.c"
|
||||
#elif defined(POWER10)
|
||||
#include "sswap_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
@ -117,7 +119,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
|
|||
if ( (inc_x == 1) && (inc_y == 1 ))
|
||||
{
|
||||
|
||||
#if defined(POWER10)
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
if ( n >= 64 )
|
||||
{
|
||||
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7;
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -43,6 +43,134 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#elif HAVE_KERNEL_4x4_VEC
|
||||
|
||||
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
typedef __vector unsigned char vec_t;
|
||||
typedef FLOAT v4sf_t __attribute__ ((vector_size (16)));
|
||||
|
||||
|
||||
/*
 * MMA-builtin variant of the 4-column ZGEMV kernel.
 *
 * Walks four columns of `ap` (a0..a3, each `lda` FLOATs after the previous
 * one) together with `x`, accumulating products in eight MMA accumulators,
 * then adds the alpha-scaled per-column sums to y[0..7] (four re/im pairs).
 *
 *   n                 element count; doubled below to index FLOATs
 *                     (presumably a complex-element count - TODO confirm)
 *   lda               distance between consecutive columns, in FLOATs
 *   ap, x             inputs, read only
 *   y                 updated in place: y[2k], y[2k+1] for column k
 *   alpha_r, alpha_i  real and imaginary part of the scaling factor
 *
 * CONJ / XCONJ select the sign pattern used when combining the products.
 */
static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
|
||||
BLASLONG i;
|
||||
FLOAT *a0, *a1, *a2, *a3;
|
||||
/* Column base pointers: four consecutive columns, lda FLOATs apart. */
a0 = ap;
|
||||
a1 = ap + lda;
|
||||
a2 = a1 + lda;
|
||||
a3 = a2 + lda;
|
||||
/* acc0..acc3 and acc4..acc7 hold one accumulator per column each. */
__vector_quad acc0, acc1, acc2, acc3;;
|
||||
__vector_quad acc4, acc5, acc6, acc7;
|
||||
/* Scratch area an accumulator is unpacked into after the loop. */
v4sf_t result[4];
|
||||
__vector_pair *Va0, *Va1, *Va2, *Va3;
|
||||
i = 0;
|
||||
/* From here on n bounds i, which indexes individual FLOATs. */
n = n << 1;
|
||||
/* Zero all eight accumulators before accumulating into them. */
__builtin_mma_xxsetaccz (&acc0);
|
||||
__builtin_mma_xxsetaccz (&acc1);
|
||||
__builtin_mma_xxsetaccz (&acc2);
|
||||
__builtin_mma_xxsetaccz (&acc3);
|
||||
__builtin_mma_xxsetaccz (&acc4);
|
||||
__builtin_mma_xxsetaccz (&acc5);
|
||||
__builtin_mma_xxsetaccz (&acc6);
|
||||
__builtin_mma_xxsetaccz (&acc7);
|
||||
/*
 * Each pass advances i by 8 FLOATs. There is no tail handling, so this
 * presumably relies on the caller making the doubled n a multiple of 8
 * - verify against the caller.
 */
while (i < n) {
|
||||
|
||||
vec_t *rx = (vec_t *) & x[i];
|
||||
Va0 = ((__vector_pair*)((void*)&a0[i]));
|
||||
Va1 = ((__vector_pair*)((void*)&a1[i]));
|
||||
Va2 = ((__vector_pair*)((void*)&a2[i]));
|
||||
Va3 = ((__vector_pair*)((void*)&a3[i]));
|
||||
|
||||
/* First vector pair of each column: rx[0] feeds acc0..3, rx[1] feeds acc4..7. */
__builtin_mma_xvf64gerpp (&acc0, Va0[0], rx[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, Va1[0], rx[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, Va2[0], rx[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc3, Va3[0], rx[0]);
|
||||
__builtin_mma_xvf64gerpp (&acc4, Va0[0], rx[1]);
|
||||
__builtin_mma_xvf64gerpp (&acc5, Va1[0], rx[1]);
|
||||
__builtin_mma_xvf64gerpp (&acc6, Va2[0], rx[1]);
|
||||
__builtin_mma_xvf64gerpp (&acc7, Va3[0], rx[1]);
|
||||
/* Second vector pair of each column: rx[2] feeds acc0..3, rx[3] feeds acc4..7. */
__builtin_mma_xvf64gerpp (&acc0, Va0[1], rx[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc1, Va1[1], rx[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc2, Va2[1], rx[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc3, Va3[1], rx[2]);
|
||||
__builtin_mma_xvf64gerpp (&acc4, Va0[1], rx[3]);
|
||||
__builtin_mma_xvf64gerpp (&acc5, Va1[1], rx[3]);
|
||||
__builtin_mma_xvf64gerpp (&acc6, Va2[1], rx[3]);
|
||||
__builtin_mma_xvf64gerpp (&acc7, Va3[1], rx[3]);
|
||||
i += 8;
|
||||
|
||||
}
|
||||
/*
 * Reduction. For column k, rows 0-1 of acc<k> and rows 2-3 of acc<k+4> are
 * combined into temp_rk / temp_ik; the other rows are not read. This arm
 * (neither or both of CONJ and XCONJ) uses r = [0][0] - [1][1] and
 * i = [0][1] + [1][0]; the #else arm flips both signs.
 */
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc0);
|
||||
register FLOAT temp_r0 = result[0][0] - result[1][1];
|
||||
register FLOAT temp_i0 = result[0][1] + result[1][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc4);
|
||||
temp_r0 += result[2][0] - result[3][1];
|
||||
temp_i0 += result[2][1] + result[3][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc1);
|
||||
register FLOAT temp_r1 = result[0][0] - result[1][1];
|
||||
register FLOAT temp_i1 = result[0][1] + result[1][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc5);
|
||||
temp_r1 += result[2][0] - result[3][1];
|
||||
temp_i1 += result[2][1] + result[3][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc2);
|
||||
register FLOAT temp_r2 = result[0][0] - result[1][1];
|
||||
register FLOAT temp_i2 = result[0][1] + result[1][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc6);
|
||||
temp_r2 += result[2][0] - result[3][1];
|
||||
temp_i2 += result[2][1] + result[3][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc3);
|
||||
register FLOAT temp_r3 = result[0][0] - result[1][1];
|
||||
register FLOAT temp_i3 = result[0][1] + result[1][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc7);
|
||||
temp_r3 += result[2][0] - result[3][1];
|
||||
temp_i3 += result[2][1] + result[3][0];
|
||||
#else
|
||||
/* Exactly one of CONJ / XCONJ defined: r = [0][0] + [1][1], i = [0][1] - [1][0]. */
__builtin_mma_disassemble_acc ((void *)result, &acc0);
|
||||
register FLOAT temp_r0 = result[0][0] + result[1][1];
|
||||
register FLOAT temp_i0 = result[0][1] - result[1][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc4);
|
||||
temp_r0 += result[2][0] + result[3][1];
|
||||
temp_i0 += result[2][1] - result[3][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc1);
|
||||
register FLOAT temp_r1 = result[0][0] + result[1][1];
|
||||
register FLOAT temp_i1 = result[0][1] - result[1][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc5);
|
||||
temp_r1 += result[2][0] + result[3][1];
|
||||
temp_i1 += result[2][1] - result[3][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc2);
|
||||
register FLOAT temp_r2 = result[0][0] + result[1][1];
|
||||
register FLOAT temp_i2 = result[0][1] - result[1][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc6);
|
||||
temp_r2 += result[2][0] + result[3][1];
|
||||
temp_i2 += result[2][1] - result[3][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc3);
|
||||
register FLOAT temp_r3 = result[0][0] + result[1][1];
|
||||
register FLOAT temp_i3 = result[0][1] - result[1][0];
|
||||
__builtin_mma_disassemble_acc ((void *)result, &acc7);
|
||||
temp_r3 += result[2][0] + result[3][1];
|
||||
temp_i3 += result[2][1] - result[3][0];
|
||||
#endif
|
||||
/* Without XCONJ: y += alpha * temp, written out as a complex multiply. */
#if !defined(XCONJ)
|
||||
|
||||
y[0] += alpha_r * temp_r0 - alpha_i * temp_i0;
|
||||
y[1] += alpha_r * temp_i0 + alpha_i * temp_r0;
|
||||
y[2] += alpha_r * temp_r1 - alpha_i * temp_i1;
|
||||
y[3] += alpha_r * temp_i1 + alpha_i * temp_r1;
|
||||
y[4] += alpha_r * temp_r2 - alpha_i * temp_i2;
|
||||
y[5] += alpha_r * temp_i2 + alpha_i * temp_r2;
|
||||
y[6] += alpha_r * temp_r3 - alpha_i * temp_i3;
|
||||
y[7] += alpha_r * temp_i3 + alpha_i * temp_r3;
|
||||
|
||||
#else
|
||||
|
||||
/* With XCONJ: y += alpha * conj(temp); the imaginary update is subtracted. */
y[0] += alpha_r * temp_r0 + alpha_i * temp_i0;
|
||||
y[1] -= alpha_r * temp_i0 - alpha_i * temp_r0;
|
||||
y[2] += alpha_r * temp_r1 + alpha_i * temp_i1;
|
||||
y[3] -= alpha_r * temp_i1 - alpha_i * temp_r1;
|
||||
y[4] += alpha_r * temp_r2 + alpha_i * temp_i2;
|
||||
y[5] -= alpha_r * temp_i2 - alpha_i * temp_r2;
|
||||
y[6] += alpha_r * temp_r3 + alpha_i * temp_i3;
|
||||
y[7] -= alpha_r * temp_i3 - alpha_i * temp_r3;
|
||||
#endif
|
||||
}
|
||||
#else
|
||||
static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
|
||||
BLASLONG i;
|
||||
FLOAT *a0, *a1, *a2, *a3;
|
||||
|
@ -198,6 +326,7 @@ static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
|
|||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
#else
|
||||
|
||||
static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
|
||||
|
|
|
@ -43,12 +43,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(DOUBLE)
|
||||
#include "zscal_microk_power8.c"
|
||||
#endif
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#if defined(DOUBLE)
|
||||
#include "zscal_microk_power10.c"
|
||||
#else
|
||||
#include "cscal_microk_power10.c"
|
||||
#endif
|
||||
#elif defined(POWER10)
|
||||
#if defined(DOUBLE)
|
||||
#include "zscal_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -39,8 +39,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#if defined(__VEC__) || defined(__ALTIVEC__)
|
||||
#if defined(POWER8) || defined(POWER9)
|
||||
#include "zswap_microk_power8.c"
|
||||
#elif defined(POWER10)
|
||||
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
|
||||
#include "cswap_microk_power10.c"
|
||||
#elif defined(POWER10)
|
||||
#include "zswap_microk_power8.c"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -491,4 +491,3 @@ SSUMKERNEL = ../arm/sum.c
|
|||
DSUMKERNEL = ../arm/sum.c
|
||||
|
||||
SOMATCOPY_RT = omatcopy_rt.c
|
||||
DOMATCOPY_RT = omatcopy_rt.c
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
/* need a new enough GCC for avx512 support */
|
||||
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
|
||||
|
||||
#if defined(HAVE_FMA3) && defined(HAVE_AVX2)
|
||||
#define HAVE_DROT_KERNEL 1
|
||||
|
||||
#include <immintrin.h>
|
||||
|
|
|
@ -1,10 +1,10 @@
|
|||
/* the direct sgemm code written by Arjan van de Ven */
|
||||
|
||||
#include "common.h"
|
||||
|
||||
#if defined(SKYLAKEX) || defined (COOPERLAKE)
|
||||
|
||||
#include <immintrin.h>
|
||||
#include "common.h"
|
||||
|
||||
|
||||
/*
|
||||
* "Direct sgemm" code. This code operates directly on the inputs and outputs
|
||||
|
@ -472,7 +472,7 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG s
|
|||
}
|
||||
}
|
||||
#else
|
||||
#include "common.h"
|
||||
|
||||
/*
 * Empty fallback definition of CNAME: same signature, no work performed.
 * NOTE(review): this sits after an #else that appears to pair with the
 * SKYLAKEX / COOPERLAKE guard earlier in the file - confirm.
 */
void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
|
||||
{}
|
||||
#endif
|
||||
|
|
|
@ -501,7 +501,11 @@ CNAME(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float * __restrict__ A, f
|
|||
int32_t permil[16] = {0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3};
|
||||
BLASLONG n_count = n;
|
||||
float *a_pointer = A,*b_pointer = B,*c_pointer = C,*ctemp = C,*next_b = B;
|
||||
#if defined(__clang__)
|
||||
for(;n_count>23;n_count-=24) COMPUTE(24)
|
||||
#else
|
||||
for(;n_count>23;n_count-=24) COMPUTE_n24
|
||||
#endif
|
||||
for(;n_count>19;n_count-=20) COMPUTE(20)
|
||||
for(;n_count>15;n_count-=16) COMPUTE(16)
|
||||
for(;n_count>11;n_count-=12) COMPUTE(12)
|
||||
|
|
|
@ -566,8 +566,8 @@ void LAPACK_cgbrfsx(
|
|||
lapack_int const* n, lapack_int const* kl, lapack_int const* ku, lapack_int const* nrhs,
|
||||
lapack_complex_float const* AB, lapack_int const* ldab,
|
||||
lapack_complex_float const* AFB, lapack_int const* ldafb, lapack_int const* ipiv,
|
||||
float* R,
|
||||
float* C,
|
||||
const float* R,
|
||||
const float* C,
|
||||
lapack_complex_float const* B, lapack_int const* ldb,
|
||||
lapack_complex_float* X, lapack_int const* ldx,
|
||||
float* rcond,
|
||||
|
@ -585,8 +585,8 @@ void LAPACK_dgbrfsx(
|
|||
lapack_int const* n, lapack_int const* kl, lapack_int const* ku, lapack_int const* nrhs,
|
||||
double const* AB, lapack_int const* ldab,
|
||||
double const* AFB, lapack_int const* ldafb, lapack_int const* ipiv,
|
||||
double* R,
|
||||
double* C,
|
||||
const double* R,
|
||||
const double* C,
|
||||
double const* B, lapack_int const* ldb,
|
||||
double* X, lapack_int const* ldx,
|
||||
double* rcond,
|
||||
|
@ -604,8 +604,8 @@ void LAPACK_sgbrfsx(
|
|||
lapack_int const* n, lapack_int const* kl, lapack_int const* ku, lapack_int const* nrhs,
|
||||
float const* AB, lapack_int const* ldab,
|
||||
float const* AFB, lapack_int const* ldafb, lapack_int const* ipiv,
|
||||
float* R,
|
||||
float* C,
|
||||
const float* R,
|
||||
const float* C,
|
||||
float const* B, lapack_int const* ldb,
|
||||
float* X, lapack_int const* ldx,
|
||||
float* rcond,
|
||||
|
@ -623,8 +623,8 @@ void LAPACK_zgbrfsx(
|
|||
lapack_int const* n, lapack_int const* kl, lapack_int const* ku, lapack_int const* nrhs,
|
||||
lapack_complex_double const* AB, lapack_int const* ldab,
|
||||
lapack_complex_double const* AFB, lapack_int const* ldafb, lapack_int const* ipiv,
|
||||
double* R,
|
||||
double* C,
|
||||
const double* R,
|
||||
const double* C,
|
||||
lapack_complex_double const* B, lapack_int const* ldb,
|
||||
lapack_complex_double* X, lapack_int const* ldx,
|
||||
double* rcond,
|
||||
|
@ -2941,6 +2941,42 @@ void LAPACK_zgetsls(
|
|||
lapack_complex_double* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_cgetsqrhrt LAPACK_GLOBAL(cgetsqrhrt,CGETSQRHRT)
|
||||
void LAPACK_cgetsqrhrt(
|
||||
lapack_int const* m, lapack_int const* n,
|
||||
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
|
||||
lapack_complex_float* A, lapack_int const* lda,
|
||||
lapack_complex_float* T, lapack_int const* ldt,
|
||||
lapack_complex_float* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_dgetsqrhrt LAPACK_GLOBAL(dgetsqrhrt,DGETSQRHRT)
|
||||
void LAPACK_dgetsqrhrt(
|
||||
lapack_int const* m, lapack_int const* n,
|
||||
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
|
||||
double* A, lapack_int const* lda,
|
||||
double* T, lapack_int const* ldt,
|
||||
double* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_sgetsqrhrt LAPACK_GLOBAL(sgetsqrhrt,SGETSQRHRT)
|
||||
void LAPACK_sgetsqrhrt(
|
||||
lapack_int const* m, lapack_int const* n,
|
||||
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
|
||||
float* A, lapack_int const* lda,
|
||||
float* T, lapack_int const* ldt,
|
||||
float* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_zgetsqrhrt LAPACK_GLOBAL(zgetsqrhrt,ZGETSQRHRT)
|
||||
void LAPACK_zgetsqrhrt(
|
||||
lapack_int const* m, lapack_int const* n,
|
||||
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
|
||||
lapack_complex_double* A, lapack_int const* lda,
|
||||
lapack_complex_double* T, lapack_int const* ldt,
|
||||
lapack_complex_double* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_cggbak LAPACK_GLOBAL(cggbak,CGGBAK)
|
||||
void LAPACK_cggbak(
|
||||
char const* job, char const* side,
|
||||
|
@ -4768,7 +4804,7 @@ void LAPACK_chegst(
|
|||
lapack_int const* itype, char const* uplo,
|
||||
lapack_int const* n,
|
||||
lapack_complex_float* A, lapack_int const* lda,
|
||||
lapack_complex_float* B, lapack_int const* ldb,
|
||||
const lapack_complex_float* B, lapack_int const* ldb,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_zhegst LAPACK_GLOBAL(zhegst,ZHEGST)
|
||||
|
@ -4776,7 +4812,7 @@ void LAPACK_zhegst(
|
|||
lapack_int const* itype, char const* uplo,
|
||||
lapack_int const* n,
|
||||
lapack_complex_double* A, lapack_int const* lda,
|
||||
lapack_complex_double* B, lapack_int const* ldb,
|
||||
const lapack_complex_double* B, lapack_int const* ldb,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_chegv LAPACK_GLOBAL(chegv,CHEGV)
|
||||
|
@ -4913,7 +4949,7 @@ void LAPACK_cherfsx(
|
|||
lapack_int const* n, lapack_int const* nrhs,
|
||||
lapack_complex_float const* A, lapack_int const* lda,
|
||||
lapack_complex_float const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
|
||||
float* S,
|
||||
const float* S,
|
||||
lapack_complex_float const* B, lapack_int const* ldb,
|
||||
lapack_complex_float* X, lapack_int const* ldx,
|
||||
float* rcond,
|
||||
|
@ -4931,7 +4967,7 @@ void LAPACK_zherfsx(
|
|||
lapack_int const* n, lapack_int const* nrhs,
|
||||
lapack_complex_double const* A, lapack_int const* lda,
|
||||
lapack_complex_double const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
|
||||
double* S,
|
||||
const double* S,
|
||||
lapack_complex_double const* B, lapack_int const* ldb,
|
||||
lapack_complex_double* X, lapack_int const* ldx,
|
||||
double* rcond,
|
||||
|
@ -7251,6 +7287,24 @@ void LAPACK_sorgtr(
|
|||
float* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_dorgtsqr_row LAPACK_GLOBAL(dorgtsqr_row,DORGTSQR_ROW)
|
||||
void LAPACK_dorgtsqr_row(
|
||||
lapack_int const* m, lapack_int const* n,
|
||||
lapack_int const* mb, lapack_int const* nb,
|
||||
double* A, lapack_int const* lda,
|
||||
double const* T, lapack_int const* ldt,
|
||||
double* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_sorgtsqr_row LAPACK_GLOBAL(sorgtsqr_row,SORGTSQR_ROW)
|
||||
void LAPACK_sorgtsqr_row(
|
||||
lapack_int const* m, lapack_int const* n,
|
||||
lapack_int const* mb, lapack_int const* nb,
|
||||
float* A, lapack_int const* lda,
|
||||
float const* T, lapack_int const* ldt,
|
||||
float* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_dormbr LAPACK_GLOBAL(dormbr,DORMBR)
|
||||
void LAPACK_dormbr(
|
||||
char const* vect, char const* side, char const* trans,
|
||||
|
@ -8005,7 +8059,7 @@ void LAPACK_cporfsx(
|
|||
lapack_int const* n, lapack_int const* nrhs,
|
||||
lapack_complex_float const* A, lapack_int const* lda,
|
||||
lapack_complex_float const* AF, lapack_int const* ldaf,
|
||||
float* S,
|
||||
const float* S,
|
||||
lapack_complex_float const* B, lapack_int const* ldb,
|
||||
lapack_complex_float* X, lapack_int const* ldx,
|
||||
float* rcond,
|
||||
|
@ -8023,7 +8077,7 @@ void LAPACK_dporfsx(
|
|||
lapack_int const* n, lapack_int const* nrhs,
|
||||
double const* A, lapack_int const* lda,
|
||||
double const* AF, lapack_int const* ldaf,
|
||||
double* S,
|
||||
const double* S,
|
||||
double const* B, lapack_int const* ldb,
|
||||
double* X, lapack_int const* ldx,
|
||||
double* rcond,
|
||||
|
@ -8041,7 +8095,7 @@ void LAPACK_sporfsx(
|
|||
lapack_int const* n, lapack_int const* nrhs,
|
||||
float const* A, lapack_int const* lda,
|
||||
float const* AF, lapack_int const* ldaf,
|
||||
float* S,
|
||||
const float* S,
|
||||
float const* B, lapack_int const* ldb,
|
||||
float* X, lapack_int const* ldx,
|
||||
float* rcond,
|
||||
|
@ -8059,7 +8113,7 @@ void LAPACK_zporfsx(
|
|||
lapack_int const* n, lapack_int const* nrhs,
|
||||
lapack_complex_double const* A, lapack_int const* lda,
|
||||
lapack_complex_double const* AF, lapack_int const* ldaf,
|
||||
double* S,
|
||||
const double* S,
|
||||
lapack_complex_double const* B, lapack_int const* ldb,
|
||||
lapack_complex_double* X, lapack_int const* ldx,
|
||||
double* rcond,
|
||||
|
@ -10756,7 +10810,7 @@ void LAPACK_csyrfsx(
|
|||
lapack_int const* n, lapack_int const* nrhs,
|
||||
lapack_complex_float const* A, lapack_int const* lda,
|
||||
lapack_complex_float const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
|
||||
float* S,
|
||||
const float* S,
|
||||
lapack_complex_float const* B, lapack_int const* ldb,
|
||||
lapack_complex_float* X, lapack_int const* ldx,
|
||||
float* rcond,
|
||||
|
@ -10774,7 +10828,7 @@ void LAPACK_dsyrfsx(
|
|||
lapack_int const* n, lapack_int const* nrhs,
|
||||
double const* A, lapack_int const* lda,
|
||||
double const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
|
||||
double* S,
|
||||
const double* S,
|
||||
double const* B, lapack_int const* ldb,
|
||||
double* X, lapack_int const* ldx,
|
||||
double* rcond,
|
||||
|
@ -10792,7 +10846,7 @@ void LAPACK_ssyrfsx(
|
|||
lapack_int const* n, lapack_int const* nrhs,
|
||||
float const* A, lapack_int const* lda,
|
||||
float const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
|
||||
float* S,
|
||||
const float* S,
|
||||
float const* B, lapack_int const* ldb,
|
||||
float* X, lapack_int const* ldx,
|
||||
float* rcond,
|
||||
|
@ -10810,7 +10864,7 @@ void LAPACK_zsyrfsx(
|
|||
lapack_int const* n, lapack_int const* nrhs,
|
||||
lapack_complex_double const* A, lapack_int const* lda,
|
||||
lapack_complex_double const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
|
||||
double* S,
|
||||
const double* S,
|
||||
lapack_complex_double const* B, lapack_int const* ldb,
|
||||
lapack_complex_double* X, lapack_int const* ldx,
|
||||
double* rcond,
|
||||
|
@ -11556,7 +11610,7 @@ void LAPACK_zsytrs(
|
|||
void LAPACK_csytrs2(
|
||||
char const* uplo,
|
||||
lapack_int const* n, lapack_int const* nrhs,
|
||||
lapack_complex_float* A, lapack_int const* lda, lapack_int const* ipiv,
|
||||
const lapack_complex_float* A, lapack_int const* lda, lapack_int const* ipiv,
|
||||
lapack_complex_float* B, lapack_int const* ldb,
|
||||
lapack_complex_float* work,
|
||||
lapack_int* info );
|
||||
|
@ -11565,7 +11619,7 @@ void LAPACK_csytrs2(
|
|||
void LAPACK_dsytrs2(
|
||||
char const* uplo,
|
||||
lapack_int const* n, lapack_int const* nrhs,
|
||||
double* A, lapack_int const* lda, lapack_int const* ipiv,
|
||||
const double* A, lapack_int const* lda, lapack_int const* ipiv,
|
||||
double* B, lapack_int const* ldb,
|
||||
double* work,
|
||||
lapack_int* info );
|
||||
|
@ -11574,7 +11628,7 @@ void LAPACK_dsytrs2(
|
|||
void LAPACK_ssytrs2(
|
||||
char const* uplo,
|
||||
lapack_int const* n, lapack_int const* nrhs,
|
||||
float* A, lapack_int const* lda, lapack_int const* ipiv,
|
||||
const float* A, lapack_int const* lda, lapack_int const* ipiv,
|
||||
float* B, lapack_int const* ldb,
|
||||
float* work,
|
||||
lapack_int* info );
|
||||
|
@ -11583,7 +11637,7 @@ void LAPACK_ssytrs2(
|
|||
void LAPACK_zsytrs2(
|
||||
char const* uplo,
|
||||
lapack_int const* n, lapack_int const* nrhs,
|
||||
lapack_complex_double* A, lapack_int const* lda, lapack_int const* ipiv,
|
||||
const lapack_complex_double* A, lapack_int const* lda, lapack_int const* ipiv,
|
||||
lapack_complex_double* B, lapack_int const* ldb,
|
||||
lapack_complex_double* work,
|
||||
lapack_int* info );
|
||||
|
@ -13540,6 +13594,24 @@ void LAPACK_zungtr(
|
|||
lapack_complex_double* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_cungtsqr_row LAPACK_GLOBAL(cungtsqr_row,CUNGTSQR_ROW)
|
||||
void LAPACK_cungtsqr_row(
|
||||
lapack_int const* m, lapack_int const* n,
|
||||
lapack_int const* mb, lapack_int const* nb,
|
||||
lapack_complex_float* A, lapack_int const* lda,
|
||||
lapack_complex_float const* T, lapack_int const* ldt,
|
||||
lapack_complex_float* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_zungtsqr_row LAPACK_GLOBAL(zungtsqr_row,ZUNGTSQR_ROW)
|
||||
void LAPACK_zungtsqr_row(
|
||||
lapack_int const* m, lapack_int const* n,
|
||||
lapack_int const* mb, lapack_int const* nb,
|
||||
lapack_complex_double* A, lapack_int const* lda,
|
||||
lapack_complex_double const* T, lapack_int const* ldt,
|
||||
lapack_complex_double* work, lapack_int const* lwork,
|
||||
lapack_int* info );
|
||||
|
||||
#define LAPACK_cunmbr LAPACK_GLOBAL(cunmbr,CUNMBR)
|
||||
void LAPACK_cunmbr(
|
||||
char const* vect, char const* side, char const* trans,
|
||||
|
|
|
@ -1867,11 +1867,11 @@ lapack_int LAPACKE_zheevx( int matrix_layout, char jobz, char range, char uplo,
|
|||
|
||||
lapack_int LAPACKE_chegst( int matrix_layout, lapack_int itype, char uplo,
|
||||
lapack_int n, lapack_complex_float* a,
|
||||
lapack_int lda, lapack_complex_float* b,
|
||||
lapack_int lda, const lapack_complex_float* b,
|
||||
lapack_int ldb );
|
||||
lapack_int LAPACKE_zhegst( int matrix_layout, lapack_int itype, char uplo,
|
||||
lapack_int n, lapack_complex_double* a,
|
||||
lapack_int lda, lapack_complex_double* b,
|
||||
lapack_int lda, const lapack_complex_double* b,
|
||||
lapack_int ldb );
|
||||
|
||||
lapack_int LAPACKE_chegv( int matrix_layout, lapack_int itype, char jobz,
|
||||
|
@ -2598,6 +2598,15 @@ lapack_int LAPACKE_sorgtr( int matrix_layout, char uplo, lapack_int n, float* a,
|
|||
lapack_int LAPACKE_dorgtr( int matrix_layout, char uplo, lapack_int n, double* a,
|
||||
lapack_int lda, const double* tau );
|
||||
|
||||
lapack_int LAPACKE_sorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb, lapack_int nb,
|
||||
float* a, lapack_int lda,
|
||||
const float* t, lapack_int ldt );
|
||||
lapack_int LAPACKE_dorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb, lapack_int nb,
|
||||
double* a, lapack_int lda,
|
||||
const double* t, lapack_int ldt );
|
||||
|
||||
lapack_int LAPACKE_sormbr( int matrix_layout, char vect, char side, char trans,
|
||||
lapack_int m, lapack_int n, lapack_int k,
|
||||
const float* a, lapack_int lda, const float* tau,
|
||||
|
@ -4577,6 +4586,15 @@ lapack_int LAPACKE_zungtr( int matrix_layout, char uplo, lapack_int n,
|
|||
lapack_complex_double* a, lapack_int lda,
|
||||
const lapack_complex_double* tau );
|
||||
|
||||
lapack_int LAPACKE_cungtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb, lapack_int nb,
|
||||
lapack_complex_float* a, lapack_int lda,
|
||||
const lapack_complex_float* t, lapack_int ldt );
|
||||
lapack_int LAPACKE_zungtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb, lapack_int nb,
|
||||
lapack_complex_double* a, lapack_int lda,
|
||||
const lapack_complex_double* t, lapack_int ldt );
|
||||
|
||||
lapack_int LAPACKE_cunmbr( int matrix_layout, char vect, char side, char trans,
|
||||
lapack_int m, lapack_int n, lapack_int k,
|
||||
const lapack_complex_float* a, lapack_int lda,
|
||||
|
@ -6932,11 +6950,11 @@ lapack_int LAPACKE_zheevx_work( int matrix_layout, char jobz, char range,
|
|||
|
||||
lapack_int LAPACKE_chegst_work( int matrix_layout, lapack_int itype, char uplo,
|
||||
lapack_int n, lapack_complex_float* a,
|
||||
lapack_int lda, lapack_complex_float* b,
|
||||
lapack_int lda, const lapack_complex_float* b,
|
||||
lapack_int ldb );
|
||||
lapack_int LAPACKE_zhegst_work( int matrix_layout, lapack_int itype, char uplo,
|
||||
lapack_int n, lapack_complex_double* a,
|
||||
lapack_int lda, lapack_complex_double* b,
|
||||
lapack_int lda, const lapack_complex_double* b,
|
||||
lapack_int ldb );
|
||||
|
||||
lapack_int LAPACKE_chegv_work( int matrix_layout, lapack_int itype, char jobz,
|
||||
|
@ -7880,6 +7898,19 @@ lapack_int LAPACKE_dorgtr_work( int matrix_layout, char uplo, lapack_int n,
|
|||
double* a, lapack_int lda, const double* tau,
|
||||
double* work, lapack_int lwork );
|
||||
|
||||
lapack_int LAPACKE_sorgtsqr_row_work( int matrix_layout,
|
||||
lapack_int m, lapack_int n,
|
||||
lapack_int mb, lapack_int nb,
|
||||
float* a, lapack_int lda,
|
||||
const float* t, lapack_int ldt,
|
||||
float* work, lapack_int lwork );
|
||||
lapack_int LAPACKE_dorgtsqr_row_work( int matrix_layout,
|
||||
lapack_int m, lapack_int n,
|
||||
lapack_int mb, lapack_int nb,
|
||||
double* a, lapack_int lda,
|
||||
const double* t, lapack_int ldt,
|
||||
double* work, lapack_int lwork );
|
||||
|
||||
lapack_int LAPACKE_sormbr_work( int matrix_layout, char vect, char side,
|
||||
char trans, lapack_int m, lapack_int n,
|
||||
lapack_int k, const float* a, lapack_int lda,
|
||||
|
@ -10281,6 +10312,19 @@ lapack_int LAPACKE_zungtr_work( int matrix_layout, char uplo, lapack_int n,
|
|||
const lapack_complex_double* tau,
|
||||
lapack_complex_double* work, lapack_int lwork );
|
||||
|
||||
lapack_int LAPACKE_cungtsqr_row_work( int matrix_layout,
|
||||
lapack_int m, lapack_int n,
|
||||
lapack_int mb, lapack_int nb,
|
||||
lapack_complex_float* a, lapack_int lda,
|
||||
const lapack_complex_float* t, lapack_int ldt,
|
||||
lapack_complex_float* work, lapack_int lwork );
|
||||
lapack_int LAPACKE_zungtsqr_row_work( int matrix_layout,
|
||||
lapack_int m, lapack_int n,
|
||||
lapack_int mb, lapack_int nb,
|
||||
lapack_complex_double* a, lapack_int lda,
|
||||
const lapack_complex_double* t, lapack_int ldt,
|
||||
lapack_complex_double* work, lapack_int lwork );
|
||||
|
||||
lapack_int LAPACKE_cunmbr_work( int matrix_layout, char vect, char side,
|
||||
char trans, lapack_int m, lapack_int n,
|
||||
lapack_int k, const lapack_complex_float* a,
|
||||
|
@ -10553,11 +10597,11 @@ lapack_int LAPACKE_csytri2x_work( int matrix_layout, char uplo, lapack_int n,
|
|||
const lapack_int* ipiv,
|
||||
lapack_complex_float* work, lapack_int nb );
|
||||
lapack_int LAPACKE_csytrs2( int matrix_layout, char uplo, lapack_int n,
|
||||
lapack_int nrhs, lapack_complex_float* a,
|
||||
lapack_int nrhs, const lapack_complex_float* a,
|
||||
lapack_int lda, const lapack_int* ipiv,
|
||||
lapack_complex_float* b, lapack_int ldb );
|
||||
lapack_int LAPACKE_csytrs2_work( int matrix_layout, char uplo, lapack_int n,
|
||||
lapack_int nrhs, lapack_complex_float* a,
|
||||
lapack_int nrhs, const lapack_complex_float* a,
|
||||
lapack_int lda, const lapack_int* ipiv,
|
||||
lapack_complex_float* b, lapack_int ldb,
|
||||
lapack_complex_float* work );
|
||||
|
@ -10718,10 +10762,10 @@ lapack_int LAPACKE_dsytri2x_work( int matrix_layout, char uplo, lapack_int n,
|
|||
const lapack_int* ipiv, double* work,
|
||||
lapack_int nb );
|
||||
lapack_int LAPACKE_dsytrs2( int matrix_layout, char uplo, lapack_int n,
|
||||
lapack_int nrhs, double* a, lapack_int lda,
|
||||
lapack_int nrhs, const double* a, lapack_int lda,
|
||||
const lapack_int* ipiv, double* b, lapack_int ldb );
|
||||
lapack_int LAPACKE_dsytrs2_work( int matrix_layout, char uplo, lapack_int n,
|
||||
lapack_int nrhs, double* a,
|
||||
lapack_int nrhs, const double* a,
|
||||
lapack_int lda, const lapack_int* ipiv,
|
||||
double* b, lapack_int ldb, double* work );
|
||||
lapack_int LAPACKE_sbbcsd( int matrix_layout, char jobu1, char jobu2,
|
||||
|
@ -10813,10 +10857,10 @@ lapack_int LAPACKE_ssytri2x_work( int matrix_layout, char uplo, lapack_int n,
|
|||
const lapack_int* ipiv, float* work,
|
||||
lapack_int nb );
|
||||
lapack_int LAPACKE_ssytrs2( int matrix_layout, char uplo, lapack_int n,
|
||||
lapack_int nrhs, float* a, lapack_int lda,
|
||||
lapack_int nrhs, const float* a, lapack_int lda,
|
||||
const lapack_int* ipiv, float* b, lapack_int ldb );
|
||||
lapack_int LAPACKE_ssytrs2_work( int matrix_layout, char uplo, lapack_int n,
|
||||
lapack_int nrhs, float* a,
|
||||
lapack_int nrhs, const float* a,
|
||||
lapack_int lda, const lapack_int* ipiv,
|
||||
float* b, lapack_int ldb, float* work );
|
||||
lapack_int LAPACKE_zbbcsd( int matrix_layout, char jobu1, char jobu2,
|
||||
|
@ -10898,11 +10942,11 @@ lapack_int LAPACKE_zsytri2x_work( int matrix_layout, char uplo, lapack_int n,
|
|||
const lapack_int* ipiv,
|
||||
lapack_complex_double* work, lapack_int nb );
|
||||
lapack_int LAPACKE_zsytrs2( int matrix_layout, char uplo, lapack_int n,
|
||||
lapack_int nrhs, lapack_complex_double* a,
|
||||
lapack_int nrhs, const lapack_complex_double* a,
|
||||
lapack_int lda, const lapack_int* ipiv,
|
||||
lapack_complex_double* b, lapack_int ldb );
|
||||
lapack_int LAPACKE_zsytrs2_work( int matrix_layout, char uplo, lapack_int n,
|
||||
lapack_int nrhs, lapack_complex_double* a,
|
||||
lapack_int nrhs, const lapack_complex_double* a,
|
||||
lapack_int lda, const lapack_int* ipiv,
|
||||
lapack_complex_double* b, lapack_int ldb,
|
||||
lapack_complex_double* work );
|
||||
|
@ -12026,6 +12070,44 @@ lapack_int LAPACKE_zgetsls_work( int matrix_layout, char trans, lapack_int m,
|
|||
lapack_complex_double* b, lapack_int ldb,
|
||||
lapack_complex_double* work, lapack_int lwork );
|
||||
|
||||
lapack_int LAPACKE_sgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb1, lapack_int nb1, lapack_int nb2,
|
||||
float* a, lapack_int lda,
|
||||
float* t, lapack_int ldt );
|
||||
lapack_int LAPACKE_dgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb1, lapack_int nb1, lapack_int nb2,
|
||||
double* a, lapack_int lda,
|
||||
double* t, lapack_int ldt );
|
||||
lapack_int LAPACKE_cgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb1, lapack_int nb1, lapack_int nb2,
|
||||
lapack_complex_float* a, lapack_int lda,
|
||||
lapack_complex_float* t, lapack_int ldt );
|
||||
lapack_int LAPACKE_zgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb1, lapack_int nb1, lapack_int nb2,
|
||||
lapack_complex_double* a, lapack_int lda,
|
||||
lapack_complex_double* t, lapack_int ldt );
|
||||
|
||||
lapack_int LAPACKE_sgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb1, lapack_int nb1, lapack_int nb2,
|
||||
float* a, lapack_int lda,
|
||||
float* t, lapack_int ldt,
|
||||
float* work, lapack_int lwork );
|
||||
lapack_int LAPACKE_dgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb1, lapack_int nb1, lapack_int nb2,
|
||||
double* a, lapack_int lda,
|
||||
double* t, lapack_int ldt,
|
||||
double* work, lapack_int lwork );
|
||||
lapack_int LAPACKE_cgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb1, lapack_int nb1, lapack_int nb2,
|
||||
lapack_complex_float* a, lapack_int lda,
|
||||
lapack_complex_float* t, lapack_int ldt,
|
||||
lapack_complex_float* work, lapack_int lwork );
|
||||
lapack_int LAPACKE_zgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb1, lapack_int nb1, lapack_int nb2,
|
||||
lapack_complex_double* a, lapack_int lda,
|
||||
lapack_complex_double* t, lapack_int ldt,
|
||||
lapack_complex_double* work, lapack_int lwork );
|
||||
|
||||
lapack_int LAPACKE_ssyev_2stage( int matrix_layout, char jobz, char uplo, lapack_int n,
|
||||
float* a, lapack_int lda, float* w );
|
||||
lapack_int LAPACKE_dsyev_2stage( int matrix_layout, char jobz, char uplo, lapack_int n,
|
||||
|
|
|
@ -162,6 +162,8 @@ lapacke_cgetrs.o \
|
|||
lapacke_cgetrs_work.o \
|
||||
lapacke_cgetsls.o \
|
||||
lapacke_cgetsls_work.o \
|
||||
lapacke_cgetsqrhrt.o \
|
||||
lapacke_cgetsqrhrt_work.o \
|
||||
lapacke_cggbak.o \
|
||||
lapacke_cggbak_work.o \
|
||||
lapacke_cggbal.o \
|
||||
|
@ -634,6 +636,8 @@ lapacke_cungrq.o \
|
|||
lapacke_cungrq_work.o \
|
||||
lapacke_cungtr.o \
|
||||
lapacke_cungtr_work.o \
|
||||
lapacke_cungtsqr_row.o \
|
||||
lapacke_cungtsqr_row_work.o \
|
||||
lapacke_cunmbr.o \
|
||||
lapacke_cunmbr_work.o \
|
||||
lapacke_cunmhr.o \
|
||||
|
@ -778,6 +782,8 @@ lapacke_dgetrs.o \
|
|||
lapacke_dgetrs_work.o \
|
||||
lapacke_dgetsls.o \
|
||||
lapacke_dgetsls_work.o \
|
||||
lapacke_dgetsqrhrt.o \
|
||||
lapacke_dgetsqrhrt_work.o \
|
||||
lapacke_dggbak.o \
|
||||
lapacke_dggbak_work.o \
|
||||
lapacke_dggbal.o \
|
||||
|
@ -900,6 +906,8 @@ lapacke_dorgrq.o \
|
|||
lapacke_dorgrq_work.o \
|
||||
lapacke_dorgtr.o \
|
||||
lapacke_dorgtr_work.o \
|
||||
lapacke_dorgtsqr_row.o \
|
||||
lapacke_dorgtsqr_row_work.o \
|
||||
lapacke_dormbr.o \
|
||||
lapacke_dormbr_work.o \
|
||||
lapacke_dormhr.o \
|
||||
|
@ -1348,6 +1356,8 @@ lapacke_sgetrs.o \
|
|||
lapacke_sgetrs_work.o \
|
||||
lapacke_sgetsls.o \
|
||||
lapacke_sgetsls_work.o \
|
||||
lapacke_sgetsqrhrt.o \
|
||||
lapacke_sgetsqrhrt_work.o \
|
||||
lapacke_sggbak.o \
|
||||
lapacke_sggbak_work.o \
|
||||
lapacke_sggbal.o \
|
||||
|
@ -1468,6 +1478,8 @@ lapacke_sorgrq.o \
|
|||
lapacke_sorgrq_work.o \
|
||||
lapacke_sorgtr.o \
|
||||
lapacke_sorgtr_work.o \
|
||||
lapacke_sorgtsqr_row.o \
|
||||
lapacke_sorgtsqr_row_work.o \
|
||||
lapacke_sormbr.o \
|
||||
lapacke_sormbr_work.o \
|
||||
lapacke_sormhr.o \
|
||||
|
@ -1908,6 +1920,8 @@ lapacke_zgetrs.o \
|
|||
lapacke_zgetrs_work.o \
|
||||
lapacke_zgetsls.o \
|
||||
lapacke_zgetsls_work.o \
|
||||
lapacke_zgetsqrhrt.o \
|
||||
lapacke_zgetsqrhrt_work.o \
|
||||
lapacke_zggbak.o \
|
||||
lapacke_zggbak_work.o \
|
||||
lapacke_zggbal.o \
|
||||
|
@ -2380,6 +2394,8 @@ lapacke_zungrq.o \
|
|||
lapacke_zungrq_work.o \
|
||||
lapacke_zungtr.o \
|
||||
lapacke_zungtr_work.o \
|
||||
lapacke_zungtsqr_row.o \
|
||||
lapacke_zungtsqr_row_work.o \
|
||||
lapacke_zunmbr.o \
|
||||
lapacke_zunmbr_work.o \
|
||||
lapacke_zunmhr.o \
|
||||
|
|
|
@ -56,6 +56,8 @@ lapack_int LAPACKE_cgesvd_work( int matrix_layout, char jobu, char jobvt,
|
|||
( LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1);
|
||||
lapack_int nrows_vt = LAPACKE_lsame( jobvt, 'a' ) ? n :
|
||||
( LAPACKE_lsame( jobvt, 's' ) ? MIN(m,n) : 1);
|
||||
lapack_int ncols_vt = ( LAPACKE_lsame( jobvt, 'a' ) ||
|
||||
LAPACKE_lsame( jobvt, 's' ) ) ? n : 1;
|
||||
lapack_int lda_t = MAX(1,m);
|
||||
lapack_int ldu_t = MAX(1,nrows_u);
|
||||
lapack_int ldvt_t = MAX(1,nrows_vt);
|
||||
|
@ -73,7 +75,7 @@ lapack_int LAPACKE_cgesvd_work( int matrix_layout, char jobu, char jobvt,
|
|||
LAPACKE_xerbla( "LAPACKE_cgesvd_work", info );
|
||||
return info;
|
||||
}
|
||||
if( ldvt < n ) {
|
||||
if( ldvt < ncols_vt ) {
|
||||
info = -12;
|
||||
LAPACKE_xerbla( "LAPACKE_cgesvd_work", info );
|
||||
return info;
|
||||
|
|
|
@ -0,0 +1,80 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2020, Intel Corp.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************
|
||||
* Contents: Native high-level C interface to LAPACK function cgetsqrhrt
|
||||
* Author: Intel Corporation
|
||||
*****************************************************************************/
|
||||
|
||||
#include "lapacke_utils.h"
|
||||
|
||||
lapack_int LAPACKE_cgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb1, lapack_int nb1, lapack_int nb2,
|
||||
lapack_complex_float* a, lapack_int lda,
|
||||
lapack_complex_float* t, lapack_int ldt )
|
||||
{
|
||||
lapack_int info = 0;
|
||||
lapack_int lwork = -1;
|
||||
lapack_complex_float* work = NULL;
|
||||
lapack_complex_float work_query;
|
||||
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
|
||||
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt", -1 );
|
||||
return -1;
|
||||
}
|
||||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
|
||||
return -7;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/* Query optimal working array(s) size */
|
||||
info = LAPACKE_cgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
|
||||
a, lda, t, ldt, &work_query, lwork );
|
||||
if( info != 0 ) {
|
||||
goto exit_level_0;
|
||||
}
|
||||
lwork = LAPACK_C2INT( work_query );
|
||||
/* Allocate memory for work arrays */
|
||||
work = (lapack_complex_float*)
|
||||
LAPACKE_malloc( sizeof(lapack_complex_float) * lwork );
|
||||
if( work == NULL ) {
|
||||
info = LAPACK_WORK_MEMORY_ERROR;
|
||||
goto exit_level_0;
|
||||
}
|
||||
/* Call middle-level interface */
|
||||
info = LAPACKE_cgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
|
||||
a, lda, t, ldt, work, lwork );
|
||||
/* Release memory and exit */
|
||||
LAPACKE_free( work );
|
||||
exit_level_0:
|
||||
if( info == LAPACK_WORK_MEMORY_ERROR ) {
|
||||
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt", info );
|
||||
}
|
||||
return info;
|
||||
}
|
|
@ -0,0 +1,108 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2020, Intel Corp.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************
|
||||
* Contents: Native middle-level C interface to LAPACK function cgetsqrhrt
|
||||
* Author: Intel Corporation
|
||||
*****************************************************************************/
|
||||
|
||||
#include "lapacke_utils.h"
|
||||
|
||||
lapack_int LAPACKE_cgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb1, lapack_int nb1, lapack_int nb2,
|
||||
lapack_complex_float* a, lapack_int lda,
|
||||
lapack_complex_float* t, lapack_int ldt,
|
||||
lapack_complex_float* work, lapack_int lwork )
|
||||
{
|
||||
lapack_int info = 0;
|
||||
if( matrix_layout == LAPACK_COL_MAJOR ) {
|
||||
/* Call LAPACK function and adjust info */
|
||||
LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt,
|
||||
work, &lwork, &info );
|
||||
if( info < 0 ) {
|
||||
info = info - 1;
|
||||
}
|
||||
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
|
||||
lapack_int lda_t = MAX(1,m);
|
||||
lapack_complex_float* a_t = NULL;
|
||||
lapack_int ldt_t = MAX(1,nb2);
|
||||
lapack_complex_float* t_t = NULL;
|
||||
/* Check leading dimension(s) */
|
||||
if( lda < n ) {
|
||||
info = -8;
|
||||
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
|
||||
return info;
|
||||
}
|
||||
if( ldt < n ) {
|
||||
info = -10;
|
||||
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
|
||||
return info;
|
||||
}
|
||||
/* Query optimal working array(s) size if requested */
|
||||
if( lwork == -1 ) {
|
||||
LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda_t, t, &ldt_t,
|
||||
work, &lwork, &info );
|
||||
return (info < 0) ? (info - 1) : info;
|
||||
}
|
||||
/* Allocate memory for temporary array(s) */
|
||||
a_t = (lapack_complex_float*)
|
||||
LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) );
|
||||
if( a_t == NULL ) {
|
||||
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
|
||||
goto exit_level_0;
|
||||
}
|
||||
t_t = (lapack_complex_float*)
|
||||
LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,n) );
|
||||
if( t_t == NULL ) {
|
||||
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
|
||||
goto exit_level_1;
|
||||
}
|
||||
/* Transpose input matrices */
|
||||
LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
|
||||
/* Call LAPACK function and adjust info */
|
||||
LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_t, &lda_t, t_t, &ldt_t,
|
||||
work, &lwork, &info );
|
||||
if( info < 0 ) {
|
||||
info = info - 1;
|
||||
}
|
||||
/* Transpose output matrices */
|
||||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
|
||||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, nb2, n, t_t, ldt_t, t, ldt );
|
||||
/* Release memory and exit */
|
||||
LAPACKE_free( t_t );
|
||||
exit_level_1:
|
||||
LAPACKE_free( a_t );
|
||||
exit_level_0:
|
||||
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
|
||||
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
|
||||
}
|
||||
} else {
|
||||
info = -1;
|
||||
LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
|
||||
}
|
||||
return info;
|
||||
}
|
|
@ -78,7 +78,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo,
|
|||
info = info - 1;
|
||||
}
|
||||
/* Transpose output matrices */
|
||||
if ( jobz == 'V') {
|
||||
if ( jobz == 'V' || jobz == 'v' ) {
|
||||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
|
||||
} else {
|
||||
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
|
||||
|
|
|
@ -79,7 +79,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo,
|
|||
info = info - 1;
|
||||
}
|
||||
/* Transpose output matrices */
|
||||
if ( jobz == 'V') {
|
||||
if ( jobz == 'V' || jobz == 'v' ) {
|
||||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
|
||||
} else {
|
||||
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
|
||||
|
|
|
@ -79,7 +79,7 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo,
|
|||
info = info - 1;
|
||||
}
|
||||
/* Transpose output matrices */
|
||||
if ( jobz == 'V') {
|
||||
if ( jobz == 'V' || jobz == 'v' ) {
|
||||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
|
||||
} else {
|
||||
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
|
||||
lapack_int LAPACKE_chegst( int matrix_layout, lapack_int itype, char uplo,
|
||||
lapack_int n, lapack_complex_float* a,
|
||||
lapack_int lda, lapack_complex_float* b,
|
||||
lapack_int lda, const lapack_complex_float* b,
|
||||
lapack_int ldb )
|
||||
{
|
||||
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
|
||||
|
|
|
@ -35,7 +35,7 @@
|
|||
|
||||
lapack_int LAPACKE_chegst_work( int matrix_layout, lapack_int itype, char uplo,
|
||||
lapack_int n, lapack_complex_float* a,
|
||||
lapack_int lda, lapack_complex_float* b,
|
||||
lapack_int lda, const lapack_complex_float* b,
|
||||
lapack_int ldb )
|
||||
{
|
||||
lapack_int info = 0;
|
||||
|
|
|
@ -50,10 +50,10 @@ lapack_int LAPACKE_chegv( int matrix_layout, lapack_int itype, char jobz,
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -6;
|
||||
}
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
|
||||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
|
||||
return -8;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,10 +50,10 @@ lapack_int LAPACKE_chegv_2stage( int matrix_layout, lapack_int itype, char jobz,
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -6;
|
||||
}
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
|
||||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
|
||||
return -8;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -55,10 +55,10 @@ lapack_int LAPACKE_chegvd( int matrix_layout, lapack_int itype, char jobz,
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -6;
|
||||
}
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
|
||||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
|
||||
return -8;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,7 +60,7 @@ lapack_int LAPACKE_chegvx( int matrix_layout, lapack_int itype, char jobz,
|
|||
if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) {
|
||||
return -15;
|
||||
}
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
|
||||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
|
||||
return -9;
|
||||
}
|
||||
if( LAPACKE_lsame( range, 'v' ) ) {
|
||||
|
|
|
@ -46,7 +46,7 @@ lapack_int LAPACKE_chetri2x( int matrix_layout, char uplo, lapack_int n,
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -4;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -42,9 +42,6 @@ lapack_int LAPACKE_clacpy_work( int matrix_layout, char uplo, lapack_int m,
|
|||
if( matrix_layout == LAPACK_COL_MAJOR ) {
|
||||
/* Call LAPACK function and adjust info */
|
||||
LAPACK_clacpy( &uplo, &m, &n, a, &lda, b, &ldb );
|
||||
if( info < 0 ) {
|
||||
info = info - 1;
|
||||
}
|
||||
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
|
||||
lapack_int lda_t = MAX(1,m);
|
||||
lapack_int ldb_t = MAX(1,m);
|
||||
|
|
|
@ -41,45 +41,46 @@ float LAPACKE_clantr_work( int matrix_layout, char norm, char uplo,
|
|||
lapack_int info = 0;
|
||||
float res = 0.;
|
||||
if( matrix_layout == LAPACK_COL_MAJOR ) {
|
||||
/* Call LAPACK function and adjust info */
|
||||
/* Call LAPACK function */
|
||||
res = LAPACK_clantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
|
||||
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
|
||||
lapack_int lda_t = MAX(1,m);
|
||||
lapack_complex_float* a_t = NULL;
|
||||
float* work_lapack = NULL;
|
||||
char norm_lapack;
|
||||
char uplo_lapack;
|
||||
/* Check leading dimension(s) */
|
||||
if( lda < n ) {
|
||||
info = -8;
|
||||
LAPACKE_xerbla( "LAPACKE_clantr_work", info );
|
||||
return info;
|
||||
}
|
||||
/* Allocate memory for temporary array(s) */
|
||||
a_t = (lapack_complex_float*)
|
||||
LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,MAX(m,n)) );
|
||||
if( a_t == NULL ) {
|
||||
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
|
||||
goto exit_level_0;
|
||||
if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) {
|
||||
norm_lapack = 'i';
|
||||
} else if( LAPACKE_lsame( norm, 'i' ) ) {
|
||||
norm_lapack = '1';
|
||||
} else {
|
||||
norm_lapack = norm;
|
||||
}
|
||||
if( LAPACKE_lsame( uplo, 'u' ) ) {
|
||||
uplo_lapack = 'l';
|
||||
} else {
|
||||
uplo_lapack = 'u';
|
||||
}
|
||||
/* Allocate memory for work array(s) */
|
||||
if( LAPACKE_lsame( norm, 'i' ) ) {
|
||||
work_lapack = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,m) );
|
||||
if( LAPACKE_lsame( norm_lapack, 'i' ) ) {
|
||||
work_lapack = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,n) );
|
||||
if( work_lapack == NULL ) {
|
||||
info = LAPACK_WORK_MEMORY_ERROR;
|
||||
goto exit_level_1;
|
||||
goto exit_level_0;
|
||||
}
|
||||
}
|
||||
/* Transpose input matrices */
|
||||
LAPACKE_ctr_trans( matrix_layout, uplo, diag, MAX(m,n), a, lda, a_t, lda_t );
|
||||
/* Call LAPACK function and adjust info */
|
||||
res = LAPACK_clantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work_lapack );
|
||||
/* Call LAPACK function */
|
||||
res = LAPACK_clantr( &norm_lapack, &uplo_lapack, &diag, &n, &m, a, &lda, work_lapack );
|
||||
/* Release memory and exit */
|
||||
if( work_lapack ) {
|
||||
LAPACKE_free( work_lapack );
|
||||
}
|
||||
exit_level_1:
|
||||
LAPACKE_free( a_t );
|
||||
exit_level_0:
|
||||
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
|
||||
if( info == LAPACK_WORK_MEMORY_ERROR ) {
|
||||
LAPACKE_xerbla( "LAPACKE_clantr_work", info );
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -83,6 +83,7 @@ lapack_int LAPACKE_clascl( int matrix_layout, char type, lapack_int kl,
|
|||
LAPACKE_cgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) {
|
||||
return -9;
|
||||
}
|
||||
break;
|
||||
case 'B':
|
||||
// TYPE = 'B' - lower part of symmetric band matrix (assume m==n)
|
||||
if( LAPACKE_chb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) {
|
||||
|
|
|
@ -42,9 +42,6 @@ lapack_int LAPACKE_claset_work( int matrix_layout, char uplo, lapack_int m,
|
|||
if( matrix_layout == LAPACK_COL_MAJOR ) {
|
||||
/* Call LAPACK function and adjust info */
|
||||
LAPACK_claset( &uplo, &m, &n, &alpha, &beta, a, &lda );
|
||||
if( info < 0 ) {
|
||||
info = info - 1;
|
||||
}
|
||||
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
|
||||
lapack_int lda_t = MAX(1,m);
|
||||
lapack_complex_float* a_t = NULL;
|
||||
|
|
|
@ -45,7 +45,7 @@ lapack_int LAPACKE_csyconv( int matrix_layout, char uplo, char way, lapack_int n
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -5;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
#include "lapacke_utils.h"
|
||||
|
||||
lapack_int LAPACKE_csytrs2( int matrix_layout, char uplo, lapack_int n,
|
||||
lapack_int nrhs, lapack_complex_float* a,
|
||||
lapack_int nrhs, const lapack_complex_float* a,
|
||||
lapack_int lda, const lapack_int* ipiv,
|
||||
lapack_complex_float* b, lapack_int ldb )
|
||||
{
|
||||
|
|
|
@ -34,7 +34,7 @@
|
|||
#include "lapacke_utils.h"
|
||||
|
||||
lapack_int LAPACKE_csytrs2_work( int matrix_layout, char uplo, lapack_int n,
|
||||
lapack_int nrhs, lapack_complex_float* a,
|
||||
lapack_int nrhs, const lapack_complex_float* a,
|
||||
lapack_int lda, const lapack_int* ipiv,
|
||||
lapack_complex_float* b, lapack_int ldb,
|
||||
lapack_complex_float* work )
|
||||
|
|
|
@ -44,7 +44,7 @@ lapack_int LAPACKE_ctrttf( int matrix_layout, char transr, char uplo,
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_ctr_nancheck( matrix_layout, uplo, 'n', n, a, lda ) ) {
|
||||
return -5;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,7 +44,7 @@ lapack_int LAPACKE_ctrttp( int matrix_layout, char uplo, lapack_int n,
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_ctr_nancheck( matrix_layout, uplo, 'n', n, a, lda ) ) {
|
||||
return -4;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,7 +48,7 @@ lapack_int LAPACKE_cungtr( int matrix_layout, char uplo, lapack_int n,
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -4;
|
||||
}
|
||||
if( LAPACKE_c_nancheck( n-1, tau, 1 ) ) {
|
||||
|
|
|
@ -0,0 +1,83 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2020, Intel Corp.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************
|
||||
* Contents: Native high-level C interface to LAPACK function cungtsqr_row
|
||||
* Author: Intel Corporation
|
||||
*****************************************************************************/
|
||||
|
||||
#include "lapacke_utils.h"
|
||||
|
||||
lapack_int LAPACKE_cungtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb, lapack_int nb,
|
||||
lapack_complex_float* a, lapack_int lda,
|
||||
const lapack_complex_float* t, lapack_int ldt )
|
||||
{
|
||||
lapack_int info = 0;
|
||||
lapack_int lwork = -1;
|
||||
lapack_complex_float* work = NULL;
|
||||
lapack_complex_float work_query;
|
||||
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {
|
||||
LAPACKE_xerbla( "LAPACKE_cungtsqr_row", -1 );
|
||||
return -1;
|
||||
}
|
||||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
|
||||
return -6;
|
||||
}
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, nb, n, t, ldt ) ) {
|
||||
return -8;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/* Query optimal working array(s) size */
|
||||
info = LAPACKE_cungtsqr_row_work( matrix_layout, m, n, mb, nb,
|
||||
a, lda, t, ldt, &work_query, lwork );
|
||||
if( info != 0 ) {
|
||||
goto exit_level_0;
|
||||
}
|
||||
lwork = LAPACK_C2INT( work_query );
|
||||
/* Allocate memory for work arrays */
|
||||
work = (lapack_complex_float*)
|
||||
LAPACKE_malloc( sizeof(lapack_complex_float) * lwork );
|
||||
if( work == NULL ) {
|
||||
info = LAPACK_WORK_MEMORY_ERROR;
|
||||
goto exit_level_0;
|
||||
}
|
||||
/* Call middle-level interface */
|
||||
info = LAPACKE_cungtsqr_row_work( matrix_layout, m, n, mb, nb,
|
||||
a, lda, t, ldt, work, lwork );
|
||||
/* Release memory and exit */
|
||||
LAPACKE_free( work );
|
||||
exit_level_0:
|
||||
if( info == LAPACK_WORK_MEMORY_ERROR ) {
|
||||
LAPACKE_xerbla( "LAPACKE_cungtsqr_row", info );
|
||||
}
|
||||
return info;
|
||||
}
|
|
@ -0,0 +1,109 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2020, Intel Corp.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************
|
||||
* Contents: Native middle-level C interface to LAPACK function cungtsqr_row
|
||||
* Author: Intel Corporation
|
||||
*****************************************************************************/
|
||||
|
||||
#include "lapacke_utils.h"
|
||||
|
||||
lapack_int LAPACKE_cungtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n,
|
||||
lapack_int mb, lapack_int nb,
|
||||
lapack_complex_float* a, lapack_int lda,
|
||||
const lapack_complex_float* t, lapack_int ldt,
|
||||
lapack_complex_float* work, lapack_int lwork )
|
||||
{
|
||||
lapack_int info = 0;
|
||||
if (matrix_layout == LAPACK_COL_MAJOR) {
|
||||
/* Call LAPACK function and adjust info */
|
||||
LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt,
|
||||
work, &lwork, &info);
|
||||
if (info < 0) {
|
||||
info = info - 1;
|
||||
}
|
||||
} else if (matrix_layout == LAPACK_ROW_MAJOR) {
|
||||
lapack_int lda_t = MAX(1,m);
|
||||
lapack_complex_float* a_t = NULL;
|
||||
/* Check leading dimension(s) */
|
||||
if( lda < n ) {
|
||||
info = -7;
|
||||
LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
|
||||
return info;
|
||||
}
|
||||
lapack_int ldt_t = MAX(1,nb);
|
||||
lapack_complex_float* t_t = NULL;
|
||||
/* Check leading dimension(s) */
|
||||
if( ldt < n ) {
|
||||
info = -9;
|
||||
LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
|
||||
return info;
|
||||
}
|
||||
/* Query optimal working array(s) size if requested */
|
||||
if( lwork == -1 ) {
|
||||
LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t,
|
||||
work, &lwork, &info );
|
||||
return (info < 0) ? (info - 1) : info;
|
||||
}
|
||||
/* Allocate memory for temporary array(s) */
|
||||
a_t = (lapack_complex_float*)
|
||||
LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) );
|
||||
if( a_t == NULL ) {
|
||||
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
|
||||
goto exit_level_0;
|
||||
}
|
||||
t_t = (lapack_complex_float*)
|
||||
LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,n) );
|
||||
if( t_t == NULL ) {
|
||||
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
|
||||
goto exit_level_1;
|
||||
}
|
||||
/* Transpose input matrices */
|
||||
LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
|
||||
LAPACKE_cge_trans( matrix_layout, nb, n, a, lda, t_t, ldt_t );
|
||||
/* Call LAPACK function and adjust info */
|
||||
LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t,
|
||||
work, &lwork, &info );
|
||||
if( info < 0 ) {
|
||||
info = info - 1;
|
||||
}
|
||||
/* Transpose output matrices */
|
||||
LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
|
||||
/* Release memory and exit */
|
||||
LAPACKE_free( t_t );
|
||||
exit_level_1:
|
||||
LAPACKE_free( a_t );
|
||||
exit_level_0:
|
||||
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
|
||||
LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
|
||||
}
|
||||
} else {
|
||||
info = -1;
|
||||
LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
|
||||
}
|
||||
return info;
|
||||
}
|
|
@ -52,7 +52,7 @@ lapack_int LAPACKE_cunmtr( int matrix_layout, char side, char uplo, char trans,
|
|||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
r = LAPACKE_lsame( side, 'l' ) ? m : n;
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, r, r, a, lda ) ) {
|
||||
if( LAPACKE_che_nancheck( matrix_layout, uplo, r, a, lda ) ) {
|
||||
return -7;
|
||||
}
|
||||
if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) {
|
||||
|
|
|
@ -54,6 +54,8 @@ lapack_int LAPACKE_dgesvd_work( int matrix_layout, char jobu, char jobvt,
|
|||
( LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1);
|
||||
lapack_int nrows_vt = LAPACKE_lsame( jobvt, 'a' ) ? n :
|
||||
( LAPACKE_lsame( jobvt, 's' ) ? MIN(m,n) : 1);
|
||||
lapack_int ncols_vt = ( LAPACKE_lsame( jobvt, 'a' ) ||
|
||||
LAPACKE_lsame( jobvt, 's' ) ) ? n : 1;
|
||||
lapack_int lda_t = MAX(1,m);
|
||||
lapack_int ldu_t = MAX(1,nrows_u);
|
||||
lapack_int ldvt_t = MAX(1,nrows_vt);
|
||||
|
@ -71,7 +73,7 @@ lapack_int LAPACKE_dgesvd_work( int matrix_layout, char jobu, char jobvt,
|
|||
LAPACKE_xerbla( "LAPACKE_dgesvd_work", info );
|
||||
return info;
|
||||
}
|
||||
if( ldvt < n ) {
|
||||
if( ldvt < ncols_vt ) {
|
||||
info = -12;
|
||||
LAPACKE_xerbla( "LAPACKE_dgesvd_work", info );
|
||||
return info;
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2020, Intel Corp.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************
|
||||
* Contents: Native high-level C interface to LAPACK function dgetsqrhrt
|
||||
* Author: Intel Corporation
|
||||
*****************************************************************************/
|
||||
|
||||
#include "lapacke_utils.h"
|
||||
|
||||
/*
 * High-level LAPACKE driver for dgetsqrhrt.
 *
 * Validates the layout flag, optionally scans A for NaNs, asks the
 * middle-level routine for the workspace size, allocates that workspace,
 * runs the computation and releases the workspace again.
 *
 * Returns:
 *   -1                        matrix_layout is neither LAPACK_COL_MAJOR nor
 *                             LAPACK_ROW_MAJOR (reported through LAPACKE_xerbla)
 *   -7                        NaN check is enabled and A contains a NaN
 *   LAPACK_WORK_MEMORY_ERROR  the workspace could not be allocated
 *                             (reported through LAPACKE_xerbla)
 *   otherwise                 whatever LAPACKE_dgetsqrhrt_work returned
 */
lapack_int LAPACKE_dgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
                               lapack_int mb1, lapack_int nb1, lapack_int nb2,
                               double* a, lapack_int lda,
                               double* t, lapack_int ldt )
{
    lapack_int status = 0;
    lapack_int work_len = -1;   /* -1 requests a workspace-size query */
    double optimal_len;
    double* scratch = NULL;

    /* Only the two LAPACKE storage layouts are accepted */
    if( !( matrix_layout == LAPACK_COL_MAJOR ||
           matrix_layout == LAPACK_ROW_MAJOR ) ) {
        LAPACKE_xerbla( "LAPACKE_dgetsqrhrt", -1 );
        return -1;
    }
#ifndef LAPACK_DISABLE_NAN_CHECK
    /* Optional NaN screening of the input matrix (argument 7) */
    if( LAPACKE_get_nancheck() &&
        LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) {
        return -7;
    }
#endif
    /* First pass: workspace query, the size comes back in optimal_len */
    status = LAPACKE_dgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
                                      a, lda, t, ldt, &optimal_len, work_len );
    if( status == 0 ) {
        work_len = (lapack_int)optimal_len;
        scratch = (double*)LAPACKE_malloc( sizeof(double) * work_len );
        if( scratch != NULL ) {
            /* Second pass: the actual computation */
            status = LAPACKE_dgetsqrhrt_work( matrix_layout, m, n, mb1, nb1,
                                              nb2, a, lda, t, ldt,
                                              scratch, work_len );
            LAPACKE_free( scratch );
        } else {
            status = LAPACK_WORK_MEMORY_ERROR;
        }
    }
    /* Allocation failures are the only condition reported from this level */
    if( status == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_dgetsqrhrt", status );
    }
    return status;
}
|
|
@ -0,0 +1,106 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2020, Intel Corp.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************
|
||||
* Contents: Native middle-level C interface to LAPACK function dgetsqrhrt
|
||||
* Author: Intel Corporation
|
||||
*****************************************************************************/
|
||||
|
||||
#include "lapacke_utils.h"
|
||||
|
||||
/*
 * Middle-level LAPACKE wrapper for dgetsqrhrt.
 *
 * Column-major input is forwarded to LAPACK_dgetsqrhrt unchanged.
 * Row-major input is copied into temporary column-major buffers, processed,
 * and the results (A and the nb2-by-n T) are copied back.
 * A call with lwork == -1 in row-major layout only performs the workspace
 * query and returns.
 *
 * Negative info values coming from LAPACK are shifted by one because the
 * C interface has the extra leading matrix_layout argument.
 *
 * Returns -1 for an unknown layout, -8 / -10 when lda / ldt is smaller than
 * n in row-major layout, LAPACK_TRANSPOSE_MEMORY_ERROR when a temporary
 * buffer cannot be allocated, otherwise the (adjusted) LAPACK info.
 */
lapack_int LAPACKE_dgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
                                    lapack_int mb1, lapack_int nb1, lapack_int nb2,
                                    double* a, lapack_int lda,
                                    double* t, lapack_int ldt,
                                    double* work, lapack_int lwork )
{
    lapack_int info = 0;
    lapack_int a_ld_col;
    lapack_int t_ld_col;
    double* a_col = NULL;
    double* t_col = NULL;

    /* Reject anything that is not one of the two known layouts */
    if( matrix_layout != LAPACK_COL_MAJOR &&
        matrix_layout != LAPACK_ROW_MAJOR ) {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info );
        return info;
    }

    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Native layout: call LAPACK directly and adjust info */
        LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt,
                           work, &lwork, &info );
        return ( info < 0 ) ? ( info - 1 ) : info;
    }

    /* Row-major layout from here on */
    a_ld_col = MAX(1,m);
    t_ld_col = MAX(1,nb2);

    /* Check leading dimension(s) */
    if( lda < n ) {
        info = -8;
        LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info );
        return info;
    }
    if( ldt < n ) {
        info = -10;
        LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info );
        return info;
    }

    /* Workspace query: no temporaries needed, hand back the answer */
    if( lwork == -1 ) {
        LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &a_ld_col, t,
                           &t_ld_col, work, &lwork, &info );
        return ( info < 0 ) ? ( info - 1 ) : info;
    }

    /* Allocate the column-major temporaries; each scope frees its own buffer */
    a_col = (double*)LAPACKE_malloc( sizeof(double) * a_ld_col * MAX(1,n) );
    if( a_col == NULL ) {
        info = LAPACK_TRANSPOSE_MEMORY_ERROR;
    } else {
        t_col = (double*)LAPACKE_malloc( sizeof(double) * t_ld_col * MAX(1,n) );
        if( t_col == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
        } else {
            /* Transpose input matrices */
            LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_col, a_ld_col );
            /* Call LAPACK function and adjust info */
            LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_col, &a_ld_col,
                               t_col, &t_ld_col, work, &lwork, &info );
            if( info < 0 ) {
                info -= 1;
            }
            /* Transpose output matrices */
            LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_col, a_ld_col,
                               a, lda );
            LAPACKE_dge_trans( LAPACK_COL_MAJOR, nb2, n, t_col, t_ld_col,
                               t, ldt );
            LAPACKE_free( t_col );
        }
        LAPACKE_free( a_col );
    }
    if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", info );
    }
    return info;
}
|
|
@ -41,9 +41,6 @@ lapack_int LAPACKE_dlacpy_work( int matrix_layout, char uplo, lapack_int m,
|
|||
if( matrix_layout == LAPACK_COL_MAJOR ) {
|
||||
/* Call LAPACK function and adjust info */
|
||||
LAPACK_dlacpy( &uplo, &m, &n, a, &lda, b, &ldb );
|
||||
if( info < 0 ) {
|
||||
info = info - 1;
|
||||
}
|
||||
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
|
||||
lapack_int lda_t = MAX(1,m);
|
||||
lapack_int ldb_t = MAX(1,m);
|
||||
|
|
|
@ -40,44 +40,46 @@ double LAPACKE_dlantr_work( int matrix_layout, char norm, char uplo,
|
|||
lapack_int info = 0;
|
||||
double res = 0.;
|
||||
if( matrix_layout == LAPACK_COL_MAJOR ) {
|
||||
/* Call LAPACK function and adjust info */
|
||||
/* Call LAPACK function */
|
||||
res = LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
|
||||
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
|
||||
lapack_int lda_t = MAX(1,m);
|
||||
double* a_t = NULL;
|
||||
double* work_lapack = NULL;
|
||||
char norm_lapack;
|
||||
char uplo_lapack;
|
||||
/* Check leading dimension(s) */
|
||||
if( lda < n ) {
|
||||
info = -8;
|
||||
LAPACKE_xerbla( "LAPACKE_dlantr_work", info );
|
||||
return info;
|
||||
}
|
||||
/* Allocate memory for temporary array(s) */
|
||||
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,MAX(m,n)) );
|
||||
if( a_t == NULL ) {
|
||||
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
|
||||
goto exit_level_0;
|
||||
if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) {
|
||||
norm_lapack = 'i';
|
||||
} else if( LAPACKE_lsame( norm, 'i' ) ) {
|
||||
norm_lapack = '1';
|
||||
} else {
|
||||
norm_lapack = norm;
|
||||
}
|
||||
if( LAPACKE_lsame( uplo, 'u' ) ) {
|
||||
uplo_lapack = 'l';
|
||||
} else {
|
||||
uplo_lapack = 'u';
|
||||
}
|
||||
/* Allocate memory for work array(s) */
|
||||
if( LAPACKE_lsame( norm, 'i' ) ) {
|
||||
work_lapack = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,m) );
|
||||
if( LAPACKE_lsame( norm_lapack, 'i' ) ) {
|
||||
work_lapack = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,n) );
|
||||
if( work_lapack == NULL ) {
|
||||
info = LAPACK_WORK_MEMORY_ERROR;
|
||||
goto exit_level_1;
|
||||
goto exit_level_0;
|
||||
}
|
||||
}
|
||||
/* Transpose input matrices */
|
||||
LAPACKE_dtr_trans( matrix_layout, uplo, diag, MAX(m,n), a, lda, a_t, lda_t );
|
||||
/* Call LAPACK function and adjust info */
|
||||
res = LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work_lapack );
|
||||
/* Call LAPACK function */
|
||||
res = LAPACK_dlantr( &norm_lapack, &uplo_lapack, &diag, &n, &m, a, &lda, work_lapack );
|
||||
/* Release memory and exit */
|
||||
if( work_lapack ) {
|
||||
LAPACKE_free( work_lapack );
|
||||
}
|
||||
exit_level_1:
|
||||
LAPACKE_free( a_t );
|
||||
exit_level_0:
|
||||
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
|
||||
if( info == LAPACK_WORK_MEMORY_ERROR ) {
|
||||
LAPACKE_xerbla( "LAPACKE_dlantr_work", info );
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -83,6 +83,7 @@ lapack_int LAPACKE_dlascl( int matrix_layout, char type, lapack_int kl,
|
|||
LAPACKE_dgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) {
|
||||
return -9;
|
||||
}
|
||||
break;
|
||||
case 'B':
|
||||
// TYPE = 'B' - lower part of symmetric band matrix (assume m==n)
|
||||
if( LAPACKE_dsb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) {
|
||||
|
|
|
@ -41,9 +41,6 @@ lapack_int LAPACKE_dlaset_work( int matrix_layout, char uplo, lapack_int m,
|
|||
if( matrix_layout == LAPACK_COL_MAJOR ) {
|
||||
/* Call LAPACK function and adjust info */
|
||||
LAPACK_dlaset( &uplo, &m, &n, &alpha, &beta, a, &lda );
|
||||
if( info < 0 ) {
|
||||
info = info - 1;
|
||||
}
|
||||
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
|
||||
lapack_int lda_t = MAX(1,m);
|
||||
double* a_t = NULL;
|
||||
|
|
|
@ -47,7 +47,7 @@ lapack_int LAPACKE_dorgtr( int matrix_layout, char uplo, lapack_int n, double* a
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -4;
|
||||
}
|
||||
if( LAPACKE_d_nancheck( n-1, tau, 1 ) ) {
|
||||
|
|
|
@ -0,0 +1,82 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2020, Intel Corp.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************
|
||||
* Contents: Native high-level C interface to LAPACK function dorgtsqr_row
|
||||
* Author: Intel Corporation
|
||||
*****************************************************************************/
|
||||
|
||||
#include "lapacke_utils.h"
|
||||
|
||||
/*
 * High-level LAPACKE driver for dorgtsqr_row.
 *
 * Validates the layout flag, optionally scans A and T for NaNs, asks the
 * middle-level routine for the workspace size, allocates that workspace,
 * runs the computation and releases the workspace again.
 *
 * Returns:
 *   -1                        matrix_layout is neither LAPACK_COL_MAJOR nor
 *                             LAPACK_ROW_MAJOR (reported through LAPACKE_xerbla)
 *   -6 / -8                   NaN check is enabled and A / T contains a NaN
 *   LAPACK_WORK_MEMORY_ERROR  the workspace could not be allocated
 *                             (reported through LAPACKE_xerbla)
 *   otherwise                 whatever LAPACKE_dorgtsqr_row_work returned
 */
lapack_int LAPACKE_dorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
                                 lapack_int mb, lapack_int nb,
                                 double* a, lapack_int lda,
                                 const double* t, lapack_int ldt )
{
    lapack_int status = 0;
    lapack_int work_len = -1;   /* -1 requests a workspace-size query */
    double optimal_len;
    double* scratch = NULL;

    /* Only the two LAPACKE storage layouts are accepted */
    if( !( matrix_layout == LAPACK_COL_MAJOR ||
           matrix_layout == LAPACK_ROW_MAJOR ) ) {
        LAPACKE_xerbla( "LAPACKE_dorgtsqr_row", -1 );
        return -1;
    }
#ifndef LAPACK_DISABLE_NAN_CHECK
    if( LAPACKE_get_nancheck() ) {
        /* Optional NaN screening; A is checked before T */
        if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) {
            return -6;
        }
        if( LAPACKE_dge_nancheck( matrix_layout, nb, n, t, ldt ) ) {
            return -8;
        }
    }
#endif
    /* First pass: workspace query, the size comes back in optimal_len */
    status = LAPACKE_dorgtsqr_row_work( matrix_layout, m, n, mb, nb,
                                        a, lda, t, ldt, &optimal_len,
                                        work_len );
    if( status == 0 ) {
        work_len = (lapack_int)optimal_len;
        scratch = (double*)LAPACKE_malloc( sizeof(double) * work_len );
        if( scratch != NULL ) {
            /* Second pass: the actual computation */
            status = LAPACKE_dorgtsqr_row_work( matrix_layout, m, n, mb, nb,
                                                a, lda, t, ldt,
                                                scratch, work_len );
            LAPACKE_free( scratch );
        } else {
            status = LAPACK_WORK_MEMORY_ERROR;
        }
    }
    /* Allocation failures are the only condition reported from this level */
    if( status == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_dorgtsqr_row", status );
    }
    return status;
}
|
|
@ -0,0 +1,108 @@
|
|||
/*****************************************************************************
|
||||
Copyright (c) 2020, Intel Corp.
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of Intel Corporation nor the names of its contributors
|
||||
may be used to endorse or promote products derived from this software
|
||||
without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
|
||||
THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************
|
||||
* Contents: Native middle-level C interface to LAPACK function dorgtsqr_row
|
||||
* Author: Intel Corporation
|
||||
*****************************************************************************/
|
||||
|
||||
#include "lapacke_utils.h"
|
||||
|
||||
/*
 * Middle-level LAPACKE wrapper for dorgtsqr_row.
 *
 * Column-major input is forwarded to LAPACK_dorgtsqr_row unchanged.
 * Row-major input is copied into temporary column-major buffers (A and T),
 * processed, and A is copied back; T is input-only (const) and is therefore
 * not written back.
 * A call with lwork == -1 in row-major layout only performs the workspace
 * query and returns.
 *
 * Negative info values coming from LAPACK are shifted by one because the
 * C interface has the extra leading matrix_layout argument.
 *
 * Returns -1 for an unknown layout, -7 / -9 when lda / ldt is smaller than
 * n in row-major layout, LAPACK_TRANSPOSE_MEMORY_ERROR when a temporary
 * buffer cannot be allocated, otherwise the (adjusted) LAPACK info.
 */
lapack_int LAPACKE_dorgtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n,
                                      lapack_int mb, lapack_int nb,
                                      double* a, lapack_int lda,
                                      const double* t, lapack_int ldt,
                                      double* work, lapack_int lwork )
{
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt,
                             work, &lwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        /* All declarations sit at the top of the block (no mixing with
         * statements), matching the other *_work wrappers. */
        lapack_int lda_t = MAX(1,m);
        lapack_int ldt_t = MAX(1,nb);
        double* a_t = NULL;
        double* t_t = NULL;
        /* Check leading dimension(s) */
        if( lda < n ) {
            info = -7;
            LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
            return info;
        }
        if( ldt < n ) {
            info = -9;
            LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
            return info;
        }
        /* Query optimal working array(s) size if requested */
        if( lwork == -1 ) {
            LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t,
                                 work, &lwork, &info );
            return (info < 0) ? (info - 1) : info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        t_t = (double*)LAPACKE_malloc( sizeof(double) * ldt_t * MAX(1,n) );
        if( t_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_1;
        }
        /* Transpose input matrices.
         * t_t must be built from the caller's T (with its own leading
         * dimension ldt); building it from A would hand LAPACK the wrong
         * block reflector factors. */
        LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
        LAPACKE_dge_trans( matrix_layout, nb, n, t, ldt, t_t, ldt_t );
        /* Call LAPACK function and adjust info */
        LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t,
                             work, &lwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices (only A is an output) */
        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );

        /* Release memory and exit */
        LAPACKE_free( t_t );
exit_level_1:
        LAPACKE_free( a_t );
exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
    }
    return info;
}
|
|
@ -51,7 +51,7 @@ lapack_int LAPACKE_dormtr( int matrix_layout, char side, char uplo, char trans,
|
|||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
r = LAPACKE_lsame( side, 'l' ) ? m : n;
|
||||
if( LAPACKE_dge_nancheck( matrix_layout, r, r, a, lda ) ) {
|
||||
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, r, a, lda ) ) {
|
||||
return -7;
|
||||
}
|
||||
if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) {
|
||||
|
|
|
@ -43,7 +43,7 @@ lapack_int LAPACKE_dsyconv( int matrix_layout, char uplo, char way, lapack_int n
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -5;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -72,7 +72,7 @@ lapack_int LAPACKE_dsyev_work( int matrix_layout, char jobz, char uplo,
|
|||
info = info - 1;
|
||||
}
|
||||
/* Transpose output matrices */
|
||||
if ( jobz == 'V') {
|
||||
if ( jobz == 'V' || jobz == 'v' ) {
|
||||
LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
|
||||
} else {
|
||||
LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
|
||||
|
|
|
@ -76,7 +76,7 @@ lapack_int LAPACKE_dsyevd_2stage_work( int matrix_layout, char jobz, char uplo,
|
|||
info = info - 1;
|
||||
}
|
||||
/* Transpose output matrices */
|
||||
if ( jobz == 'V') {
|
||||
if ( jobz == 'V' || jobz == 'v' ) {
|
||||
LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
|
||||
} else {
|
||||
LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
|
||||
|
|
|
@ -76,7 +76,7 @@ lapack_int LAPACKE_dsyevd_work( int matrix_layout, char jobz, char uplo,
|
|||
info = info - 1;
|
||||
}
|
||||
/* Transpose output matrices */
|
||||
if ( jobz == 'V') {
|
||||
if ( jobz == 'V' || jobz == 'v' ) {
|
||||
LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
|
||||
} else {
|
||||
LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );
|
||||
|
|
|
@ -47,7 +47,7 @@ lapack_int LAPACKE_dsygst( int matrix_layout, lapack_int itype, char uplo,
|
|||
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -5;
|
||||
}
|
||||
if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) {
|
||||
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
|
||||
return -7;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,10 +48,10 @@ lapack_int LAPACKE_dsygv( int matrix_layout, lapack_int itype, char jobz,
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -6;
|
||||
}
|
||||
if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) {
|
||||
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
|
||||
return -8;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,10 +48,10 @@ lapack_int LAPACKE_dsygv_2stage( int matrix_layout, lapack_int itype, char jobz,
|
|||
#ifndef LAPACK_DISABLE_NAN_CHECK
|
||||
if( LAPACKE_get_nancheck() ) {
|
||||
/* Optionally check input matrices for NaNs */
|
||||
if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) {
|
||||
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
|
||||
return -6;
|
||||
}
|
||||
if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) {
|
||||
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
|
||||
return -8;
|
||||
}
|
||||
}
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue