Merge pull request #3216 from xianyi/develop

Update from develop for 0.3.15 release
This commit is contained in:
Martin Kroeker 2021-05-02 23:48:28 +02:00 committed by GitHub
commit 65502c6af6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
236 changed files with 12364 additions and 2380 deletions

View File

@ -224,12 +224,21 @@ matrix:
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
- brew update
- brew install gcc@10
script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env:
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
- BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10"
# macOS job on the xcode12 image: DYNAMIC_ARCH build for TARGET=NEHALEM with
# INTERFACE64=1, using gfortran-10 as the Fortran compiler (CC is not overridden).
# NOTE(review): unlike the preceding matrix entry, this one does not run
# `brew install gcc@10`; presumably the xcode12 image already ships gfortran-10 — confirm.
- <<: *test-macos
osx_image: xcode12
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
- brew update
script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env:
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
# - <<: *test-macos
# osx_image: xcode10
# env:

View File

@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 14)
set(OpenBLAS_PATCH_VERSION 14.dev)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
# Adhere to GNU filesystem layout conventions

View File

@ -1,4 +1,54 @@
OpenBLAS ChangeLog
====================================================================
Version 0.3.15
2-May-2021
common:
- imported improvements and bugfixes from Reference-LAPACK 3.9.1
- imported LAPACKE interface fixes from Reference-LAPACK PRs 534 + 537
- fixed a problem in the cpu detection of 0.3.14 that prevented cross-compilation
- fixed a sequence problem in the generation of softlinks to the library in GMAKE
RISC-V:
- fixed compilation on RISCV (missing entry in getarch)
- fixed a potential division by zero in CROTG and ZROTG
POWER:
- fixed LAPACK testsuite failures seen with the NVIDIA HPC compiler
- improved CGEMM, DGEMM and ZGEMM performance on POWER10
- added an optimized ZGEMV kernel for POWER10
- fixed a potential division by zero in CROTG and ZROTG
x86_64:
- added support for Intel Control-flow Enforcement Technology (CET)
- reverted the DOMATCOPY_RT code to the generic C version
- fixed a bug in the AVX512 SGEMM kernel introduced in 0.3.14
- fixed misapplication of -msse flag to non-SSE cpus in DYNAMIC_ARCH
- added support for compilation of the benchmarks on older OSX versions
- fixed propagation of the NO_AVX512 option in CMAKE builds
- fixed compilation of the AVX512 SGEMM kernel with clang-cl on Windows
- fixed compilation of the CTESTs with INTERFACE64=1 (random faults on OSX)
- corrected the Haswell DROT kernel to require AVX2/FMA3 rather than AVX512
ARM:
- fixed a potential division by zero in CROTG and ZROTG
- fixed a potential overflow in IMATCOPY/ZIMATCOPY and the CTESTs
ARM64:
- fixed spurious reads outside the array in the SGEMM tcopy macro
- fixed a potential division by zero in CROTG and ZROTG
- fixed a segmentation fault in DYNAMIC_ARCH builds (reappeared in 0.3.14)
MIPS:
- fixed a potential division by zero in CROTG and ZROTG
- fixed a potential overflow in IMATCOPY/ZIMATCOPY and the CTESTs
MIPS64:
- fixed a potential division by zero in CROTG and ZROTG
SPARC:
- fixed a potential division by zero in CROTG and ZROTG
====================================================================
Version 0.3.14
17-Mar-2021

View File

@ -167,7 +167,6 @@ ifeq ($(NO_SHARED), 1)
$(error OpenBLAS: neither static nor shared are enabled.)
endif
endif
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
@for d in $(SUBDIRS) ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
@ -196,6 +195,7 @@ endif
ifdef USE_THREAD
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
endif
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
@touch lib.grd
prof : prof_blas prof_lapack

View File

@ -3,7 +3,7 @@
#
# This library's version
VERSION = 0.3.14
VERSION = 0.3.14.dev
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

View File

@ -1,10 +1,21 @@
# COMPILER_PREFIX = mingw32-
ifdef HAVE_SSE
CCOMMON_OPT += -msse
FCOMMON_OPT += -msse
ifndef DYNAMIC_ARCH
ADD_CPUFLAGS = 1
else
ifdef TARGET_CORE
ADD_CPUFLAGS = 1
endif
endif
ifdef ADD_CPUFLAGS
ifdef HAVE_SSE
CCOMMON_OPT += -msse
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -msse
endif
endif
endif
ifeq ($(OSNAME), Interix)
ARFLAGS = -m x86

View File

@ -8,6 +8,16 @@ endif
endif
endif
ifndef DYNAMIC_ARCH
ADD_CPUFLAGS = 1
else
ifdef TARGET_CORE
ADD_CPUFLAGS = 1
endif
endif
ifdef ADD_CPUFLAGS
ifdef HAVE_SSE3
CCOMMON_OPT += -msse3
ifneq ($(F_COMPILER), NAG)
@ -44,7 +54,6 @@ endif
endif
ifeq ($(CORE), SKYLAKEX)
ifndef DYNAMIC_ARCH
ifndef NO_AVX512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
@ -62,10 +71,8 @@ endif
endif
endif
endif
endif
ifeq ($(CORE), COOPERLAKE)
ifndef DYNAMIC_ARCH
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# cooperlake support was added in 10.1
@ -88,7 +95,6 @@ endif
endif
endif
endif
endif
ifdef HAVE_AVX2
ifndef NO_AVX2
@ -120,6 +126,7 @@ endif
endif
endif
endif
ifeq ($(OSNAME), Interix)

View File

@ -4,7 +4,15 @@ trigger:
branches:
include:
- develop
resources:
containers:
- container: oneapi-hpckit
image: intel/oneapi-hpckit:latest
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
- container: oneapi-basekit
image: intel/oneapi-basekit:latest
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
jobs:
# manylinux1 is useful to test because the
# standard Docker container uses an old version
@ -68,4 +76,64 @@ jobs:
dir
openblas_utest.exe
# macOS 10.15 job: DYNAMIC_ARCH build (TARGET=CORE2) with USE_OPENMP=1 and
# INTERFACE64=1, compiled with gcc-10 / gfortran-10.
- job: OSX_OpenMP
pool:
vmImage: 'macOS-10.15'
steps:
- script: |
brew update
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-10 FC=gfortran-10
# macOS 10.15 job: single-threaded build compiled with gcc-10 / gfortran-10.
# The build system's switch is USE_THREAD (singular) - that is the name the
# Makefiles test with `ifdef USE_THREAD`; a USE_THREADS=... argument is not
# looked at there, so it would leave threading at its default.
- job: OSX_GCC_Nothreads
  pool:
    vmImage: 'macOS-10.15'
  steps:
  - script: |
      brew update
      make USE_THREAD=0 CC=gcc-10 FC=gfortran-10
# macOS 10.15 job: DYNAMIC_ARCH build (TARGET=CORE2) with USE_OPENMP=1 and
# INTERFACE64=1, using the Homebrew LLVM clang for C and gfortran-10 for Fortran.
# LD_LIBRARY_PATH / LIBRARY_PATH point at the Homebrew LLVM lib directory
# (llvm and libomp are installed by the script below).
- job: OSX_OpenMP_Clang
pool:
vmImage: 'macOS-10.15'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
steps:
- script: |
brew update
brew install llvm libomp
make TARGET=CORE2 USE_OPENMP=1 INTERFACE64=1 DYNAMIC_ARCH=1 CC=/usr/local/opt/llvm/bin/clang FC=gfortran-10
# macOS 10.15 job: builds with the Homebrew LLVM clang for C and Intel ifort
# for Fortran.
#
# Flow:
#   1. install llvm/libomp and make /opt/intel writable for the cache task
#   2. try to restore /opt/intel/oneapi from the pipeline cache; the cache key
#      is derived from the installer URL and the requested component list
#   3. on a cache miss only, download the HPC kit disk image, mount it, run the
#      bootstrapper silently, unmount the image and propagate the installer's
#      exit status
#   4. source the oneAPI environment and run the build
- job: OSX_Ifort_Clang
  pool:
    vmImage: 'macOS-10.15'
  variables:
    LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
    MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg
    LIBRARY_PATH: /usr/local/opt/llvm/lib
    MACOS_FORTRAN_COMPONENTS: intel.oneapi.mac.ifort-compiler
  steps:
  - script: |
      brew update
      brew install llvm libomp
      sudo mkdir -p /opt/intel
      sudo chown $USER /opt/intel
    displayName: prepare for cache restore
  - task: Cache@2
    inputs:
      path: /opt/intel/oneapi
      key: '"install" | "$(MACOS_HPCKIT_URL)" | "$(MACOS_FORTRAN_COMPONENTS)"'
      cacheHitVar: CACHE_RESTORED
  - script: |
      curl --output webimage.dmg --url $(MACOS_HPCKIT_URL) --retry 5 --retry-delay 5
      hdiutil attach webimage.dmg
      sudo /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)"/bootstrapper.app/Contents/MacOS/bootstrapper -s --action install --components="$(MACOS_FORTRAN_COMPONENTS)" --eula=accept --continue-with-optional-error=yes --log-dir=.
      # Remember the installer status so the detach below cannot mask a failure.
      installer_exit_code=$?
      # Detach the volume that was mounted above. The name must be derived from
      # MACOS_HPCKIT_URL, exactly as in the bootstrapper path; no URL variable
      # is defined in this job.
      hdiutil detach /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)" -quiet
      exit $installer_exit_code
    displayName: install
    condition: ne(variables.CACHE_RESTORED, 'true')
  - script: |
      source /opt/intel/oneapi/setvars.sh
      make CC=/usr/local/opt/llvm/bin/clang FC=ifort

View File

@ -3,6 +3,8 @@
#include <time.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#endif
#include "common.h"

View File

@ -66,7 +66,7 @@ set(SLASRC
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
slarrv.f slartv.f
slarz.f slarzb.f slarzt.f slasy2.f
slasyf.f slasyf_rook.f slasyf_rk.f slasyf_aa.f
@ -112,14 +112,14 @@ set(SLASRC
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f
sgelqt.f sgelqt3.f sgemlqt.f
sgetsls.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f
sgetsls.f sgetsqrhrt.f sgeqr.f slatsqr.f slamtsqr.f sgemqr.f
sgelq.f slaswlq.f slamswlq.f sgemlq.f
stplqt.f stplqt2.f stpmlqt.f
ssytrd_2stage.f ssytrd_sy2sb.f ssytrd_sb2st.F ssb2st_kernels.f
ssyevd_2stage.f ssyev_2stage.f ssyevx_2stage.f ssyevr_2stage.f
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
sgesvdq.f slaorhr_col_getrfnp.f
slaorhr_col_getrfnp2.f sorgtsqr.f sorhr_col.f )
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f )
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@ -171,7 +171,7 @@ set(CLASRC
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
clarf.f clarfb.f clarfg.f clarfgp.f clarft.f
clarf.f clarfb.f clarfb_gett.f clarfg.f clarfgp.f clarft.f
clarfx.f clarfy.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
clasyf.f clasyf_rook.f clasyf_rk.f clasyf_aa.f
@ -209,14 +209,14 @@ set(CLASRC
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f
cgelqt.f cgelqt3.f cgemlqt.f
cgetsls.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f
cgetsls.f cgetsqrhrt.f cgeqr.f clatsqr.f clamtsqr.f cgemqr.f
cgelq.f claswlq.f clamswlq.f cgemlq.f
ctplqt.f ctplqt2.f ctpmlqt.f
chetrd_2stage.f chetrd_he2hb.f chetrd_hb2st.F chb2st_kernels.f
cheevd_2stage.f cheev_2stage.f cheevx_2stage.f cheevr_2stage.f
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
cungtsqr.f cunhr_col.f )
cungtsqr.f cungtsqr_row.f cunhr_col.f )
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@ -253,7 +253,7 @@ set(DLASRC
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
dlargv.f dlarrv.f dlartv.f
dlarz.f dlarzb.f dlarzt.f dlasy2.f
dlasyf.f dlasyf_rook.f dlasyf_rk.f dlasyf_aa.f
@ -300,14 +300,14 @@ set(DLASRC
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f
dgelqt.f dgelqt3.f dgemlqt.f
dgetsls.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f
dgetsls.f dgetsqrhrt.f dgeqr.f dlatsqr.f dlamtsqr.f dgemqr.f
dgelq.f dlaswlq.f dlamswlq.f dgemlq.f
dtplqt.f dtplqt2.f dtpmlqt.f
dsytrd_2stage.f dsytrd_sy2sb.f dsytrd_sb2st.F dsb2st_kernels.f
dsyevd_2stage.f dsyev_2stage.f dsyevx_2stage.f dsyevr_2stage.f
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
dlaorhr_col_getrfnp2.f dorgtsqr.f dorhr_col.f )
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f )
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@ -360,7 +360,7 @@ set(ZLASRC
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
zlarcm.f zlarf.f zlarfb.f
zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f
zlarfg.f zlarfgp.f zlarft.f
zlarfx.f zlarfy.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
@ -402,13 +402,13 @@ set(ZLASRC
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f
ztplqt.f ztplqt2.f ztpmlqt.f
zgelqt.f zgelqt3.f zgemlqt.f
zgetsls.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f
zgetsls.f zgetsqrhrt.f zgeqr.f zlatsqr.f zlamtsqr.f zgemqr.f
zgelq.f zlaswlq.f zlamswlq.f zgemlq.f
zhetrd_2stage.f zhetrd_he2hb.f zhetrd_hb2st.F zhb2st_kernels.f
zheevd_2stage.f zheev_2stage.f zheevx_2stage.f zheevr_2stage.f
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
zungtsqr.f zunhr_col.f)
zungtsqr.f zungtsqr_row.f zunhr_col.f)
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f

View File

@ -114,6 +114,8 @@ set(CSRC
lapacke_cgetrs_work.c
lapacke_cgetsls.c
lapacke_cgetsls_work.c
lapacke_cgetsqrhrt.c
lapacke_cgetsqrhrt_work.c
lapacke_cggbak.c
lapacke_cggbak_work.c
lapacke_cggbal.c
@ -590,6 +592,8 @@ set(CSRC
lapacke_cungrq_work.c
lapacke_cungtr.c
lapacke_cungtr_work.c
lapacke_cungtsqr_row.c
lapacke_cungtsqr_row_work.c
lapacke_cunmbr.c
lapacke_cunmbr_work.c
lapacke_cunmhr.c
@ -735,6 +739,8 @@ set(DSRC
lapacke_dgetrs_work.c
lapacke_dgetsls.c
lapacke_dgetsls_work.c
lapacke_dgetsqrhrt.c
lapacke_dgetsqrhrt_work.c
lapacke_dggbak.c
lapacke_dggbak_work.c
lapacke_dggbal.c
@ -862,6 +868,8 @@ set(DSRC
lapacke_dorgrq_work.c
lapacke_dorgtr.c
lapacke_dorgtr_work.c
lapacke_dorgtsqr_row.c
lapacke_dorgtsqr_row_work.c
lapacke_dormbr.c
lapacke_dormbr_work.c
lapacke_dormhr.c
@ -1309,6 +1317,8 @@ set(SSRC
lapacke_sgetrs_work.c
lapacke_sgetsls.c
lapacke_sgetsls_work.c
lapacke_sgetsqrhrt.c
lapacke_sgetsqrhrt_work.c
lapacke_sggbak.c
lapacke_sggbak_work.c
lapacke_sggbal.c
@ -1435,6 +1445,8 @@ set(SSRC
lapacke_sorgrq_work.c
lapacke_sorgtr.c
lapacke_sorgtr_work.c
lapacke_sorgtsqr_row.c
lapacke_sorgtsqr_row_work.c
lapacke_sormbr.c
lapacke_sormbr_work.c
lapacke_sormhr.c
@ -1877,6 +1889,8 @@ set(ZSRC
lapacke_zgetrs_work.c
lapacke_zgetsls.c
lapacke_zgetsls_work.c
lapacke_zgetsqrhrt.c
lapacke_zgetsqrhrt_work.c
lapacke_zggbak.c
lapacke_zggbak_work.c
lapacke_zggbal.c
@ -2351,6 +2365,8 @@ set(ZSRC
lapacke_zungrq_work.c
lapacke_zungtr.c
lapacke_zungtr_work.c
lapacke_zungtsqr_row.c
lapacke_zungtsqr_row_work.c
lapacke_zunmbr.c
lapacke_zunmbr_work.c
lapacke_zunmhr.c

View File

@ -299,6 +299,10 @@ if (NO_AVX2)
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2")
endif ()
# Forward the NO_AVX512 build option to the compiler as a -DNO_AVX512
# preprocessor define by appending it to CCOMMON_OPT.
if (NO_AVX512)
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")
endif ()
if (USE_THREAD)
# USE_SIMPLE_THREADED_LEVEL3 = 1
# NO_AFFINITY = 1

View File

@ -416,6 +416,15 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#include "common_alpha.h"
#endif
/* Control-flow Enforcement Technology (CET) support for x86 / x86_64:
 * when the compiler signals CET (__CET__) and provides __has_include,
 * pull in <cet.h> if that header exists.
 * NOTE(review): <cet.h> is presumably what defines _CET_ENDBR - confirm. */
#if (defined(ARCH_X86) || defined(ARCH_X86_64)) && defined(__CET__) && defined(__has_include)
#if __has_include(<cet.h>)
#include <cet.h>
#endif
#endif
/* Fallback: if nothing above defined _CET_ENDBR, define it as empty so that
 * code can reference the macro unconditionally. */
#ifndef _CET_ENDBR
#define _CET_ENDBR
#endif
#ifdef ARCH_X86
#include "common_x86.h"
#endif

View File

@ -340,7 +340,8 @@ REALNAME:
.align 16; \
.globl REALNAME ;\
.type REALNAME, @function; \
REALNAME:
REALNAME: \
_CET_ENDBR
#ifdef PROFILE
#define PROFCODE call mcount

View File

@ -451,7 +451,8 @@ REALNAME:
.align 512; \
.globl REALNAME ;\
.type REALNAME, @function; \
REALNAME:
REALNAME: \
_CET_ENDBR
#ifdef PROFILE
#define PROFCODE call *mcount@GOTPCREL(%rip)

View File

@ -20,7 +20,7 @@ void F77_cgemv(int *order, char *transp, int *m, int *n,
get_transpose_type(transp, &trans);
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = (CBLAS_TEST_COMPLEX *)malloc( (*m)*LDA*sizeof( CBLAS_TEST_COMPLEX) );
A = (CBLAS_TEST_COMPLEX *)malloc( (*m)*(size_t)LDA*sizeof( CBLAS_TEST_COMPLEX) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ){
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -50,7 +50,7 @@ void F77_cgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
get_transpose_type(transp, &trans);
if (*order == TEST_ROW_MJR) {
LDA = *ku+*kl+2;
A=( CBLAS_TEST_COMPLEX* )malloc((*n+*kl)*LDA*sizeof(CBLAS_TEST_COMPLEX));
A=( CBLAS_TEST_COMPLEX* )malloc((*n+*kl)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*ku; i++ ){
irow=*ku+*kl-i;
jcol=(*ku)-i;
@ -94,7 +94,7 @@ void F77_cgeru(int *order, int *m, int *n, CBLAS_TEST_COMPLEX *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX));
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ){
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -122,7 +122,7 @@ void F77_cgerc(int *order, int *m, int *n, CBLAS_TEST_COMPLEX *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
A=(CBLAS_TEST_COMPLEX* )malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ){
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -154,7 +154,7 @@ void F77_chemv(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = (CBLAS_TEST_COMPLEX *)malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
A = (CBLAS_TEST_COMPLEX *)malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ){
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -190,7 +190,7 @@ int i,irow,j,jcol,LDA;
*incx, beta, y, *incy );
else {
LDA = *k+2;
A =(CBLAS_TEST_COMPLEX*)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_COMPLEX));
A =(CBLAS_TEST_COMPLEX*)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -251,8 +251,8 @@ void F77_chpmv(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
beta, y, *incy);
else {
LDA = *n;
A = (CBLAS_TEST_COMPLEX* )malloc(LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ));
AP = (CBLAS_TEST_COMPLEX* )malloc( (((LDA+1)*LDA)/2)*
A = (CBLAS_TEST_COMPLEX* )malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ));
AP = (CBLAS_TEST_COMPLEX* )malloc( ((((size_t)LDA+1)*LDA)/2)*
sizeof( CBLAS_TEST_COMPLEX ));
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
@ -311,7 +311,7 @@ void F77_ctbmv(int *order, char *uplow, char *transp, char *diagn,
x, *incx);
else {
LDA = *k+2;
A=(CBLAS_TEST_COMPLEX *)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_COMPLEX));
A=(CBLAS_TEST_COMPLEX *)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -375,7 +375,7 @@ void F77_ctbsv(int *order, char *uplow, char *transp, char *diagn,
*incx);
else {
LDA = *k+2;
A=(CBLAS_TEST_COMPLEX*)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_COMPLEX ));
A=(CBLAS_TEST_COMPLEX*)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX ));
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -436,8 +436,8 @@ void F77_ctpmv(int *order, char *uplow, char *transp, char *diagn,
cblas_ctpmv( CblasRowMajor, UNDEFINED, trans, diag, *n, ap, x, *incx );
else {
LDA = *n;
A=(CBLAS_TEST_COMPLEX*)malloc(LDA*LDA*sizeof(CBLAS_TEST_COMPLEX));
AP=(CBLAS_TEST_COMPLEX*)malloc((((LDA+1)*LDA)/2)*
A=(CBLAS_TEST_COMPLEX*)malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_COMPLEX));
AP=(CBLAS_TEST_COMPLEX*)malloc(((((size_t)LDA+1)*LDA)/2)*
sizeof(CBLAS_TEST_COMPLEX));
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
@ -491,8 +491,8 @@ void F77_ctpsv(int *order, char *uplow, char *transp, char *diagn,
cblas_ctpsv( CblasRowMajor, UNDEFINED, trans, diag, *n, ap, x, *incx );
else {
LDA = *n;
A=(CBLAS_TEST_COMPLEX*)malloc(LDA*LDA*sizeof(CBLAS_TEST_COMPLEX));
AP=(CBLAS_TEST_COMPLEX*)malloc((((LDA+1)*LDA)/2)*
A=(CBLAS_TEST_COMPLEX*)malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_COMPLEX));
AP=(CBLAS_TEST_COMPLEX*)malloc(((((size_t)LDA+1)*LDA)/2)*
sizeof(CBLAS_TEST_COMPLEX));
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
@ -544,7 +544,7 @@ void F77_ctrmv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA=*n+1;
A=(CBLAS_TEST_COMPLEX*)malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX));
A=(CBLAS_TEST_COMPLEX*)malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -573,7 +573,7 @@ void F77_ctrsv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A =(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
A =(CBLAS_TEST_COMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -601,8 +601,8 @@ void F77_chpr(int *order, char *uplow, int *n, float *alpha,
cblas_chpr(CblasRowMajor, UNDEFINED, *n, *alpha, x, *incx, ap );
else {
LDA = *n;
A = (CBLAS_TEST_COMPLEX* )malloc(LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
AP = ( CBLAS_TEST_COMPLEX* )malloc( (((LDA+1)*LDA)/2)*
A = (CBLAS_TEST_COMPLEX* )malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
AP = ( CBLAS_TEST_COMPLEX* )malloc( ((((size_t)LDA+1)*LDA)/2)*
sizeof( CBLAS_TEST_COMPLEX ));
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
@ -678,8 +678,8 @@ void F77_chpr2(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
*incy, ap );
else {
LDA = *n;
A=(CBLAS_TEST_COMPLEX*)malloc( LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
AP=(CBLAS_TEST_COMPLEX*)malloc( (((LDA+1)*LDA)/2)*
A=(CBLAS_TEST_COMPLEX*)malloc( (size_t)LDA*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
AP=(CBLAS_TEST_COMPLEX*)malloc( ((((size_t)LDA+1)*LDA)/2)*
sizeof( CBLAS_TEST_COMPLEX ));
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
@ -750,7 +750,7 @@ void F77_cher(int *order, char *uplow, int *n, float *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A=(CBLAS_TEST_COMPLEX*)malloc((*n)*LDA*sizeof( CBLAS_TEST_COMPLEX ));
A=(CBLAS_TEST_COMPLEX*)malloc((*n)*(size_t)LDA*sizeof( CBLAS_TEST_COMPLEX ));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
@ -784,7 +784,7 @@ void F77_cher2(int *order, char *uplow, int *n, CBLAS_TEST_COMPLEX *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A= ( CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) );
A= ( CBLAS_TEST_COMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_COMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {

View File

@ -19,7 +19,7 @@ void F77_dgemv(int *order, char *transp, int *m, int *n, double *alpha,
get_transpose_type(transp, &trans);
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( double* )malloc( (*m)*LDA*sizeof( double ) );
A = ( double* )malloc( (*m)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -43,7 +43,7 @@ void F77_dger(int *order, int *m, int *n, double *alpha, double *x, int *incx,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( double* )malloc( (*m)*LDA*sizeof( double ) );
A = ( double* )malloc( (*m)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*m; i++ ) {
for( j=0; j<*n; j++ )
@ -74,7 +74,7 @@ void F77_dtrmv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -102,7 +102,7 @@ void F77_dtrsv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -123,7 +123,7 @@ void F77_dsymv(int *order, char *uplow, int *n, double *alpha, double *a,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -146,7 +146,7 @@ void F77_dsyr(int *order, char *uplow, int *n, double *alpha, double *x,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -170,7 +170,7 @@ void F77_dsyr2(int *order, char *uplow, int *n, double *alpha, double *x,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -196,7 +196,7 @@ void F77_dgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
if (*order == TEST_ROW_MJR) {
LDA = *ku+*kl+2;
A = ( double* )malloc( (*n+*kl)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n+*kl)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*ku; i++ ){
irow=*ku+*kl-i;
jcol=(*ku)-i;
@ -236,7 +236,7 @@ void F77_dtbmv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *k+1;
A = ( double* )malloc( (*n+*k)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n+*k)*(size_t)LDA*sizeof( double ) );
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -282,7 +282,7 @@ void F77_dtbsv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *k+1;
A = ( double* )malloc( (*n+*k)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n+*k)*(size_t)LDA*sizeof( double ) );
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -325,7 +325,7 @@ void F77_dsbmv(int *order, char *uplow, int *n, int *k, double *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *k+1;
A = ( double* )malloc( (*n+*k)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n+*k)*(size_t)LDA*sizeof( double ) );
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -369,8 +369,8 @@ void F77_dspmv(int *order, char *uplow, int *n, double *alpha, double *ap,
if (*order == TEST_ROW_MJR) {
LDA = *n;
A = ( double* )malloc( LDA*LDA*sizeof( double ) );
AP = ( double* )malloc( (((LDA+1)*LDA)/2)*sizeof( double ) );
A = ( double* )malloc( (size_t)LDA*LDA*sizeof( double ) );
AP = ( double* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( double ) );
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
for( i=0; i<j+1; i++, k++ )
@ -411,8 +411,8 @@ void F77_dtpmv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *n;
A = ( double* )malloc( LDA*LDA*sizeof( double ) );
AP = ( double* )malloc( (((LDA+1)*LDA)/2)*sizeof( double ) );
A = ( double* )malloc( (size_t)LDA*LDA*sizeof( double ) );
AP = ( double* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( double ) );
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
for( i=0; i<j+1; i++, k++ )
@ -451,8 +451,8 @@ void F77_dtpsv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *n;
A = ( double* )malloc( LDA*LDA*sizeof( double ) );
AP = ( double* )malloc( (((LDA+1)*LDA)/2)*sizeof( double ) );
A = ( double* )malloc( (size_t)LDA*LDA*sizeof( double ) );
AP = ( double* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( double ) );
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
for( i=0; i<j+1; i++, k++ )
@ -488,8 +488,8 @@ void F77_dspr(int *order, char *uplow, int *n, double *alpha, double *x,
if (*order == TEST_ROW_MJR) {
LDA = *n;
A = ( double* )malloc( LDA*LDA*sizeof( double ) );
AP = ( double* )malloc( (((LDA+1)*LDA)/2)*sizeof( double ) );
A = ( double* )malloc( (size_t)LDA*LDA*sizeof( double ) );
AP = ( double* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( double ) );
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
for( i=0; i<j+1; i++, k++ )
@ -540,8 +540,8 @@ void F77_dspr2(int *order, char *uplow, int *n, double *alpha, double *x,
if (*order == TEST_ROW_MJR) {
LDA = *n;
A = ( double* )malloc( LDA*LDA*sizeof( double ) );
AP = ( double* )malloc( (((LDA+1)*LDA)/2)*sizeof( double ) );
A = ( double* )malloc( (size_t)LDA*LDA*sizeof( double ) );
AP = ( double* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( double ) );
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
for( i=0; i<j+1; i++, k++ )

View File

@ -26,34 +26,34 @@ void F77_dgemm(int *order, char *transpa, char *transpb, int *m, int *n,
if (*order == TEST_ROW_MJR) {
if (transa == CblasNoTrans) {
LDA = *k+1;
A = (double *)malloc( (*m)*LDA*sizeof( double ) );
A = (double *)malloc( (*m)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*m; i++ )
for( j=0; j<*k; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else {
LDA = *m+1;
A = ( double* )malloc( LDA*(*k)*sizeof( double ) );
A = ( double* )malloc( (size_t)LDA*(*k)*sizeof( double ) );
for( i=0; i<*k; i++ )
for( j=0; j<*m; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
if (transb == CblasNoTrans) {
LDB = *n+1;
B = ( double* )malloc( (*k)*LDB*sizeof( double ) );
B = ( double* )malloc( (*k)*(size_t)LDB*sizeof( double ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];
}
else {
LDB = *k+1;
B = ( double* )malloc( LDB*(*n)*sizeof( double ) );
B = ( double* )malloc( (size_t)LDB*(*n)*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];
}
LDC = *n+1;
C = ( double* )malloc( (*m)*LDC*sizeof( double ) );
C = ( double* )malloc( (*m)*(size_t)LDC*sizeof( double ) );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ )
C[i*LDC+j]=c[j*(*ldc)+i];
@ -89,25 +89,25 @@ void F77_dsymm(int *order, char *rtlf, char *uplow, int *m, int *n,
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A = ( double* )malloc( (*m)*LDA*sizeof( double ) );
A = ( double* )malloc( (*m)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else{
LDA = *n+1;
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
LDB = *n+1;
B = ( double* )malloc( (*m)*LDB*sizeof( double ) );
B = ( double* )malloc( (*m)*(size_t)LDB*sizeof( double ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];
LDC = *n+1;
C = ( double* )malloc( (*m)*LDC*sizeof( double ) );
C = ( double* )malloc( (*m)*(size_t)LDC*sizeof( double ) );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ )
C[i*LDC+j]=c[j*(*ldc)+i];
@ -143,20 +143,20 @@ void F77_dsyrk(int *order, char *uplow, char *transp, int *n, int *k,
if (*order == TEST_ROW_MJR) {
if (trans == CblasNoTrans) {
LDA = *k+1;
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else{
LDA = *n+1;
A = ( double* )malloc( (*k)*LDA*sizeof( double ) );
A = ( double* )malloc( (*k)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
LDC = *n+1;
C = ( double* )malloc( (*n)*LDC*sizeof( double ) );
C = ( double* )malloc( (*n)*(size_t)LDC*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
C[i*LDC+j]=c[j*(*ldc)+i];
@ -191,8 +191,8 @@ void F77_dsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
if (trans == CblasNoTrans) {
LDA = *k+1;
LDB = *k+1;
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
B = ( double* )malloc( (*n)*LDB*sizeof( double ) );
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
B = ( double* )malloc( (*n)*(size_t)LDB*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j]=a[j*(*lda)+i];
@ -202,8 +202,8 @@ void F77_dsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
else {
LDA = *n+1;
LDB = *n+1;
A = ( double* )malloc( LDA*(*k)*sizeof( double ) );
B = ( double* )malloc( LDB*(*k)*sizeof( double ) );
A = ( double* )malloc( (size_t)LDA*(*k)*sizeof( double ) );
B = ( double* )malloc( (size_t)LDB*(*k)*sizeof( double ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ){
A[i*LDA+j]=a[j*(*lda)+i];
@ -211,7 +211,7 @@ void F77_dsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
}
}
LDC = *n+1;
C = ( double* )malloc( (*n)*LDC*sizeof( double ) );
C = ( double* )malloc( (*n)*(size_t)LDC*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
C[i*LDC+j]=c[j*(*ldc)+i];
@ -249,20 +249,20 @@ void F77_dtrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A = ( double* )malloc( (*m)*LDA*sizeof( double ) );
A = ( double* )malloc( (*m)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else{
LDA = *n+1;
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
LDB = *n+1;
B = ( double* )malloc( (*m)*LDB*sizeof( double ) );
B = ( double* )malloc( (*m)*(size_t)LDB*sizeof( double ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];
@ -300,20 +300,20 @@ void F77_dtrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A = ( double* )malloc( (*m)*LDA*sizeof( double ) );
A = ( double* )malloc( (*m)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else{
LDA = *n+1;
A = ( double* )malloc( (*n)*LDA*sizeof( double ) );
A = ( double* )malloc( (*n)*(size_t)LDA*sizeof( double ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
LDB = *n+1;
B = ( double* )malloc( (*m)*LDB*sizeof( double ) );
B = ( double* )malloc( (*m)*(size_t)LDB*sizeof( double ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];

View File

@ -19,7 +19,7 @@ void F77_sgemv(int *order, char *transp, int *m, int *n, float *alpha,
get_transpose_type(transp, &trans);
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( float* )malloc( (*m)*LDA*sizeof( float ) );
A = ( float* )malloc( (*m)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -43,7 +43,7 @@ void F77_sger(int *order, int *m, int *n, float *alpha, float *x, int *incx,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( float* )malloc( (*m)*LDA*sizeof( float ) );
A = ( float* )malloc( (*m)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*m; i++ ) {
for( j=0; j<*n; j++ )
@ -74,7 +74,7 @@ void F77_strmv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -102,7 +102,7 @@ void F77_strsv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -123,7 +123,7 @@ void F77_ssymv(int *order, char *uplow, int *n, float *alpha, float *a,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -146,7 +146,7 @@ void F77_ssyr(int *order, char *uplow, int *n, float *alpha, float *x,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -170,7 +170,7 @@ void F77_ssyr2(int *order, char *uplow, int *n, float *alpha, float *x,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[ LDA*i+j ]=a[ (*lda)*j+i ];
@ -196,7 +196,7 @@ void F77_sgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
if (*order == TEST_ROW_MJR) {
LDA = *ku+*kl+2;
A = ( float* )malloc( (*n+*kl)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n+*kl)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*ku; i++ ){
irow=*ku+*kl-i;
jcol=(*ku)-i;
@ -236,7 +236,7 @@ void F77_stbmv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *k+1;
A = ( float* )malloc( (*n+*k)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n+*k)*(size_t)LDA*sizeof( float ) );
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -282,7 +282,7 @@ void F77_stbsv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *k+1;
A = ( float* )malloc( (*n+*k)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n+*k)*(size_t)LDA*sizeof( float ) );
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -325,7 +325,7 @@ void F77_ssbmv(int *order, char *uplow, int *n, int *k, float *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *k+1;
A = ( float* )malloc( (*n+*k)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n+*k)*(size_t)LDA*sizeof( float ) );
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -369,8 +369,8 @@ void F77_sspmv(int *order, char *uplow, int *n, float *alpha, float *ap,
if (*order == TEST_ROW_MJR) {
LDA = *n;
A = ( float* )malloc( LDA*LDA*sizeof( float ) );
AP = ( float* )malloc( (((LDA+1)*LDA)/2)*sizeof( float ) );
A = ( float* )malloc( (size_t)LDA*LDA*sizeof( float ) );
AP = ( float* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( float ) );
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
for( i=0; i<j+1; i++, k++ )
@ -410,8 +410,8 @@ void F77_stpmv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *n;
A = ( float* )malloc( LDA*LDA*sizeof( float ) );
AP = ( float* )malloc( (((LDA+1)*LDA)/2)*sizeof( float ) );
A = ( float* )malloc( (size_t)LDA*LDA*sizeof( float ) );
AP = ( float* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( float ) );
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
for( i=0; i<j+1; i++, k++ )
@ -449,8 +449,8 @@ void F77_stpsv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *n;
A = ( float* )malloc( LDA*LDA*sizeof( float ) );
AP = ( float* )malloc( (((LDA+1)*LDA)/2)*sizeof( float ) );
A = ( float* )malloc( (size_t)LDA*LDA*sizeof( float ) );
AP = ( float* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( float ) );
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
for( i=0; i<j+1; i++, k++ )
@ -485,8 +485,8 @@ void F77_sspr(int *order, char *uplow, int *n, float *alpha, float *x,
if (*order == TEST_ROW_MJR) {
LDA = *n;
A = ( float* )malloc( LDA*LDA*sizeof( float ) );
AP = ( float* )malloc( (((LDA+1)*LDA)/2)*sizeof( float ) );
A = ( float* )malloc( (size_t)LDA*LDA*sizeof( float ) );
AP = ( float* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( float ) );
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
for( i=0; i<j+1; i++, k++ )
@ -536,8 +536,8 @@ void F77_sspr2(int *order, char *uplow, int *n, float *alpha, float *x,
if (*order == TEST_ROW_MJR) {
LDA = *n;
A = ( float* )malloc( LDA*LDA*sizeof( float ) );
AP = ( float* )malloc( (((LDA+1)*LDA)/2)*sizeof( float ) );
A = ( float* )malloc( (size_t)LDA*LDA*sizeof( float ) );
AP = ( float* )malloc( ((((size_t)LDA+1)*LDA)/2)*sizeof( float ) );
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
for( i=0; i<j+1; i++, k++ )

View File

@ -23,34 +23,34 @@ void F77_sgemm(int *order, char *transpa, char *transpb, int *m, int *n,
if (*order == TEST_ROW_MJR) {
if (transa == CblasNoTrans) {
LDA = *k+1;
A = (float *)malloc( (*m)*LDA*sizeof( float ) );
A = (float *)malloc( (*m)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*m; i++ )
for( j=0; j<*k; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else {
LDA = *m+1;
A = ( float* )malloc( LDA*(*k)*sizeof( float ) );
A = ( float* )malloc( (size_t)LDA*(*k)*sizeof( float ) );
for( i=0; i<*k; i++ )
for( j=0; j<*m; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
if (transb == CblasNoTrans) {
LDB = *n+1;
B = ( float* )malloc( (*k)*LDB*sizeof( float ) );
B = ( float* )malloc( (*k)*(size_t)LDB*sizeof( float ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];
}
else {
LDB = *k+1;
B = ( float* )malloc( LDB*(*n)*sizeof( float ) );
B = ( float* )malloc( (size_t)LDB*(*n)*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];
}
LDC = *n+1;
C = ( float* )malloc( (*m)*LDC*sizeof( float ) );
C = ( float* )malloc( (*m)*(size_t)LDC*sizeof( float ) );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ )
C[i*LDC+j]=c[j*(*ldc)+i];
@ -85,25 +85,25 @@ void F77_ssymm(int *order, char *rtlf, char *uplow, int *m, int *n,
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A = ( float* )malloc( (*m)*LDA*sizeof( float ) );
A = ( float* )malloc( (*m)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else{
LDA = *n+1;
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
LDB = *n+1;
B = ( float* )malloc( (*m)*LDB*sizeof( float ) );
B = ( float* )malloc( (*m)*(size_t)LDB*sizeof( float ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];
LDC = *n+1;
C = ( float* )malloc( (*m)*LDC*sizeof( float ) );
C = ( float* )malloc( (*m)*(size_t)LDC*sizeof( float ) );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ )
C[i*LDC+j]=c[j*(*ldc)+i];
@ -139,20 +139,20 @@ void F77_ssyrk(int *order, char *uplow, char *transp, int *n, int *k,
if (*order == TEST_ROW_MJR) {
if (trans == CblasNoTrans) {
LDA = *k+1;
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else{
LDA = *n+1;
A = ( float* )malloc( (*k)*LDA*sizeof( float ) );
A = ( float* )malloc( (*k)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
LDC = *n+1;
C = ( float* )malloc( (*n)*LDC*sizeof( float ) );
C = ( float* )malloc( (*n)*(size_t)LDC*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
C[i*LDC+j]=c[j*(*ldc)+i];
@ -187,8 +187,8 @@ void F77_ssyr2k(int *order, char *uplow, char *transp, int *n, int *k,
if (trans == CblasNoTrans) {
LDA = *k+1;
LDB = *k+1;
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
B = ( float* )malloc( (*n)*LDB*sizeof( float ) );
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
B = ( float* )malloc( (*n)*(size_t)LDB*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j]=a[j*(*lda)+i];
@ -198,8 +198,8 @@ void F77_ssyr2k(int *order, char *uplow, char *transp, int *n, int *k,
else {
LDA = *n+1;
LDB = *n+1;
A = ( float* )malloc( LDA*(*k)*sizeof( float ) );
B = ( float* )malloc( LDB*(*k)*sizeof( float ) );
A = ( float* )malloc( (size_t)LDA*(*k)*sizeof( float ) );
B = ( float* )malloc( (size_t)LDB*(*k)*sizeof( float ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ){
A[i*LDA+j]=a[j*(*lda)+i];
@ -207,7 +207,7 @@ void F77_ssyr2k(int *order, char *uplow, char *transp, int *n, int *k,
}
}
LDC = *n+1;
C = ( float* )malloc( (*n)*LDC*sizeof( float ) );
C = ( float* )malloc( (*n)*(size_t)LDC*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
C[i*LDC+j]=c[j*(*ldc)+i];
@ -245,20 +245,20 @@ void F77_strmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A = ( float* )malloc( (*m)*LDA*sizeof( float ) );
A = ( float* )malloc( (*m)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else{
LDA = *n+1;
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
LDB = *n+1;
B = ( float* )malloc( (*m)*LDB*sizeof( float ) );
B = ( float* )malloc( (*m)*(size_t)LDB*sizeof( float ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];
@ -296,20 +296,20 @@ void F77_strsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A = ( float* )malloc( (*m)*LDA*sizeof( float ) );
A = ( float* )malloc( (*m)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else{
LDA = *n+1;
A = ( float* )malloc( (*n)*LDA*sizeof( float ) );
A = ( float* )malloc( (*n)*(size_t)LDA*sizeof( float ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
LDB = *n+1;
B = ( float* )malloc( (*m)*LDB*sizeof( float ) );
B = ( float* )malloc( (*m)*(size_t)LDB*sizeof( float ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];

View File

@ -20,7 +20,7 @@ void F77_zgemv(int *order, char *transp, int *m, int *n,
get_transpose_type(transp, &trans);
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = (CBLAS_TEST_ZOMPLEX *)malloc( (*m)*LDA*sizeof( CBLAS_TEST_ZOMPLEX) );
A = (CBLAS_TEST_ZOMPLEX *)malloc( (*m)*(size_t)LDA*sizeof( CBLAS_TEST_ZOMPLEX) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ){
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -50,7 +50,7 @@ void F77_zgbmv(int *order, char *transp, int *m, int *n, int *kl, int *ku,
get_transpose_type(transp, &trans);
if (*order == TEST_ROW_MJR) {
LDA = *ku+*kl+2;
A=( CBLAS_TEST_ZOMPLEX* )malloc((*n+*kl)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A=( CBLAS_TEST_ZOMPLEX* )malloc((*n+*kl)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*ku; i++ ){
irow=*ku+*kl-i;
jcol=(*ku)-i;
@ -94,7 +94,7 @@ void F77_zgeru(int *order, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A=(CBLAS_TEST_ZOMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX*)malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ){
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -122,7 +122,7 @@ void F77_zgerc(int *order, int *m, int *n, CBLAS_TEST_ZOMPLEX *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ){
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -154,7 +154,7 @@ void F77_zhemv(int *order, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A = (CBLAS_TEST_ZOMPLEX *)malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A = (CBLAS_TEST_ZOMPLEX *)malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ){
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -190,7 +190,7 @@ int i,irow,j,jcol,LDA;
*incx, beta, y, *incy );
else {
LDA = *k+2;
A =(CBLAS_TEST_ZOMPLEX*)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A =(CBLAS_TEST_ZOMPLEX*)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -251,8 +251,8 @@ void F77_zhpmv(int *order, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha,
beta, y, *incy);
else {
LDA = *n;
A = (CBLAS_TEST_ZOMPLEX* )malloc(LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
AP = (CBLAS_TEST_ZOMPLEX* )malloc( (((LDA+1)*LDA)/2)*
A = (CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
AP = (CBLAS_TEST_ZOMPLEX* )malloc( ((((size_t)LDA+1)*LDA)/2)*
sizeof( CBLAS_TEST_ZOMPLEX ));
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
@ -311,7 +311,7 @@ void F77_ztbmv(int *order, char *uplow, char *transp, char *diagn,
x, *incx);
else {
LDA = *k+2;
A=(CBLAS_TEST_ZOMPLEX *)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX *)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -375,7 +375,7 @@ void F77_ztbsv(int *order, char *uplow, char *transp, char *diagn,
*incx);
else {
LDA = *k+2;
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n+*k)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n+*k)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
if (uplo == CblasUpper) {
for( i=0; i<*k; i++ ){
irow=*k-i;
@ -436,8 +436,8 @@ void F77_ztpmv(int *order, char *uplow, char *transp, char *diagn,
cblas_ztpmv( CblasRowMajor, UNDEFINED, trans, diag, *n, ap, x, *incx );
else {
LDA = *n;
A=(CBLAS_TEST_ZOMPLEX*)malloc(LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
AP=(CBLAS_TEST_ZOMPLEX*)malloc((((LDA+1)*LDA)/2)*
A=(CBLAS_TEST_ZOMPLEX*)malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
AP=(CBLAS_TEST_ZOMPLEX*)malloc(((((size_t)LDA+1)*LDA)/2)*
sizeof(CBLAS_TEST_ZOMPLEX));
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
@ -491,8 +491,8 @@ void F77_ztpsv(int *order, char *uplow, char *transp, char *diagn,
cblas_ztpsv( CblasRowMajor, UNDEFINED, trans, diag, *n, ap, x, *incx );
else {
LDA = *n;
A=(CBLAS_TEST_ZOMPLEX*)malloc(LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
AP=(CBLAS_TEST_ZOMPLEX*)malloc((((LDA+1)*LDA)/2)*
A=(CBLAS_TEST_ZOMPLEX*)malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
AP=(CBLAS_TEST_ZOMPLEX*)malloc(((((size_t)LDA+1)*LDA)/2)*
sizeof(CBLAS_TEST_ZOMPLEX));
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
@ -544,7 +544,7 @@ void F77_ztrmv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA=*n+1;
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -573,7 +573,7 @@ void F77_ztrsv(int *order, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A =(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
A =(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
A[ LDA*i+j ].real=a[ (*lda)*j+i ].real;
@ -601,8 +601,8 @@ void F77_zhpr(int *order, char *uplow, int *n, double *alpha,
cblas_zhpr(CblasRowMajor, UNDEFINED, *n, *alpha, x, *incx, ap );
else {
LDA = *n;
A = (CBLAS_TEST_ZOMPLEX* )malloc(LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
AP = ( CBLAS_TEST_ZOMPLEX* )malloc( (((LDA+1)*LDA)/2)*
A = (CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
AP = ( CBLAS_TEST_ZOMPLEX* )malloc( ((((size_t)LDA+1)*LDA)/2)*
sizeof( CBLAS_TEST_ZOMPLEX ));
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
@ -678,8 +678,8 @@ void F77_zhpr2(int *order, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha,
*incy, ap );
else {
LDA = *n;
A=(CBLAS_TEST_ZOMPLEX*)malloc( LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
AP=(CBLAS_TEST_ZOMPLEX*)malloc( (((LDA+1)*LDA)/2)*
A=(CBLAS_TEST_ZOMPLEX*)malloc( (size_t)LDA*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
AP=(CBLAS_TEST_ZOMPLEX*)malloc( ((((size_t)LDA+1)*LDA)/2)*
sizeof( CBLAS_TEST_ZOMPLEX ));
if (uplo == CblasUpper) {
for( j=0, k=0; j<*n; j++ )
@ -750,7 +750,7 @@ void F77_zher(int *order, char *uplow, int *n, double *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n)*LDA*sizeof( CBLAS_TEST_ZOMPLEX ));
A=(CBLAS_TEST_ZOMPLEX*)malloc((*n)*(size_t)LDA*sizeof( CBLAS_TEST_ZOMPLEX ));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
@ -784,7 +784,7 @@ void F77_zher2(int *order, char *uplow, int *n, CBLAS_TEST_ZOMPLEX *alpha,
if (*order == TEST_ROW_MJR) {
LDA = *n+1;
A= ( CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
A= ( CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {

View File

@ -26,7 +26,7 @@ void F77_zgemm(int *order, char *transpa, char *transpb, int *m, int *n,
if (*order == TEST_ROW_MJR) {
if (transa == CblasNoTrans) {
LDA = *k+1;
A=(CBLAS_TEST_ZOMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX*)malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -35,7 +35,7 @@ void F77_zgemm(int *order, char *transpa, char *transpb, int *m, int *n,
}
else {
LDA = *m+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*k; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -45,7 +45,7 @@ void F77_zgemm(int *order, char *transpa, char *transpb, int *m, int *n,
if (transb == CblasNoTrans) {
LDB = *n+1;
B=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_ZOMPLEX) );
B=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
@ -54,7 +54,7 @@ void F77_zgemm(int *order, char *transpa, char *transpb, int *m, int *n,
}
else {
LDB = *k+1;
B=(CBLAS_TEST_ZOMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_ZOMPLEX));
B=(CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDB*(*n)*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
@ -63,7 +63,7 @@ void F77_zgemm(int *order, char *transpa, char *transpb, int *m, int *n,
}
LDC = *n+1;
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_ZOMPLEX));
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX));
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
@ -103,7 +103,7 @@ void F77_zhemm(int *order, char *rtlf, char *uplow, int *m, int *n,
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A= (CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A= (CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -112,7 +112,7 @@ void F77_zhemm(int *order, char *rtlf, char *uplow, int *m, int *n,
}
else{
LDA = *n+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -120,14 +120,14 @@ void F77_zhemm(int *order, char *rtlf, char *uplow, int *m, int *n,
}
}
LDB = *n+1;
B=(CBLAS_TEST_ZOMPLEX* )malloc( (*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX ) );
B=(CBLAS_TEST_ZOMPLEX* )malloc( (*m)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
B[i*LDB+j].imag=b[j*(*ldb)+i].imag;
}
LDC = *n+1;
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
@ -167,25 +167,25 @@ void F77_zsymm(int *order, char *rtlf, char *uplow, int *m, int *n,
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
else{
LDA = *n+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ )
A[i*LDA+j]=a[j*(*lda)+i];
}
LDB = *n+1;
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX ));
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX ));
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ )
B[i*LDB+j]=b[j*(*ldb)+i];
LDC = *n+1;
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_ZOMPLEX));
C=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX));
for( j=0; j<*n; j++ )
for( i=0; i<*m; i++ )
C[i*LDC+j]=c[j*(*ldc)+i];
@ -221,7 +221,7 @@ void F77_zherk(int *order, char *uplow, char *transp, int *n, int *k,
if (*order == TEST_ROW_MJR) {
if (trans == CblasNoTrans) {
LDA = *k+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -230,7 +230,7 @@ void F77_zherk(int *order, char *uplow, char *transp, int *n, int *k,
}
else{
LDA = *n+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
A=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -238,7 +238,7 @@ void F77_zherk(int *order, char *uplow, char *transp, int *n, int *k,
}
}
LDC = *n+1;
C=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
C=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
@ -277,7 +277,7 @@ void F77_zsyrk(int *order, char *uplow, char *transp, int *n, int *k,
if (*order == TEST_ROW_MJR) {
if (trans == CblasNoTrans) {
LDA = *k+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -286,7 +286,7 @@ void F77_zsyrk(int *order, char *uplow, char *transp, int *n, int *k,
}
else{
LDA = *n+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
A=(CBLAS_TEST_ZOMPLEX* )malloc((*k)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -294,7 +294,7 @@ void F77_zsyrk(int *order, char *uplow, char *transp, int *n, int *k,
}
}
LDC = *n+1;
C=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
C=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
@ -333,8 +333,8 @@ void F77_zher2k(int *order, char *uplow, char *transp, int *n, int *k,
if (trans == CblasNoTrans) {
LDA = *k+1;
LDB = *k+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
B=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDB*sizeof(CBLAS_TEST_ZOMPLEX ));
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ));
B=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX ));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -346,8 +346,8 @@ void F77_zher2k(int *order, char *uplow, char *transp, int *n, int *k,
else {
LDA = *n+1;
LDB = *n+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc( LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX ) );
B=(CBLAS_TEST_ZOMPLEX* )malloc( LDB*(*k)*sizeof(CBLAS_TEST_ZOMPLEX ) );
A=(CBLAS_TEST_ZOMPLEX* )malloc( (size_t)LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX ) );
B=(CBLAS_TEST_ZOMPLEX* )malloc( (size_t)LDB*(*k)*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ){
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -357,7 +357,7 @@ void F77_zher2k(int *order, char *uplow, char *transp, int *n, int *k,
}
}
LDC = *n+1;
C=(CBLAS_TEST_ZOMPLEX* )malloc( (*n)*LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
C=(CBLAS_TEST_ZOMPLEX* )malloc( (*n)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
@ -397,8 +397,8 @@ void F77_zsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
if (trans == CblasNoTrans) {
LDA = *k+1;
LDB = *k+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
B=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDB*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
B=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*k; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -410,8 +410,8 @@ void F77_zsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
else {
LDA = *n+1;
LDB = *n+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
B=(CBLAS_TEST_ZOMPLEX* )malloc(LDB*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDA*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
B=(CBLAS_TEST_ZOMPLEX* )malloc((size_t)LDB*(*k)*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*k; i++ )
for( j=0; j<*n; j++ ){
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -421,7 +421,7 @@ void F77_zsyr2k(int *order, char *uplow, char *transp, int *n, int *k,
}
}
LDC = *n+1;
C=(CBLAS_TEST_ZOMPLEX* )malloc( (*n)*LDC*sizeof(CBLAS_TEST_ZOMPLEX));
C=(CBLAS_TEST_ZOMPLEX* )malloc( (*n)*(size_t)LDC*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
C[i*LDC+j].real=c[j*(*ldc)+i].real;
@ -463,7 +463,7 @@ void F77_ztrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -472,7 +472,7 @@ void F77_ztrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
}
else{
LDA = *n+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -480,7 +480,7 @@ void F77_ztrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
}
}
LDB = *n+1;
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX));
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;
@ -522,7 +522,7 @@ void F77_ztrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
if (*order == TEST_ROW_MJR) {
if (side == CblasLeft) {
LDA = *m+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc( (*m)*LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
A=(CBLAS_TEST_ZOMPLEX* )malloc( (*m)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX ) );
for( i=0; i<*m; i++ )
for( j=0; j<*m; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -531,7 +531,7 @@ void F77_ztrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
}
else{
LDA = *n+1;
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_ZOMPLEX));
A=(CBLAS_TEST_ZOMPLEX* )malloc((*n)*(size_t)LDA*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*n; i++ )
for( j=0; j<*n; j++ ) {
A[i*LDA+j].real=a[j*(*lda)+i].real;
@ -539,7 +539,7 @@ void F77_ztrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
}
}
LDB = *n+1;
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_ZOMPLEX));
B=(CBLAS_TEST_ZOMPLEX* )malloc((*m)*(size_t)LDB*sizeof(CBLAS_TEST_ZOMPLEX));
for( i=0; i<*m; i++ )
for( j=0; j<*n; j++ ) {
B[i*LDB+j].real=b[j*(*ldb)+i].real;

View File

@ -1,3 +1,4 @@
#include "cblas_test.h"
int CBLAS_CallFromC;
int RowMajorStrg;

View File

@ -126,7 +126,7 @@ extern void openblas_warning(int verbose, const char * msg);
#endif
#define get_cpu_ftr(id, var) ({ \
__asm__ ("mrs %0, "#id : "=r" (var)); \
__asm__ __volatile__ ("mrs %0, "#id : "=r" (var)); \
})
static char *corename[] = {

View File

@ -139,9 +139,13 @@ endif
ifneq (,$(filter 1 2,$(NOFORTRAN)))
#only build without Fortran
$(CC) $(CFLAGS) $(LDFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(INTERNALNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
else
ifeq ($(F_COMPILER), INTEL)
$(FC) $(FFLAGS) $(LDFLAGS) -all-load -headerpad_max_install_names -install_name "$(CURDIR)/../$(INTERNALNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def
else
$(FC) $(FFLAGS) $(LDFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(INTERNALNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
endif
endif
dllinit.$(SUFFIX) : dllinit.c
$(CC) $(CFLAGS) -c -o $(@F) -s $<

View File

@ -391,10 +391,6 @@ if ($link ne "") {
}
if ($vendor eq "INTEL"){
$linker_a .= "-lgfortran"
}
if ($vendor eq "FLANG"){
$linker_a .= "-lflang"
}

View File

@ -4,7 +4,15 @@
#else
#include "config_kernel.h"
#endif
#include "common.h"
#if (defined(__WIN32__) || defined(__WIN64__) || defined(__CYGWIN32__) || defined(__CYGWIN64__) || defined(_WIN32) || defined(_WIN64)) && defined(__64BIT__)
typedef long long BLASLONG;
typedef unsigned long long BLASULONG;
#else
typedef long BLASLONG;
typedef unsigned long BLASULONG;
#endif
#include "param.h"
int main(int argc, char **argv) {

View File

@ -150,9 +150,9 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
#endif
if ( *lda > *ldb )
msize = (*lda) * (*ldb) * sizeof(FLOAT);
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT);
else
msize = (*ldb) * (*ldb) * sizeof(FLOAT);
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT);
b = malloc(msize);
if ( b == NULL )

View File

@ -172,9 +172,9 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
#endif
if ( *lda > *ldb )
msize = (*lda) * (*ldb) * sizeof(FLOAT) * 2;
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2;
else
msize = (*ldb) * (*ldb) * sizeof(FLOAT) * 2;
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2;
b = malloc(msize);
if ( b == NULL )

View File

@ -79,8 +79,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
aa_i = fabs(da_r);
}
scale = (aa_i / aa_r);
ada = aa_r * sqrt(ONE + scale * scale);
if (aa_r == ZERO) {
ada = 0.;
} else {
scale = (aa_i / aa_r);
ada = aa_r * sqrt(ONE + scale * scale);
}
bb_r = fabs(db_r);
bb_i = fabs(db_i);
@ -90,9 +94,12 @@ void NAME(FLOAT *DA, FLOAT *DB, FLOAT *C, FLOAT *S){
bb_i = fabs(bb_r);
}
scale = (bb_i / bb_r);
adb = bb_r * sqrt(ONE + scale * scale);
if (bb_r == ZERO) {
adb = 0.;
} else {
scale = (bb_i / bb_r);
adb = bb_r * sqrt(ONE + scale * scale);
}
scale = ada + adb;
aa_r = da_r / scale;

View File

@ -270,11 +270,6 @@ All rights reserved.
ldr s1, [A02]
ldr s2, [A03]
ldr s3, [A04]
add A01, A01, #4
add A02, A02, #4
add A03, A03, #4
add A04, A04, #4
stp s0, s1, [B04]
add B04, B04, #8
@ -285,11 +280,6 @@ All rights reserved.
ldr s5, [A06]
ldr s6, [A07]
ldr s7, [A08]
ldr d4, [A05], #8
ldr d5, [A06], #8
ldr d6, [A07], #8
ldr d7, [A08], #8
stp s4, s5, [B04]
add B04, B04, #8

View File

@ -169,8 +169,13 @@ ZROTKERNEL = zrot.c
#
SSCALKERNEL = sscal.c
DSCALKERNEL = dscal.c
ifeq ($(C_COMPILER), PGI)
CSCALKERNEL = ../arm/zscal.c
ZSCALKERNEL = ../arm/zscal.c
else
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c
endif
#
SSWAPKERNEL = sswap.c
DSWAPKERNEL = dswap.c
@ -181,7 +186,7 @@ ZSWAPKERNEL = zswap.c
SGEMVNKERNEL = sgemv_n.c
DGEMVNKERNEL = dgemv_n_power10.c
CGEMVNKERNEL = cgemv_n.c
ZGEMVNKERNEL = zgemv_n_4.c
ZGEMVNKERNEL = zgemv_n_power10.c
#
SGEMVTKERNEL = sgemv_t.c
DGEMVTKERNEL = dgemv_t_power10.c

View File

@ -242,8 +242,13 @@ ZROTKERNEL = zrot.c
#
SSCALKERNEL = sscal.c
DSCALKERNEL = dscal.c
ifeq ($(C_COMPILER), PGI)
CSCALKERNEL = ../arm/zscal.c
ZSCALKERNEL = ../arm/zscal.c
else
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c
endif
#
SSWAPKERNEL = sswap.c
DSWAPKERNEL = dswap.c

View File

@ -166,8 +166,13 @@ ZROTKERNEL = zrot.c
#
SSCALKERNEL = sscal.c
DSCALKERNEL = dscal.c
ifeq ($(C_COMPILER), PGI)
CSCALKERNEL = ../arm/zscal.c
ZSCALKERNEL = ../arm/zscal.c
else
CSCALKERNEL = zscal.c
ZSCALKERNEL = zscal.c
endif
#
SSWAPKERNEL = sswap.c
DSWAPKERNEL = dswap.c

View File

@ -28,7 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else
#include "common.h"
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "cdot_microk_power10.c"
#else
#ifndef HAVE_KERNEL_8
@ -120,7 +120,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
if ((inc_x == 1) && (inc_y == 1)) {
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
BLASLONG n1 = n & -16;
#else
BLASLONG n1 = n & -8;

View File

@ -39,8 +39,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "cswap_microk_power8.c"
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "cswap_microk_power10.c"
#elif defined(POWER10)
#include "cswap_microk_power8.c"
#endif
#endif

View File

@ -49,8 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "dasum_microk_power8.c"
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "dasum_microk_power10.c"
#elif defined(POWER10)
#include "dasum_microk_power8.c"
#endif
#endif
@ -112,7 +114,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
if ( inc_x == 1 )
{
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
if ( n >= 16 )
{
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;

View File

@ -190,10 +190,9 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
__vector_quad acc0, acc1, acc2, acc3, acc4,acc5,acc6,acc7;
BLASLONG l = 0;
vec_t *rowA = (vec_t *) & AO[0];
vec_t *rb = (vec_t *) & BO[0];
__vector_pair rowB, rowB1;
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
rowB = *((__vector_pair *)((void *)&BO[0]));
rowB1 = *((__vector_pair *)((void *)&BO[4]));
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
@ -205,9 +204,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
for (l = 1; l < temp; l++)
{
rowA = (vec_t *) & AO[l << 3];
rb = (vec_t *) & BO[l << 3];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
rowB = *((__vector_pair *)((void *)&BO[l << 3]));
rowB1 = *((__vector_pair *)((void *)&BO[(l << 3) + 4]));
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
@ -247,9 +245,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
BLASLONG l = 0;
vec_t *rowA = (vec_t *) & AO[0];
__vector_pair rowB, rowB1;
vec_t *rb = (vec_t *) & BO[0];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
rowB = *((__vector_pair *)((void *)&BO[0]));
rowB1 = *((__vector_pair *)((void *)&BO[4]));
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
__builtin_mma_xvf64ger (&acc2, rowB, rowA[1]);
@ -257,9 +254,8 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
for (l = 1; l < temp; l++)
{
rowA = (vec_t *) & AO[l << 2];
rb = (vec_t *) & BO[l << 3];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
rowB = *((__vector_pair *)((void *)&BO[l << 3]));
rowB1 = *((__vector_pair *)((void *)&BO[(l << 3) + 4]));
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[1]);
@ -291,17 +287,15 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
BLASLONG l = 0;
vec_t *rowA = (vec_t *) & AO[0];
__vector_pair rowB, rowB1;
vec_t *rb = (vec_t *) & BO[0];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
rowB = *((__vector_pair *)((void *)&BO[0]));
rowB1 = *((__vector_pair *)((void *)&BO[4]));
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB1, rowA[0]);
for (l = 1; l < temp; l++)
{
rowA = (vec_t *) & AO[l << 1];
rb = (vec_t *) & BO[l << 3];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
__builtin_vsx_assemble_pair (&rowB1, rb[3], rb[2]);
rowB = *((__vector_pair *)((void *)&BO[l << 3]));
rowB1 = *((__vector_pair *)((void *)&BO[(l << 3) + 4]));
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB1, rowA[0]);
}
@ -403,8 +397,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
BLASLONG l = 0;
vec_t *rowA = (vec_t *) & AO[0];
__vector_pair rowB;
vec_t *rb = (vec_t *) & BO[0];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
rowB = *((__vector_pair *)((void *)&BO[0]));
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
__builtin_mma_xvf64ger (&acc2, rowB, rowA[2]);
@ -412,8 +405,7 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
for (l = 1; l < temp; l++)
{
rowA = (vec_t *) & AO[l << 3];
rb = (vec_t *) & BO[l << 2];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
rowB = *((__vector_pair *)((void *)&BO[l << 2]));
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
__builtin_mma_xvf64gerpp (&acc2, rowB, rowA[2]);
@ -445,15 +437,13 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
BLASLONG l = 0;
vec_t *rowA = (vec_t *) & AO[0];
__vector_pair rowB;
vec_t *rb = (vec_t *) & BO[0];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
rowB = *((__vector_pair *)((void *)&BO[0]));
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64ger (&acc1, rowB, rowA[1]);
for (l = 1; l < temp; l++)
{
rowA = (vec_t *) & AO[l << 2];
rb = (vec_t *) & BO[l << 2];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
rowB = *((__vector_pair *)((void *)&BO[l << 2]));
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
__builtin_mma_xvf64gerpp (&acc1, rowB, rowA[1]);
}
@ -481,14 +471,12 @@ CNAME (BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, FLOAT * A, FLOAT * B,
BLASLONG l = 0;
vec_t *rowA = (vec_t *) & AO[0];
__vector_pair rowB;
vec_t *rb = (vec_t *) & BO[0];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
rowB = *((__vector_pair *)((void *)&BO[0]));
__builtin_mma_xvf64ger (&acc0, rowB, rowA[0]);
for (l = 1; l < temp; l++)
{
rowA = (vec_t *) & AO[l << 1];
rb = (vec_t *) & BO[l << 2];
__builtin_vsx_assemble_pair (&rowB, rb[1], rb[0]);
rowB = *((__vector_pair *)((void *)&BO[l << 2]));
__builtin_mma_xvf64gerpp (&acc0, rowB, rowA[0]);
}
SAVE_ACC (&acc0, 0);

View File

@ -42,8 +42,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "drot_microk_power8.c"
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "drot_microk_power10.c"
#elif defined(POWER10)
#include "drot_microk_power8.c"
#endif
#endif
@ -117,7 +119,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
if ( (inc_x == 1) && (inc_y == 1) )
{
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
if ( n >= 16 )
{
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3;

View File

@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "dscal_microk_power8.c"
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "dscal_microk_power10.c"
#elif defined(POWER10)
#include "dscal_microk_power8.c"
#endif
#endif
@ -102,7 +104,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
if ( da == 0.0 )
{
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
if ( n >= 16 )
{
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;
@ -136,7 +138,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
else
{
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
if ( n >= 16 )
{
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 3) & 0x3;

View File

@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "dswap_microk_power8.c"
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "swap_microk_power10.c"
#elif defined(POWER10)
#include "dswap_microk_power8.c"
#endif
#endif
@ -117,7 +119,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
if ( (inc_x == 1) && (inc_y == 1 ))
{
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
if ( n >= 32 )
{
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3;

View File

@ -49,8 +49,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "sasum_microk_power8.c"
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "sasum_microk_power10.c"
#elif defined(POWER10)
#include "sasum_microk_power8.c"
#endif
#endif
@ -112,7 +114,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
if ( inc_x == 1 )
{
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
if ( n >= 32 )
{
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;

View File

@ -42,8 +42,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "srot_microk_power8.c"
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "srot_microk_power10.c"
#elif defined(POWER10)
#include "srot_microk_power8.c"
#endif
#endif
@ -117,7 +119,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT
if ( (inc_x == 1) && (inc_y == 1) )
{
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
if ( n >= 16 )
{
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7;

View File

@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "sscal_microk_power8.c"
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "sscal_microk_power10.c"
#elif defined(POWER10)
#include "sscal_microk_power8.c"
#endif
#endif
@ -104,7 +106,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
if ( da == 0.0 )
{
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
if ( n >= 32 )
{
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;
@ -138,7 +140,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
else
{
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
if ( n >= 32 )
{
BLASLONG align = ((32 - ((uintptr_t)x & (uintptr_t)0x1F)) >> 2) & 0x7;

View File

@ -38,8 +38,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "sswap_microk_power8.c"
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "swap_microk_power10.c"
#elif defined(POWER10)
#include "sswap_microk_power8.c"
#endif
#endif
@ -117,7 +119,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
if ( (inc_x == 1) && (inc_y == 1 ))
{
#if defined(POWER10)
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
if ( n >= 64 )
{
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7;

File diff suppressed because it is too large Load Diff

View File

@ -43,6 +43,134 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#elif HAVE_KERNEL_4x4_VEC
#if defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
typedef __vector unsigned char vec_t;
typedef FLOAT v4sf_t __attribute__ ((vector_size (16)));
/*
 * zgemv_kernel_4x4, variant built on the MMA builtins (__vector_quad
 * accumulators fed by __builtin_mma_xvf64gerpp).
 *
 * Parameters:
 *   n        element count; it is doubled on entry and then used as an index
 *            limit in FLOAT units (presumably n arrives as a number of complex
 *            elements, two FLOATs each - confirm against the caller).
 *   lda      distance, in FLOATs, between the four source vectors a0..a3.
 *   ap       start of the first source vector; a1..a3 follow at multiples of lda.
 *   x        input vector, read with the same FLOAT index as a0..a3.
 *   y        eight FLOATs that are updated in place (y[0..7]); the code treats
 *            them as four (real, imaginary) pairs, one pair per source vector.
 *   alpha_r, alpha_i   real and imaginary parts of the scaling factor.
 *
 * NOTE(review): the main loop advances 8 FLOATs per pass and has no remainder
 * handling, so the doubled n is presumably expected to be a multiple of 8.
 */
static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
  BLASLONG i;
  FLOAT *a0, *a1, *a2, *a3;
  a0 = ap;
  a1 = ap + lda;
  a2 = a1 + lda;
  a3 = a2 + lda;
  /* acc0..acc3 and acc4..acc7 form two accumulator sets, one pair (accK, accK+4) per source vector aK. */
  __vector_quad acc0, acc1, acc2, acc3;
  __vector_quad acc4, acc5, acc6, acc7;
  v4sf_t result[4];
  __vector_pair *Va0, *Va1, *Va2, *Va3;
  i = 0;
  n = n << 1;
  __builtin_mma_xxsetaccz (&acc0);
  __builtin_mma_xxsetaccz (&acc1);
  __builtin_mma_xxsetaccz (&acc2);
  __builtin_mma_xxsetaccz (&acc3);
  __builtin_mma_xxsetaccz (&acc4);
  __builtin_mma_xxsetaccz (&acc5);
  __builtin_mma_xxsetaccz (&acc6);
  __builtin_mma_xxsetaccz (&acc7);
  while (i < n) {
    /* rx[0..3] are the four 16-byte vectors starting at x[i]; VaK[0..1] are the two vector pairs starting at aK[i]. */
    vec_t *rx = (vec_t *) & x[i];
    Va0 = ((__vector_pair*)((void*)&a0[i]));
    Va1 = ((__vector_pair*)((void*)&a1[i]));
    Va2 = ((__vector_pair*)((void*)&a2[i]));
    Va3 = ((__vector_pair*)((void*)&a3[i]));

    /* First vector pair of each source vector: rx[0] feeds acc0..acc3, rx[1] feeds acc4..acc7. */
    __builtin_mma_xvf64gerpp (&acc0, Va0[0], rx[0]);
    __builtin_mma_xvf64gerpp (&acc1, Va1[0], rx[0]);
    __builtin_mma_xvf64gerpp (&acc2, Va2[0], rx[0]);
    __builtin_mma_xvf64gerpp (&acc3, Va3[0], rx[0]);
    __builtin_mma_xvf64gerpp (&acc4, Va0[0], rx[1]);
    __builtin_mma_xvf64gerpp (&acc5, Va1[0], rx[1]);
    __builtin_mma_xvf64gerpp (&acc6, Va2[0], rx[1]);
    __builtin_mma_xvf64gerpp (&acc7, Va3[0], rx[1]);
    /* Second vector pair: same split, now with rx[2] and rx[3]. */
    __builtin_mma_xvf64gerpp (&acc0, Va0[1], rx[2]);
    __builtin_mma_xvf64gerpp (&acc1, Va1[1], rx[2]);
    __builtin_mma_xvf64gerpp (&acc2, Va2[1], rx[2]);
    __builtin_mma_xvf64gerpp (&acc3, Va3[1], rx[2]);
    __builtin_mma_xvf64gerpp (&acc4, Va0[1], rx[3]);
    __builtin_mma_xvf64gerpp (&acc5, Va1[1], rx[3]);
    __builtin_mma_xvf64gerpp (&acc6, Va2[1], rx[3]);
    __builtin_mma_xvf64gerpp (&acc7, Va3[1], rx[3]);
    i += 8;
  }
  /*
   * Reduce each accumulator pair to one (temp_rK, temp_iK).  Only rows 0-1 of
   * acc0..acc3 and rows 2-3 of acc4..acc7 are read; the other rows are ignored
   * (presumably they hold cross terms between non-matching elements).
   * The two branches differ only in the sign applied to the [1][1]/[3][1] and
   * [1][0]/[3][0] terms.
   */
#if ( !defined(CONJ) && !defined(XCONJ) ) || ( defined(CONJ) && defined(XCONJ) )
  __builtin_mma_disassemble_acc ((void *)result, &acc0);
  register FLOAT temp_r0 = result[0][0] - result[1][1];
  register FLOAT temp_i0 = result[0][1] + result[1][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc4);
  temp_r0 += result[2][0] - result[3][1];
  temp_i0 += result[2][1] + result[3][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc1);
  register FLOAT temp_r1 = result[0][0] - result[1][1];
  register FLOAT temp_i1 = result[0][1] + result[1][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc5);
  temp_r1 += result[2][0] - result[3][1];
  temp_i1 += result[2][1] + result[3][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc2);
  register FLOAT temp_r2 = result[0][0] - result[1][1];
  register FLOAT temp_i2 = result[0][1] + result[1][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc6);
  temp_r2 += result[2][0] - result[3][1];
  temp_i2 += result[2][1] + result[3][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc3);
  register FLOAT temp_r3 = result[0][0] - result[1][1];
  register FLOAT temp_i3 = result[0][1] + result[1][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc7);
  temp_r3 += result[2][0] - result[3][1];
  temp_i3 += result[2][1] + result[3][0];
#else
  __builtin_mma_disassemble_acc ((void *)result, &acc0);
  register FLOAT temp_r0 = result[0][0] + result[1][1];
  register FLOAT temp_i0 = result[0][1] - result[1][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc4);
  temp_r0 += result[2][0] + result[3][1];
  temp_i0 += result[2][1] - result[3][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc1);
  register FLOAT temp_r1 = result[0][0] + result[1][1];
  register FLOAT temp_i1 = result[0][1] - result[1][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc5);
  temp_r1 += result[2][0] + result[3][1];
  temp_i1 += result[2][1] - result[3][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc2);
  register FLOAT temp_r2 = result[0][0] + result[1][1];
  register FLOAT temp_i2 = result[0][1] - result[1][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc6);
  temp_r2 += result[2][0] + result[3][1];
  temp_i2 += result[2][1] - result[3][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc3);
  register FLOAT temp_r3 = result[0][0] + result[1][1];
  register FLOAT temp_i3 = result[0][1] - result[1][0];
  __builtin_mma_disassemble_acc ((void *)result, &acc7);
  temp_r3 += result[2][0] + result[3][1];
  temp_i3 += result[2][1] - result[3][0];
#endif
  /*
   * Scale by alpha and accumulate into y.  Without XCONJ this is the complex
   * product alpha * temp; with XCONJ it is alpha * conj(temp).
   */
#if !defined(XCONJ)
  y[0] += alpha_r * temp_r0 - alpha_i * temp_i0;
  y[1] += alpha_r * temp_i0 + alpha_i * temp_r0;
  y[2] += alpha_r * temp_r1 - alpha_i * temp_i1;
  y[3] += alpha_r * temp_i1 + alpha_i * temp_r1;
  y[4] += alpha_r * temp_r2 - alpha_i * temp_i2;
  y[5] += alpha_r * temp_i2 + alpha_i * temp_r2;
  y[6] += alpha_r * temp_r3 - alpha_i * temp_i3;
  y[7] += alpha_r * temp_i3 + alpha_i * temp_r3;
#else
  y[0] += alpha_r * temp_r0 + alpha_i * temp_i0;
  y[1] -= alpha_r * temp_i0 - alpha_i * temp_r0;
  y[2] += alpha_r * temp_r1 + alpha_i * temp_i1;
  y[3] -= alpha_r * temp_i1 - alpha_i * temp_r1;
  y[4] += alpha_r * temp_r2 + alpha_i * temp_i2;
  y[5] -= alpha_r * temp_i2 - alpha_i * temp_r2;
  y[6] += alpha_r * temp_r3 + alpha_i * temp_i3;
  y[7] -= alpha_r * temp_i3 - alpha_i * temp_r3;
#endif
}
#else
static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {
BLASLONG i;
FLOAT *a0, *a1, *a2, *a3;
@ -198,6 +326,7 @@ static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOA
#endif
}
#endif
#else
static void zgemv_kernel_4x4(BLASLONG n, BLASLONG lda, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT alpha_r, FLOAT alpha_i) {

View File

@ -43,12 +43,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(DOUBLE)
#include "zscal_microk_power8.c"
#endif
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#if defined(DOUBLE)
#include "zscal_microk_power10.c"
#else
#include "cscal_microk_power10.c"
#endif
#elif defined(POWER10)
#if defined(DOUBLE)
#include "zscal_microk_power8.c"
#endif
#endif
#endif

View File

@ -39,8 +39,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(__VEC__) || defined(__ALTIVEC__)
#if defined(POWER8) || defined(POWER9)
#include "zswap_microk_power8.c"
#elif defined(POWER10)
#elif defined(POWER10) && (__BYTE_ORDER__ != __ORDER_BIG_ENDIAN__)
#include "cswap_microk_power10.c"
#elif defined(POWER10)
#include "zswap_microk_power8.c"
#endif
#endif

View File

@ -491,4 +491,3 @@ SSUMKERNEL = ../arm/sum.c
DSUMKERNEL = ../arm/sum.c
SOMATCOPY_RT = omatcopy_rt.c
DOMATCOPY_RT = omatcopy_rt.c

View File

@ -1,6 +1,4 @@
/* need a new enough GCC for avx512 support */
#if (( defined(__GNUC__) && __GNUC__ > 6 && defined(__AVX512CD__)) || (defined(__clang__) && __clang_major__ >= 9))
#if defined(HAVE_FMA3) && defined(HAVE_AVX2)
#define HAVE_DROT_KERNEL 1
#include <immintrin.h>

View File

@ -1,10 +1,10 @@
/* the direct sgemm code written by Arjan van der Ven */
#include "common.h"
#if defined(SKYLAKEX) || defined (COOPERLAKE)
#include <immintrin.h>
#include "common.h"
/*
* "Direct sgemm" code. This code operates directly on the inputs and outputs
@ -472,7 +472,7 @@ void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG s
}
}
#else
#include "common.h"
void CNAME (BLASLONG M, BLASLONG N, BLASLONG K, float * __restrict A, BLASLONG strideA, float * __restrict B, BLASLONG strideB , float * __restrict R, BLASLONG strideR)
{}
#endif

View File

@ -501,7 +501,11 @@ CNAME(BLASLONG m, BLASLONG n, BLASLONG k, float alpha, float * __restrict__ A, f
int32_t permil[16] = {0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3};
BLASLONG n_count = n;
float *a_pointer = A,*b_pointer = B,*c_pointer = C,*ctemp = C,*next_b = B;
#if defined(__clang__)
for(;n_count>23;n_count-=24) COMPUTE(24)
#else
for(;n_count>23;n_count-=24) COMPUTE_n24
#endif
for(;n_count>19;n_count-=20) COMPUTE(20)
for(;n_count>15;n_count-=16) COMPUTE(16)
for(;n_count>11;n_count-=12) COMPUTE(12)

View File

@ -566,8 +566,8 @@ void LAPACK_cgbrfsx(
lapack_int const* n, lapack_int const* kl, lapack_int const* ku, lapack_int const* nrhs,
lapack_complex_float const* AB, lapack_int const* ldab,
lapack_complex_float const* AFB, lapack_int const* ldafb, lapack_int const* ipiv,
float* R,
float* C,
const float* R,
const float* C,
lapack_complex_float const* B, lapack_int const* ldb,
lapack_complex_float* X, lapack_int const* ldx,
float* rcond,
@ -585,8 +585,8 @@ void LAPACK_dgbrfsx(
lapack_int const* n, lapack_int const* kl, lapack_int const* ku, lapack_int const* nrhs,
double const* AB, lapack_int const* ldab,
double const* AFB, lapack_int const* ldafb, lapack_int const* ipiv,
double* R,
double* C,
const double* R,
const double* C,
double const* B, lapack_int const* ldb,
double* X, lapack_int const* ldx,
double* rcond,
@ -604,8 +604,8 @@ void LAPACK_sgbrfsx(
lapack_int const* n, lapack_int const* kl, lapack_int const* ku, lapack_int const* nrhs,
float const* AB, lapack_int const* ldab,
float const* AFB, lapack_int const* ldafb, lapack_int const* ipiv,
float* R,
float* C,
const float* R,
const float* C,
float const* B, lapack_int const* ldb,
float* X, lapack_int const* ldx,
float* rcond,
@ -623,8 +623,8 @@ void LAPACK_zgbrfsx(
lapack_int const* n, lapack_int const* kl, lapack_int const* ku, lapack_int const* nrhs,
lapack_complex_double const* AB, lapack_int const* ldab,
lapack_complex_double const* AFB, lapack_int const* ldafb, lapack_int const* ipiv,
double* R,
double* C,
const double* R,
const double* C,
lapack_complex_double const* B, lapack_int const* ldb,
lapack_complex_double* X, lapack_int const* ldx,
double* rcond,
@ -2941,6 +2941,42 @@ void LAPACK_zgetsls(
lapack_complex_double* work, lapack_int const* lwork,
lapack_int* info );
/*
 * Prototypes for the four precision variants (c, d, s, z) of xGETSQRHRT.
 * Each public name is produced by LAPACK_GLOBAL from the lower- and
 * upper-case spellings of the routine name.  Every argument is passed by
 * pointer: the dimension/block-size arguments (m, n, mb1, nb1, nb2), the
 * leading dimensions (lda, ldt) and lwork are const; A, T, work and info are
 * writable.
 * NOTE(review): presumably these match the xGETSQRHRT routines imported from
 * Reference-LAPACK 3.9.1 - see that documentation for argument semantics.
 */
#define LAPACK_cgetsqrhrt LAPACK_GLOBAL(cgetsqrhrt,CGETSQRHRT)
void LAPACK_cgetsqrhrt(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
lapack_complex_float* A, lapack_int const* lda,
lapack_complex_float* T, lapack_int const* ldt,
lapack_complex_float* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_dgetsqrhrt LAPACK_GLOBAL(dgetsqrhrt,DGETSQRHRT)
void LAPACK_dgetsqrhrt(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
double* A, lapack_int const* lda,
double* T, lapack_int const* ldt,
double* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_sgetsqrhrt LAPACK_GLOBAL(sgetsqrhrt,SGETSQRHRT)
void LAPACK_sgetsqrhrt(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
float* A, lapack_int const* lda,
float* T, lapack_int const* ldt,
float* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_zgetsqrhrt LAPACK_GLOBAL(zgetsqrhrt,ZGETSQRHRT)
void LAPACK_zgetsqrhrt(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb1, lapack_int const* nb1, lapack_int const* nb2,
lapack_complex_double* A, lapack_int const* lda,
lapack_complex_double* T, lapack_int const* ldt,
lapack_complex_double* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_cggbak LAPACK_GLOBAL(cggbak,CGGBAK)
void LAPACK_cggbak(
char const* job, char const* side,
@ -4768,7 +4804,7 @@ void LAPACK_chegst(
lapack_int const* itype, char const* uplo,
lapack_int const* n,
lapack_complex_float* A, lapack_int const* lda,
lapack_complex_float* B, lapack_int const* ldb,
const lapack_complex_float* B, lapack_int const* ldb,
lapack_int* info );
#define LAPACK_zhegst LAPACK_GLOBAL(zhegst,ZHEGST)
@ -4776,7 +4812,7 @@ void LAPACK_zhegst(
lapack_int const* itype, char const* uplo,
lapack_int const* n,
lapack_complex_double* A, lapack_int const* lda,
lapack_complex_double* B, lapack_int const* ldb,
const lapack_complex_double* B, lapack_int const* ldb,
lapack_int* info );
#define LAPACK_chegv LAPACK_GLOBAL(chegv,CHEGV)
@ -4913,7 +4949,7 @@ void LAPACK_cherfsx(
lapack_int const* n, lapack_int const* nrhs,
lapack_complex_float const* A, lapack_int const* lda,
lapack_complex_float const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
float* S,
const float* S,
lapack_complex_float const* B, lapack_int const* ldb,
lapack_complex_float* X, lapack_int const* ldx,
float* rcond,
@ -4931,7 +4967,7 @@ void LAPACK_zherfsx(
lapack_int const* n, lapack_int const* nrhs,
lapack_complex_double const* A, lapack_int const* lda,
lapack_complex_double const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
double* S,
const double* S,
lapack_complex_double const* B, lapack_int const* ldb,
lapack_complex_double* X, lapack_int const* ldx,
double* rcond,
@ -7251,6 +7287,24 @@ void LAPACK_sorgtr(
float* work, lapack_int const* lwork,
lapack_int* info );
/*
 * Prototypes for the real variants (d, s) of xORGTSQR_ROW, named through
 * LAPACK_GLOBAL.  Every argument is passed by pointer: m, n, mb, nb, lda,
 * ldt and lwork are const, T is read-only (const), and A, work and info are
 * writable.
 * NOTE(review): presumably these match the xORGTSQR_ROW routines of
 * Reference-LAPACK 3.9.1 - see that documentation for argument semantics.
 */
#define LAPACK_dorgtsqr_row LAPACK_GLOBAL(dorgtsqr_row,DORGTSQR_ROW)
void LAPACK_dorgtsqr_row(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb, lapack_int const* nb,
double* A, lapack_int const* lda,
double const* T, lapack_int const* ldt,
double* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_sorgtsqr_row LAPACK_GLOBAL(sorgtsqr_row,SORGTSQR_ROW)
void LAPACK_sorgtsqr_row(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb, lapack_int const* nb,
float* A, lapack_int const* lda,
float const* T, lapack_int const* ldt,
float* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_dormbr LAPACK_GLOBAL(dormbr,DORMBR)
void LAPACK_dormbr(
char const* vect, char const* side, char const* trans,
@ -8005,7 +8059,7 @@ void LAPACK_cporfsx(
lapack_int const* n, lapack_int const* nrhs,
lapack_complex_float const* A, lapack_int const* lda,
lapack_complex_float const* AF, lapack_int const* ldaf,
float* S,
const float* S,
lapack_complex_float const* B, lapack_int const* ldb,
lapack_complex_float* X, lapack_int const* ldx,
float* rcond,
@ -8023,7 +8077,7 @@ void LAPACK_dporfsx(
lapack_int const* n, lapack_int const* nrhs,
double const* A, lapack_int const* lda,
double const* AF, lapack_int const* ldaf,
double* S,
const double* S,
double const* B, lapack_int const* ldb,
double* X, lapack_int const* ldx,
double* rcond,
@ -8041,7 +8095,7 @@ void LAPACK_sporfsx(
lapack_int const* n, lapack_int const* nrhs,
float const* A, lapack_int const* lda,
float const* AF, lapack_int const* ldaf,
float* S,
const float* S,
float const* B, lapack_int const* ldb,
float* X, lapack_int const* ldx,
float* rcond,
@ -8059,7 +8113,7 @@ void LAPACK_zporfsx(
lapack_int const* n, lapack_int const* nrhs,
lapack_complex_double const* A, lapack_int const* lda,
lapack_complex_double const* AF, lapack_int const* ldaf,
double* S,
const double* S,
lapack_complex_double const* B, lapack_int const* ldb,
lapack_complex_double* X, lapack_int const* ldx,
double* rcond,
@ -10756,7 +10810,7 @@ void LAPACK_csyrfsx(
lapack_int const* n, lapack_int const* nrhs,
lapack_complex_float const* A, lapack_int const* lda,
lapack_complex_float const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
float* S,
const float* S,
lapack_complex_float const* B, lapack_int const* ldb,
lapack_complex_float* X, lapack_int const* ldx,
float* rcond,
@ -10774,7 +10828,7 @@ void LAPACK_dsyrfsx(
lapack_int const* n, lapack_int const* nrhs,
double const* A, lapack_int const* lda,
double const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
double* S,
const double* S,
double const* B, lapack_int const* ldb,
double* X, lapack_int const* ldx,
double* rcond,
@ -10792,7 +10846,7 @@ void LAPACK_ssyrfsx(
lapack_int const* n, lapack_int const* nrhs,
float const* A, lapack_int const* lda,
float const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
float* S,
const float* S,
float const* B, lapack_int const* ldb,
float* X, lapack_int const* ldx,
float* rcond,
@ -10810,7 +10864,7 @@ void LAPACK_zsyrfsx(
lapack_int const* n, lapack_int const* nrhs,
lapack_complex_double const* A, lapack_int const* lda,
lapack_complex_double const* AF, lapack_int const* ldaf, lapack_int const* ipiv,
double* S,
const double* S,
lapack_complex_double const* B, lapack_int const* ldb,
lapack_complex_double* X, lapack_int const* ldx,
double* rcond,
@ -11556,7 +11610,7 @@ void LAPACK_zsytrs(
void LAPACK_csytrs2(
char const* uplo,
lapack_int const* n, lapack_int const* nrhs,
lapack_complex_float* A, lapack_int const* lda, lapack_int const* ipiv,
const lapack_complex_float* A, lapack_int const* lda, lapack_int const* ipiv,
lapack_complex_float* B, lapack_int const* ldb,
lapack_complex_float* work,
lapack_int* info );
@ -11565,7 +11619,7 @@ void LAPACK_csytrs2(
void LAPACK_dsytrs2(
char const* uplo,
lapack_int const* n, lapack_int const* nrhs,
double* A, lapack_int const* lda, lapack_int const* ipiv,
const double* A, lapack_int const* lda, lapack_int const* ipiv,
double* B, lapack_int const* ldb,
double* work,
lapack_int* info );
@ -11574,7 +11628,7 @@ void LAPACK_dsytrs2(
void LAPACK_ssytrs2(
char const* uplo,
lapack_int const* n, lapack_int const* nrhs,
float* A, lapack_int const* lda, lapack_int const* ipiv,
const float* A, lapack_int const* lda, lapack_int const* ipiv,
float* B, lapack_int const* ldb,
float* work,
lapack_int* info );
@ -11583,7 +11637,7 @@ void LAPACK_ssytrs2(
void LAPACK_zsytrs2(
char const* uplo,
lapack_int const* n, lapack_int const* nrhs,
lapack_complex_double* A, lapack_int const* lda, lapack_int const* ipiv,
const lapack_complex_double* A, lapack_int const* lda, lapack_int const* ipiv,
lapack_complex_double* B, lapack_int const* ldb,
lapack_complex_double* work,
lapack_int* info );
@ -13540,6 +13594,24 @@ void LAPACK_zungtr(
lapack_complex_double* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_cungtsqr_row LAPACK_GLOBAL(cungtsqr_row,CUNGTSQR_ROW)
void LAPACK_cungtsqr_row(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb, lapack_int const* nb,
lapack_complex_float* A, lapack_int const* lda,
lapack_complex_float const* T, lapack_int const* ldt,
lapack_complex_float* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_zungtsqr_row LAPACK_GLOBAL(zungtsqr_row,ZUNGTSQR_ROW)
void LAPACK_zungtsqr_row(
lapack_int const* m, lapack_int const* n,
lapack_int const* mb, lapack_int const* nb,
lapack_complex_double* A, lapack_int const* lda,
lapack_complex_double const* T, lapack_int const* ldt,
lapack_complex_double* work, lapack_int const* lwork,
lapack_int* info );
#define LAPACK_cunmbr LAPACK_GLOBAL(cunmbr,CUNMBR)
void LAPACK_cunmbr(
char const* vect, char const* side, char const* trans,

View File

@ -1867,11 +1867,11 @@ lapack_int LAPACKE_zheevx( int matrix_layout, char jobz, char range, char uplo,
lapack_int LAPACKE_chegst( int matrix_layout, lapack_int itype, char uplo,
lapack_int n, lapack_complex_float* a,
lapack_int lda, lapack_complex_float* b,
lapack_int lda, const lapack_complex_float* b,
lapack_int ldb );
lapack_int LAPACKE_zhegst( int matrix_layout, lapack_int itype, char uplo,
lapack_int n, lapack_complex_double* a,
lapack_int lda, lapack_complex_double* b,
lapack_int lda, const lapack_complex_double* b,
lapack_int ldb );
lapack_int LAPACKE_chegv( int matrix_layout, lapack_int itype, char jobz,
@ -2598,6 +2598,15 @@ lapack_int LAPACKE_sorgtr( int matrix_layout, char uplo, lapack_int n, float* a,
lapack_int LAPACKE_dorgtr( int matrix_layout, char uplo, lapack_int n, double* a,
lapack_int lda, const double* tau );
lapack_int LAPACKE_sorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
float* a, lapack_int lda,
const float* t, lapack_int ldt );
lapack_int LAPACKE_dorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
double* a, lapack_int lda,
const double* t, lapack_int ldt );
lapack_int LAPACKE_sormbr( int matrix_layout, char vect, char side, char trans,
lapack_int m, lapack_int n, lapack_int k,
const float* a, lapack_int lda, const float* tau,
@ -4577,6 +4586,15 @@ lapack_int LAPACKE_zungtr( int matrix_layout, char uplo, lapack_int n,
lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* tau );
lapack_int LAPACKE_cungtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_float* a, lapack_int lda,
const lapack_complex_float* t, lapack_int ldt );
lapack_int LAPACKE_zungtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* t, lapack_int ldt );
lapack_int LAPACKE_cunmbr( int matrix_layout, char vect, char side, char trans,
lapack_int m, lapack_int n, lapack_int k,
const lapack_complex_float* a, lapack_int lda,
@ -6932,11 +6950,11 @@ lapack_int LAPACKE_zheevx_work( int matrix_layout, char jobz, char range,
lapack_int LAPACKE_chegst_work( int matrix_layout, lapack_int itype, char uplo,
lapack_int n, lapack_complex_float* a,
lapack_int lda, lapack_complex_float* b,
lapack_int lda, const lapack_complex_float* b,
lapack_int ldb );
lapack_int LAPACKE_zhegst_work( int matrix_layout, lapack_int itype, char uplo,
lapack_int n, lapack_complex_double* a,
lapack_int lda, lapack_complex_double* b,
lapack_int lda, const lapack_complex_double* b,
lapack_int ldb );
lapack_int LAPACKE_chegv_work( int matrix_layout, lapack_int itype, char jobz,
@ -7880,6 +7898,19 @@ lapack_int LAPACKE_dorgtr_work( int matrix_layout, char uplo, lapack_int n,
double* a, lapack_int lda, const double* tau,
double* work, lapack_int lwork );
lapack_int LAPACKE_sorgtsqr_row_work( int matrix_layout,
lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
float* a, lapack_int lda,
const float* t, lapack_int ldt,
float* work, lapack_int lwork );
lapack_int LAPACKE_dorgtsqr_row_work( int matrix_layout,
lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
double* a, lapack_int lda,
const double* t, lapack_int ldt,
double* work, lapack_int lwork );
lapack_int LAPACKE_sormbr_work( int matrix_layout, char vect, char side,
char trans, lapack_int m, lapack_int n,
lapack_int k, const float* a, lapack_int lda,
@ -10281,6 +10312,19 @@ lapack_int LAPACKE_zungtr_work( int matrix_layout, char uplo, lapack_int n,
const lapack_complex_double* tau,
lapack_complex_double* work, lapack_int lwork );
lapack_int LAPACKE_cungtsqr_row_work( int matrix_layout,
lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_float* a, lapack_int lda,
const lapack_complex_float* t, lapack_int ldt,
lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zungtsqr_row_work( int matrix_layout,
lapack_int m, lapack_int n,
lapack_int mb, lapack_int nb,
lapack_complex_double* a, lapack_int lda,
const lapack_complex_double* t, lapack_int ldt,
lapack_complex_double* work, lapack_int lwork );
lapack_int LAPACKE_cunmbr_work( int matrix_layout, char vect, char side,
char trans, lapack_int m, lapack_int n,
lapack_int k, const lapack_complex_float* a,
@ -10553,11 +10597,11 @@ lapack_int LAPACKE_csytri2x_work( int matrix_layout, char uplo, lapack_int n,
const lapack_int* ipiv,
lapack_complex_float* work, lapack_int nb );
lapack_int LAPACKE_csytrs2( int matrix_layout, char uplo, lapack_int n,
lapack_int nrhs, lapack_complex_float* a,
lapack_int nrhs, const lapack_complex_float* a,
lapack_int lda, const lapack_int* ipiv,
lapack_complex_float* b, lapack_int ldb );
lapack_int LAPACKE_csytrs2_work( int matrix_layout, char uplo, lapack_int n,
lapack_int nrhs, lapack_complex_float* a,
lapack_int nrhs, const lapack_complex_float* a,
lapack_int lda, const lapack_int* ipiv,
lapack_complex_float* b, lapack_int ldb,
lapack_complex_float* work );
@ -10718,10 +10762,10 @@ lapack_int LAPACKE_dsytri2x_work( int matrix_layout, char uplo, lapack_int n,
const lapack_int* ipiv, double* work,
lapack_int nb );
lapack_int LAPACKE_dsytrs2( int matrix_layout, char uplo, lapack_int n,
lapack_int nrhs, double* a, lapack_int lda,
lapack_int nrhs, const double* a, lapack_int lda,
const lapack_int* ipiv, double* b, lapack_int ldb );
lapack_int LAPACKE_dsytrs2_work( int matrix_layout, char uplo, lapack_int n,
lapack_int nrhs, double* a,
lapack_int nrhs, const double* a,
lapack_int lda, const lapack_int* ipiv,
double* b, lapack_int ldb, double* work );
lapack_int LAPACKE_sbbcsd( int matrix_layout, char jobu1, char jobu2,
@ -10813,10 +10857,10 @@ lapack_int LAPACKE_ssytri2x_work( int matrix_layout, char uplo, lapack_int n,
const lapack_int* ipiv, float* work,
lapack_int nb );
lapack_int LAPACKE_ssytrs2( int matrix_layout, char uplo, lapack_int n,
lapack_int nrhs, float* a, lapack_int lda,
lapack_int nrhs, const float* a, lapack_int lda,
const lapack_int* ipiv, float* b, lapack_int ldb );
lapack_int LAPACKE_ssytrs2_work( int matrix_layout, char uplo, lapack_int n,
lapack_int nrhs, float* a,
lapack_int nrhs, const float* a,
lapack_int lda, const lapack_int* ipiv,
float* b, lapack_int ldb, float* work );
lapack_int LAPACKE_zbbcsd( int matrix_layout, char jobu1, char jobu2,
@ -10898,11 +10942,11 @@ lapack_int LAPACKE_zsytri2x_work( int matrix_layout, char uplo, lapack_int n,
const lapack_int* ipiv,
lapack_complex_double* work, lapack_int nb );
lapack_int LAPACKE_zsytrs2( int matrix_layout, char uplo, lapack_int n,
lapack_int nrhs, lapack_complex_double* a,
lapack_int nrhs, const lapack_complex_double* a,
lapack_int lda, const lapack_int* ipiv,
lapack_complex_double* b, lapack_int ldb );
lapack_int LAPACKE_zsytrs2_work( int matrix_layout, char uplo, lapack_int n,
lapack_int nrhs, lapack_complex_double* a,
lapack_int nrhs, const lapack_complex_double* a,
lapack_int lda, const lapack_int* ipiv,
lapack_complex_double* b, lapack_int ldb,
lapack_complex_double* work );
@ -12026,6 +12070,44 @@ lapack_int LAPACKE_zgetsls_work( int matrix_layout, char trans, lapack_int m,
lapack_complex_double* b, lapack_int ldb,
lapack_complex_double* work, lapack_int lwork );
lapack_int LAPACKE_sgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
float* a, lapack_int lda,
float* t, lapack_int ldt );
lapack_int LAPACKE_dgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
double* a, lapack_int lda,
double* t, lapack_int ldt );
lapack_int LAPACKE_cgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_float* a, lapack_int lda,
lapack_complex_float* t, lapack_int ldt );
lapack_int LAPACKE_zgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_double* a, lapack_int lda,
lapack_complex_double* t, lapack_int ldt );
lapack_int LAPACKE_sgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
float* a, lapack_int lda,
float* t, lapack_int ldt,
float* work, lapack_int lwork );
lapack_int LAPACKE_dgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
double* a, lapack_int lda,
double* t, lapack_int ldt,
double* work, lapack_int lwork );
lapack_int LAPACKE_cgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_float* a, lapack_int lda,
lapack_complex_float* t, lapack_int ldt,
lapack_complex_float* work, lapack_int lwork );
lapack_int LAPACKE_zgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
lapack_int mb1, lapack_int nb1, lapack_int nb2,
lapack_complex_double* a, lapack_int lda,
lapack_complex_double* t, lapack_int ldt,
lapack_complex_double* work, lapack_int lwork );
lapack_int LAPACKE_ssyev_2stage( int matrix_layout, char jobz, char uplo, lapack_int n,
float* a, lapack_int lda, float* w );
lapack_int LAPACKE_dsyev_2stage( int matrix_layout, char jobz, char uplo, lapack_int n,

View File

@ -162,6 +162,8 @@ lapacke_cgetrs.o \
lapacke_cgetrs_work.o \
lapacke_cgetsls.o \
lapacke_cgetsls_work.o \
lapacke_cgetsqrhrt.o \
lapacke_cgetsqrhrt_work.o \
lapacke_cggbak.o \
lapacke_cggbak_work.o \
lapacke_cggbal.o \
@ -634,6 +636,8 @@ lapacke_cungrq.o \
lapacke_cungrq_work.o \
lapacke_cungtr.o \
lapacke_cungtr_work.o \
lapacke_cungtsqr_row.o \
lapacke_cungtsqr_row_work.o \
lapacke_cunmbr.o \
lapacke_cunmbr_work.o \
lapacke_cunmhr.o \
@ -778,6 +782,8 @@ lapacke_dgetrs.o \
lapacke_dgetrs_work.o \
lapacke_dgetsls.o \
lapacke_dgetsls_work.o \
lapacke_dgetsqrhrt.o \
lapacke_dgetsqrhrt_work.o \
lapacke_dggbak.o \
lapacke_dggbak_work.o \
lapacke_dggbal.o \
@ -900,6 +906,8 @@ lapacke_dorgrq.o \
lapacke_dorgrq_work.o \
lapacke_dorgtr.o \
lapacke_dorgtr_work.o \
lapacke_dorgtsqr_row.o \
lapacke_dorgtsqr_row_work.o \
lapacke_dormbr.o \
lapacke_dormbr_work.o \
lapacke_dormhr.o \
@ -1348,6 +1356,8 @@ lapacke_sgetrs.o \
lapacke_sgetrs_work.o \
lapacke_sgetsls.o \
lapacke_sgetsls_work.o \
lapacke_sgetsqrhrt.o \
lapacke_sgetsqrhrt_work.o \
lapacke_sggbak.o \
lapacke_sggbak_work.o \
lapacke_sggbal.o \
@ -1468,6 +1478,8 @@ lapacke_sorgrq.o \
lapacke_sorgrq_work.o \
lapacke_sorgtr.o \
lapacke_sorgtr_work.o \
lapacke_sorgtsqr_row.o \
lapacke_sorgtsqr_row_work.o \
lapacke_sormbr.o \
lapacke_sormbr_work.o \
lapacke_sormhr.o \
@ -1908,6 +1920,8 @@ lapacke_zgetrs.o \
lapacke_zgetrs_work.o \
lapacke_zgetsls.o \
lapacke_zgetsls_work.o \
lapacke_zgetsqrhrt.o \
lapacke_zgetsqrhrt_work.o \
lapacke_zggbak.o \
lapacke_zggbak_work.o \
lapacke_zggbal.o \
@ -2380,6 +2394,8 @@ lapacke_zungrq.o \
lapacke_zungrq_work.o \
lapacke_zungtr.o \
lapacke_zungtr_work.o \
lapacke_zungtsqr_row.o \
lapacke_zungtsqr_row_work.o \
lapacke_zunmbr.o \
lapacke_zunmbr_work.o \
lapacke_zunmhr.o \

View File

@ -56,6 +56,8 @@ lapack_int LAPACKE_cgesvd_work( int matrix_layout, char jobu, char jobvt,
( LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1);
lapack_int nrows_vt = LAPACKE_lsame( jobvt, 'a' ) ? n :
( LAPACKE_lsame( jobvt, 's' ) ? MIN(m,n) : 1);
lapack_int ncols_vt = ( LAPACKE_lsame( jobvt, 'a' ) ||
LAPACKE_lsame( jobvt, 's' ) ) ? n : 1;
lapack_int lda_t = MAX(1,m);
lapack_int ldu_t = MAX(1,nrows_u);
lapack_int ldvt_t = MAX(1,nrows_vt);
@ -73,7 +75,7 @@ lapack_int LAPACKE_cgesvd_work( int matrix_layout, char jobu, char jobvt,
LAPACKE_xerbla( "LAPACKE_cgesvd_work", info );
return info;
}
if( ldvt < n ) {
if( ldvt < ncols_vt ) {
info = -12;
LAPACKE_xerbla( "LAPACKE_cgesvd_work", info );
return info;

View File

@ -0,0 +1,80 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function cgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
/*
 * High-level C interface to the LAPACK routine cgetsqrhrt.
 *
 * Flow:
 *   1. Reject any matrix_layout other than LAPACK_COL_MAJOR/LAPACK_ROW_MAJOR
 *      (reported through LAPACKE_xerbla, returns -1).
 *   2. Unless compiled with LAPACK_DISABLE_NAN_CHECK, and only when NaN
 *      checking is enabled at run time, scan the m-by-n matrix `a`;
 *      a NaN yields -7 (the position of `a` in the argument list).
 *   3. Ask LAPACKE_cgetsqrhrt_work for the workspace size (lwork = -1),
 *      allocate that many lapack_complex_float elements, run the real
 *      computation, then release the workspace.
 *
 * Returns the info value produced by LAPACKE_cgetsqrhrt_work, or
 * LAPACK_WORK_MEMORY_ERROR (also reported through LAPACKE_xerbla) when the
 * workspace cannot be allocated.
 */
lapack_int LAPACKE_cgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
                               lapack_int mb1, lapack_int nb1, lapack_int nb2,
                               lapack_complex_float* a, lapack_int lda,
                               lapack_complex_float* t, lapack_int ldt )
{
    lapack_int status = 0;
    lapack_int work_len = -1;               /* -1 requests a size query */
    lapack_complex_float* scratch = NULL;
    lapack_complex_float optimal_size;

    if( !( matrix_layout == LAPACK_COL_MAJOR ||
           matrix_layout == LAPACK_ROW_MAJOR ) ) {
        LAPACKE_xerbla( "LAPACKE_cgetsqrhrt", -1 );
        return -1;
    }
#ifndef LAPACK_DISABLE_NAN_CHECK
    /* Optional NaN screening of the input matrix */
    if( LAPACKE_get_nancheck() &&
        LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
        return -7;
    }
#endif
    /* First pass: workspace size query */
    status = LAPACKE_cgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
                                      a, lda, t, ldt, &optimal_size,
                                      work_len );
    if( status == 0 ) {
        work_len = LAPACK_C2INT( optimal_size );
        scratch = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * work_len );
        if( scratch != NULL ) {
            /* Second pass: the actual computation */
            status = LAPACKE_cgetsqrhrt_work( matrix_layout, m, n, mb1, nb1,
                                              nb2, a, lda, t, ldt, scratch,
                                              work_len );
            LAPACKE_free( scratch );
        } else {
            status = LAPACK_WORK_MEMORY_ERROR;
        }
    }
    /* Only allocation failures are reported here */
    if( status == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_cgetsqrhrt", status );
    }
    return status;
}

View File

@ -0,0 +1,108 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function cgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
/*
 * Middle-level C interface to the LAPACK routine cgetsqrhrt; the caller
 * supplies the workspace (`work`, `lwork`).
 *
 * - LAPACK_COL_MAJOR: arguments are forwarded to LAPACK_cgetsqrhrt as-is.
 * - LAPACK_ROW_MAJOR: `lda` and `ldt` must each be at least n (otherwise
 *   -8 / -10 is reported and returned).  With lwork == -1 only the
 *   workspace query is forwarded.  Otherwise temporary buffers are
 *   allocated, `a` is transposed into one of them, the routine is run, and
 *   both `a` and `t` are transposed back into the caller's arrays.
 * - Any other layout: -1 is reported and returned.
 *
 * A negative info coming back from LAPACK is decremented by one, which
 * accounts for the leading matrix_layout argument.  Failure to allocate a
 * temporary buffer returns LAPACK_TRANSPOSE_MEMORY_ERROR (also reported
 * through LAPACKE_xerbla).
 */
lapack_int LAPACKE_cgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
                                    lapack_int mb1, lapack_int nb1, lapack_int nb2,
                                    lapack_complex_float* a, lapack_int lda,
                                    lapack_complex_float* t, lapack_int ldt,
                                    lapack_complex_float* work, lapack_int lwork )
{
    lapack_int info = 0;
    lapack_int a_col_ld;
    lapack_int t_col_ld;
    lapack_complex_float* a_col = NULL;
    lapack_complex_float* t_col = NULL;

    /* Unknown layout */
    if( matrix_layout != LAPACK_COL_MAJOR &&
        matrix_layout != LAPACK_ROW_MAJOR ) {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
        return info;
    }

    /* Column-major: nothing to convert */
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt,
                           work, &lwork, &info );
        return ( info < 0 ) ? ( info - 1 ) : info;
    }

    /* Row-major from here on */
    a_col_ld = MAX(1,m);
    t_col_ld = MAX(1,nb2);

    /* Leading-dimension checks */
    if( lda < n ) {
        info = -8;
        LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
        return info;
    }
    if( ldt < n ) {
        info = -10;
        LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
        return info;
    }

    /* Workspace query: no temporaries are allocated */
    if( lwork == -1 ) {
        LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &a_col_ld, t,
                           &t_col_ld, work, &lwork, &info );
        return ( info < 0 ) ? ( info - 1 ) : info;
    }

    /* Temporary buffers passed to LAPACK in place of a and t */
    a_col = (lapack_complex_float*)
        LAPACKE_malloc( sizeof(lapack_complex_float) * a_col_ld * MAX(1,n) );
    if( a_col != NULL ) {
        t_col = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * t_col_ld * MAX(1,n) );
        if( t_col != NULL ) {
            /* Input: only a carries data into the routine */
            LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_col, a_col_ld );
            LAPACK_cgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_col, &a_col_ld,
                               t_col, &t_col_ld, work, &lwork, &info );
            if( info < 0 ) {
                info -= 1;
            }
            /* Output: both a and t are copied back */
            LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_col, a_col_ld,
                               a, lda );
            LAPACKE_cge_trans( LAPACK_COL_MAJOR, nb2, n, t_col, t_col_ld,
                               t, ldt );
            LAPACKE_free( t_col );
        } else {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
        }
        LAPACKE_free( a_col );
    } else {
        info = LAPACK_TRANSPOSE_MEMORY_ERROR;
    }

    if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_cgetsqrhrt_work", info );
    }
    return info;
}

View File

@ -78,7 +78,7 @@ lapack_int LAPACKE_cheev_work( int matrix_layout, char jobz, char uplo,
info = info - 1;
}
/* Transpose output matrices */
if ( jobz == 'V') {
if ( jobz == 'V' || jobz == 'v' ) {
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
} else {
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );

View File

@ -79,7 +79,7 @@ lapack_int LAPACKE_cheevd_2stage_work( int matrix_layout, char jobz, char uplo,
info = info - 1;
}
/* Transpose output matrices */
if ( jobz == 'V') {
if ( jobz == 'V' || jobz == 'v' ) {
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
} else {
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );

View File

@ -79,7 +79,7 @@ lapack_int LAPACKE_cheevd_work( int matrix_layout, char jobz, char uplo,
info = info - 1;
}
/* Transpose output matrices */
if ( jobz == 'V') {
if ( jobz == 'V' || jobz == 'v' ) {
LAPACKE_cge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
} else {
LAPACKE_che_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );

View File

@ -35,7 +35,7 @@
lapack_int LAPACKE_chegst( int matrix_layout, lapack_int itype, char uplo,
lapack_int n, lapack_complex_float* a,
lapack_int lda, lapack_complex_float* b,
lapack_int lda, const lapack_complex_float* b,
lapack_int ldb )
{
if( matrix_layout != LAPACK_COL_MAJOR && matrix_layout != LAPACK_ROW_MAJOR ) {

View File

@ -35,7 +35,7 @@
lapack_int LAPACKE_chegst_work( int matrix_layout, lapack_int itype, char uplo,
lapack_int n, lapack_complex_float* a,
lapack_int lda, lapack_complex_float* b,
lapack_int lda, const lapack_complex_float* b,
lapack_int ldb )
{
lapack_int info = 0;

View File

@ -50,10 +50,10 @@ lapack_int LAPACKE_chegv( int matrix_layout, lapack_int itype, char jobz,
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -6;
}
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
return -8;
}
}

View File

@ -50,10 +50,10 @@ lapack_int LAPACKE_chegv_2stage( int matrix_layout, lapack_int itype, char jobz,
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -6;
}
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
return -8;
}
}

View File

@ -55,10 +55,10 @@ lapack_int LAPACKE_chegvd( int matrix_layout, lapack_int itype, char jobz,
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -6;
}
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
return -8;
}
}

View File

@ -60,7 +60,7 @@ lapack_int LAPACKE_chegvx( int matrix_layout, lapack_int itype, char jobz,
if( LAPACKE_s_nancheck( 1, &abstol, 1 ) ) {
return -15;
}
if( LAPACKE_cge_nancheck( matrix_layout, n, n, b, ldb ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
return -9;
}
if( LAPACKE_lsame( range, 'v' ) ) {

View File

@ -46,7 +46,7 @@ lapack_int LAPACKE_chetri2x( int matrix_layout, char uplo, lapack_int n,
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -4;
}
}

View File

@ -42,9 +42,6 @@ lapack_int LAPACKE_clacpy_work( int matrix_layout, char uplo, lapack_int m,
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_clacpy( &uplo, &m, &n, a, &lda, b, &ldb );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_int ldb_t = MAX(1,m);

View File

@ -41,45 +41,46 @@ float LAPACKE_clantr_work( int matrix_layout, char norm, char uplo,
lapack_int info = 0;
float res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
/* Call LAPACK function */
res = LAPACK_clantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_complex_float* a_t = NULL;
float* work_lapack = NULL;
char norm_lapack;
char uplo_lapack;
/* Check leading dimension(s) */
if( lda < n ) {
info = -8;
LAPACKE_xerbla( "LAPACKE_clantr_work", info );
return info;
}
/* Allocate memory for temporary array(s) */
a_t = (lapack_complex_float*)
LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,MAX(m,n)) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) {
norm_lapack = 'i';
} else if( LAPACKE_lsame( norm, 'i' ) ) {
norm_lapack = '1';
} else {
norm_lapack = norm;
}
if( LAPACKE_lsame( uplo, 'u' ) ) {
uplo_lapack = 'l';
} else {
uplo_lapack = 'u';
}
/* Allocate memory for work array(s) */
if( LAPACKE_lsame( norm, 'i' ) ) {
work_lapack = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,m) );
if( LAPACKE_lsame( norm_lapack, 'i' ) ) {
work_lapack = (float*)LAPACKE_malloc( sizeof(float) * MAX(1,n) );
if( work_lapack == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_1;
goto exit_level_0;
}
}
/* Transpose input matrices */
LAPACKE_ctr_trans( matrix_layout, uplo, diag, MAX(m,n), a, lda, a_t, lda_t );
/* Call LAPACK function and adjust info */
res = LAPACK_clantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work_lapack );
/* Call LAPACK function */
res = LAPACK_clantr( &norm_lapack, &uplo_lapack, &diag, &n, &m, a, &lda, work_lapack );
/* Release memory and exit */
if( work_lapack ) {
LAPACKE_free( work_lapack );
}
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_clantr_work", info );
}
} else {

View File

@ -83,6 +83,7 @@ lapack_int LAPACKE_clascl( int matrix_layout, char type, lapack_int kl,
LAPACKE_cgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) {
return -9;
}
break;
case 'B':
// TYPE = 'B' - lower part of symmetric band matrix (assume m==n)
if( LAPACKE_chb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) {

View File

@ -42,9 +42,6 @@ lapack_int LAPACKE_claset_work( int matrix_layout, char uplo, lapack_int m,
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_claset( &uplo, &m, &n, &alpha, &beta, a, &lda );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_complex_float* a_t = NULL;

View File

@ -45,7 +45,7 @@ lapack_int LAPACKE_csyconv( int matrix_layout, char uplo, char way, lapack_int n
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_csy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -5;
}
}

View File

@ -34,7 +34,7 @@
#include "lapacke_utils.h"
lapack_int LAPACKE_csytrs2( int matrix_layout, char uplo, lapack_int n,
lapack_int nrhs, lapack_complex_float* a,
lapack_int nrhs, const lapack_complex_float* a,
lapack_int lda, const lapack_int* ipiv,
lapack_complex_float* b, lapack_int ldb )
{

View File

@ -34,7 +34,7 @@
#include "lapacke_utils.h"
lapack_int LAPACKE_csytrs2_work( int matrix_layout, char uplo, lapack_int n,
lapack_int nrhs, lapack_complex_float* a,
lapack_int nrhs, const lapack_complex_float* a,
lapack_int lda, const lapack_int* ipiv,
lapack_complex_float* b, lapack_int ldb,
lapack_complex_float* work )

View File

@ -44,7 +44,7 @@ lapack_int LAPACKE_ctrttf( int matrix_layout, char transr, char uplo,
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_ctr_nancheck( matrix_layout, uplo, 'n', n, a, lda ) ) {
return -5;
}
}

View File

@ -44,7 +44,7 @@ lapack_int LAPACKE_ctrttp( int matrix_layout, char uplo, lapack_int n,
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_ctr_nancheck( matrix_layout, uplo, 'n', n, a, lda ) ) {
return -4;
}
}

View File

@ -48,7 +48,7 @@ lapack_int LAPACKE_cungtr( int matrix_layout, char uplo, lapack_int n,
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_cge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -4;
}
if( LAPACKE_c_nancheck( n-1, tau, 1 ) ) {

View File

@ -0,0 +1,83 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function cungtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
/*
 * High-level C interface to the LAPACK routine cungtsqr_row.
 *
 * Flow:
 *   1. Reject any matrix_layout other than LAPACK_COL_MAJOR/LAPACK_ROW_MAJOR
 *      (reported through LAPACKE_xerbla, returns -1).
 *   2. Unless compiled with LAPACK_DISABLE_NAN_CHECK, and only when NaN
 *      checking is enabled at run time, scan `a` (m-by-n) and `t`
 *      (nb-by-n); a NaN yields -6 or -8 respectively, i.e. the position of
 *      the offending argument.
 *   3. Ask LAPACKE_cungtsqr_row_work for the workspace size (lwork = -1),
 *      allocate that many lapack_complex_float elements, run the real
 *      computation, then release the workspace.
 *
 * Returns the info value produced by LAPACKE_cungtsqr_row_work, or
 * LAPACK_WORK_MEMORY_ERROR (also reported through LAPACKE_xerbla) when the
 * workspace cannot be allocated.
 */
lapack_int LAPACKE_cungtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
                                 lapack_int mb, lapack_int nb,
                                 lapack_complex_float* a, lapack_int lda,
                                 const lapack_complex_float* t, lapack_int ldt )
{
    lapack_int status = 0;
    lapack_int work_len = -1;               /* -1 requests a size query */
    lapack_complex_float* scratch = NULL;
    lapack_complex_float optimal_size;

    if( !( matrix_layout == LAPACK_COL_MAJOR ||
           matrix_layout == LAPACK_ROW_MAJOR ) ) {
        LAPACKE_xerbla( "LAPACKE_cungtsqr_row", -1 );
        return -1;
    }
#ifndef LAPACK_DISABLE_NAN_CHECK
    /* Optional NaN screening of both input matrices, a first */
    if( LAPACKE_get_nancheck() ) {
        if( LAPACKE_cge_nancheck( matrix_layout, m, n, a, lda ) ) {
            return -6;
        }
        if( LAPACKE_cge_nancheck( matrix_layout, nb, n, t, ldt ) ) {
            return -8;
        }
    }
#endif
    /* First pass: workspace size query */
    status = LAPACKE_cungtsqr_row_work( matrix_layout, m, n, mb, nb,
                                        a, lda, t, ldt, &optimal_size,
                                        work_len );
    if( status == 0 ) {
        work_len = LAPACK_C2INT( optimal_size );
        scratch = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * work_len );
        if( scratch != NULL ) {
            /* Second pass: the actual computation */
            status = LAPACKE_cungtsqr_row_work( matrix_layout, m, n, mb, nb,
                                                a, lda, t, ldt, scratch,
                                                work_len );
            LAPACKE_free( scratch );
        } else {
            status = LAPACK_WORK_MEMORY_ERROR;
        }
    }
    /* Only allocation failures are reported here */
    if( status == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_cungtsqr_row", status );
    }
    return status;
}

View File

@ -0,0 +1,109 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function cungtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
/*
 * Middle-level wrapper around LAPACK_cungtsqr_row.
 *
 * matrix_layout  LAPACK_COL_MAJOR or LAPACK_ROW_MAJOR.
 * m, n, mb, nb   forwarded unchanged to the LAPACK routine.
 * a, lda         m-by-n matrix, transposed in and out for row-major callers.
 * t, ldt         nb-by-n input matrix (read-only), transposed in for
 *                row-major callers.
 * work, lwork    caller-provided workspace; lwork == -1 requests a size query.
 *
 * Column-major input is passed straight through. Row-major input is copied
 * into column-major temporaries, processed, and A is copied back.
 *
 * Returns the LAPACK info value; negative values are decremented by one
 * because this interface has the extra leading matrix_layout argument.
 * Returns -1 for an invalid layout, -7 / -9 for too-small lda / ldt in
 * row-major mode, and LAPACK_TRANSPOSE_MEMORY_ERROR if a temporary cannot
 * be allocated.
 */
lapack_int LAPACKE_cungtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n,
                                      lapack_int mb, lapack_int nb,
                                      lapack_complex_float* a, lapack_int lda,
                                      const lapack_complex_float* t, lapack_int ldt,
                                      lapack_complex_float* work, lapack_int lwork )
{
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt,
                             work, &lwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        /* All declarations sit at the top of the block so the file also
         * builds with compilers that reject declarations after statements. */
        lapack_int lda_t = MAX(1,m);
        lapack_int ldt_t = MAX(1,nb);
        lapack_complex_float* a_t = NULL;
        lapack_complex_float* t_t = NULL;
        /* Check leading dimension(s) */
        if( lda < n ) {
            info = -7;
            LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
            return info;
        }
        if( ldt < n ) {
            info = -9;
            LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
            return info;
        }
        /* Query optimal working array(s) size if requested */
        if( lwork == -1 ) {
            LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t,
                                 work, &lwork, &info );
            return (info < 0) ? (info - 1) : info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * lda_t * MAX(1,n) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        t_t = (lapack_complex_float*)
            LAPACKE_malloc( sizeof(lapack_complex_float) * ldt_t * MAX(1,n) );
        if( t_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_1;
        }
        /* Transpose input matrices */
        LAPACKE_cge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
        /* t_t must be filled from the caller's T (t/ldt), not from A:
         * sourcing it from A would hand LAPACK the leading rows of A in
         * place of the T input. */
        LAPACKE_cge_trans( matrix_layout, nb, n, t, ldt, t_t, ldt_t );
        /* Call LAPACK function and adjust info */
        LAPACK_cungtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t,
                             work, &lwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices (T is input-only, so only A goes back) */
        LAPACKE_cge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
        /* Release memory and exit */
        LAPACKE_free( t_t );
exit_level_1:
        LAPACKE_free( a_t );
exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_cungtsqr_row_work", info );
    }
    return info;
}

View File

@ -52,7 +52,7 @@ lapack_int LAPACKE_cunmtr( int matrix_layout, char side, char uplo, char trans,
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
r = LAPACKE_lsame( side, 'l' ) ? m : n;
if( LAPACKE_cge_nancheck( matrix_layout, r, r, a, lda ) ) {
if( LAPACKE_che_nancheck( matrix_layout, uplo, r, a, lda ) ) {
return -7;
}
if( LAPACKE_cge_nancheck( matrix_layout, m, n, c, ldc ) ) {

View File

@ -54,6 +54,8 @@ lapack_int LAPACKE_dgesvd_work( int matrix_layout, char jobu, char jobvt,
( LAPACKE_lsame( jobu, 's' ) ? MIN(m,n) : 1);
lapack_int nrows_vt = LAPACKE_lsame( jobvt, 'a' ) ? n :
( LAPACKE_lsame( jobvt, 's' ) ? MIN(m,n) : 1);
lapack_int ncols_vt = ( LAPACKE_lsame( jobvt, 'a' ) ||
LAPACKE_lsame( jobvt, 's' ) ) ? n : 1;
lapack_int lda_t = MAX(1,m);
lapack_int ldu_t = MAX(1,nrows_u);
lapack_int ldvt_t = MAX(1,nrows_vt);
@ -71,7 +73,7 @@ lapack_int LAPACKE_dgesvd_work( int matrix_layout, char jobu, char jobvt,
LAPACKE_xerbla( "LAPACKE_dgesvd_work", info );
return info;
}
if( ldvt < n ) {
if( ldvt < ncols_vt ) {
info = -12;
LAPACKE_xerbla( "LAPACKE_dgesvd_work", info );
return info;

View File

@ -0,0 +1,79 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function dgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
/*
 * High-level driver for dgetsqrhrt.
 *
 * Validates the layout flag, optionally screens A for NaNs, obtains the
 * workspace size from the middle-level routine (lwork = -1 query),
 * allocates the workspace and runs the computation.
 *
 * Returns the info value of the middle-level routine, -1 for an invalid
 * matrix_layout, -7 when the NaN check rejects a, or
 * LAPACK_WORK_MEMORY_ERROR when the workspace cannot be allocated.
 */
lapack_int LAPACKE_dgetsqrhrt( int matrix_layout, lapack_int m, lapack_int n,
                               lapack_int mb1, lapack_int nb1, lapack_int nb2,
                               double* a, lapack_int lda,
                               double* t, lapack_int ldt )
{
    lapack_int status = 0;
    lapack_int lwork = -1;
    double* scratch = NULL;
    double optimal_size;

    if( !( matrix_layout == LAPACK_COL_MAJOR ||
           matrix_layout == LAPACK_ROW_MAJOR ) ) {
        LAPACKE_xerbla( "LAPACKE_dgetsqrhrt", -1 );
        return -1;
    }
#ifndef LAPACK_DISABLE_NAN_CHECK
    if( LAPACKE_get_nancheck() ) {
        /* NaN screening of the input matrix (can be switched off at run time) */
        if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) {
            return -7;
        }
    }
#endif
    /* Workspace query: lwork is still -1 here */
    status = LAPACKE_dgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
                                      a, lda, t, ldt, &optimal_size, lwork );
    if( status == 0 ) {
        lwork = (lapack_int)optimal_size;
        scratch = (double*)LAPACKE_malloc( sizeof(double) * lwork );
        if( scratch != NULL ) {
            /* Real computation with the freshly allocated workspace */
            status = LAPACKE_dgetsqrhrt_work( matrix_layout, m, n, mb1, nb1, nb2,
                                              a, lda, t, ldt, scratch, lwork );
            LAPACKE_free( scratch );
        } else {
            status = LAPACK_WORK_MEMORY_ERROR;
        }
    }
    /* Only the allocation failure is reported from this level */
    if( status == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_dgetsqrhrt", status );
    }
    return status;
}

View File

@ -0,0 +1,106 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function dgetsqrhrt
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
/*
 * Middle-level wrapper around LAPACK_dgetsqrhrt.
 *
 * Column-major input goes straight to LAPACK. Row-major input is copied
 * into column-major temporaries for A (m-by-n) and T (nb2-by-n); after the
 * call both are transposed back into the caller's arrays.
 *
 * Negative info values coming from LAPACK are decremented by one. Returns
 * -1 for an invalid layout, -8 / -10 for too-small lda / ldt in row-major
 * mode, and LAPACK_TRANSPOSE_MEMORY_ERROR if a temporary cannot be
 * allocated.
 */
lapack_int LAPACKE_dgetsqrhrt_work( int matrix_layout, lapack_int m, lapack_int n,
                                    lapack_int mb1, lapack_int nb1, lapack_int nb2,
                                    double* a, lapack_int lda,
                                    double* t, lapack_int ldt,
                                    double* work, lapack_int lwork )
{
    lapack_int status = 0;

    /* Reject unknown layouts up front */
    if( matrix_layout != LAPACK_COL_MAJOR &&
        matrix_layout != LAPACK_ROW_MAJOR ) {
        status = -1;
        LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", status );
        return status;
    }

    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Native layout: forward the call and shift argument errors */
        LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda, t, &ldt,
                           work, &lwork, &status );
        if( status < 0 ) {
            status -= 1;
        }
        return status;
    }

    /* Row-major layout from here on */
    {
        lapack_int lda_col = MAX(1,m);
        lapack_int ldt_col = MAX(1,nb2);
        double* a_col = NULL;
        double* t_col = NULL;

        /* Leading dimensions must cover a full row of n elements */
        if( lda < n ) {
            status = -8;
            LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", status );
            return status;
        }
        if( ldt < n ) {
            status = -10;
            LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", status );
            return status;
        }
        /* Workspace query needs no transposed copies */
        if( lwork == -1 ) {
            LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a, &lda_col, t, &ldt_col,
                               work, &lwork, &status );
            return (status < 0) ? (status - 1) : status;
        }
        a_col = (double*)LAPACKE_malloc( sizeof(double) * lda_col * MAX(1,n) );
        if( a_col != NULL ) {
            t_col = (double*)LAPACKE_malloc( sizeof(double) * ldt_col * MAX(1,n) );
            if( t_col != NULL ) {
                /* Only A is copied in; T is filled by the LAPACK call */
                LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_col, lda_col );
                LAPACK_dgetsqrhrt( &m, &n, &mb1, &nb1, &nb2, a_col, &lda_col,
                                   t_col, &ldt_col, work, &lwork, &status );
                if( status < 0 ) {
                    status -= 1;
                }
                /* Copy both results back into the caller's row-major arrays */
                LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_col, lda_col, a, lda );
                LAPACKE_dge_trans( LAPACK_COL_MAJOR, nb2, n, t_col, ldt_col, t, ldt );
                LAPACKE_free( t_col );
            } else {
                status = LAPACK_TRANSPOSE_MEMORY_ERROR;
            }
            LAPACKE_free( a_col );
        } else {
            status = LAPACK_TRANSPOSE_MEMORY_ERROR;
        }
        if( status == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_dgetsqrhrt_work", status );
        }
    }
    return status;
}

View File

@ -41,9 +41,6 @@ lapack_int LAPACKE_dlacpy_work( int matrix_layout, char uplo, lapack_int m,
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_dlacpy( &uplo, &m, &n, a, &lda, b, &ldb );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
lapack_int ldb_t = MAX(1,m);

View File

@ -40,44 +40,46 @@ double LAPACKE_dlantr_work( int matrix_layout, char norm, char uplo,
lapack_int info = 0;
double res = 0.;
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
/* Call LAPACK function */
res = LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a, &lda, work );
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
double* a_t = NULL;
double* work_lapack = NULL;
char norm_lapack;
char uplo_lapack;
/* Check leading dimension(s) */
if( lda < n ) {
info = -8;
LAPACKE_xerbla( "LAPACKE_dlantr_work", info );
return info;
}
/* Allocate memory for temporary array(s) */
a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,MAX(m,n)) );
if( a_t == NULL ) {
info = LAPACK_TRANSPOSE_MEMORY_ERROR;
goto exit_level_0;
if( LAPACKE_lsame( norm, '1' ) || LAPACKE_lsame( norm, 'o' ) ) {
norm_lapack = 'i';
} else if( LAPACKE_lsame( norm, 'i' ) ) {
norm_lapack = '1';
} else {
norm_lapack = norm;
}
if( LAPACKE_lsame( uplo, 'u' ) ) {
uplo_lapack = 'l';
} else {
uplo_lapack = 'u';
}
/* Allocate memory for work array(s) */
if( LAPACKE_lsame( norm, 'i' ) ) {
work_lapack = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,m) );
if( LAPACKE_lsame( norm_lapack, 'i' ) ) {
work_lapack = (double*)LAPACKE_malloc( sizeof(double) * MAX(1,n) );
if( work_lapack == NULL ) {
info = LAPACK_WORK_MEMORY_ERROR;
goto exit_level_1;
goto exit_level_0;
}
}
/* Transpose input matrices */
LAPACKE_dtr_trans( matrix_layout, uplo, diag, MAX(m,n), a, lda, a_t, lda_t );
/* Call LAPACK function and adjust info */
res = LAPACK_dlantr( &norm, &uplo, &diag, &m, &n, a_t, &lda_t, work_lapack );
/* Call LAPACK function */
res = LAPACK_dlantr( &norm_lapack, &uplo_lapack, &diag, &n, &m, a, &lda, work_lapack );
/* Release memory and exit */
if( work_lapack ) {
LAPACKE_free( work_lapack );
}
exit_level_1:
LAPACKE_free( a_t );
exit_level_0:
if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
if( info == LAPACK_WORK_MEMORY_ERROR ) {
LAPACKE_xerbla( "LAPACKE_dlantr_work", info );
}
} else {

View File

@ -83,6 +83,7 @@ lapack_int LAPACKE_dlascl( int matrix_layout, char type, lapack_int kl,
LAPACKE_dgb_nancheck( LAPACK_COL_MAJOR, n, m, n-1, 1, a-1, lda+1 ) ) {
return -9;
}
break;
case 'B':
// TYPE = 'B' - lower part of symmetric band matrix (assume m==n)
if( LAPACKE_dsb_nancheck( matrix_layout, 'L', n, kl, a, lda ) ) {

View File

@ -41,9 +41,6 @@ lapack_int LAPACKE_dlaset_work( int matrix_layout, char uplo, lapack_int m,
if( matrix_layout == LAPACK_COL_MAJOR ) {
/* Call LAPACK function and adjust info */
LAPACK_dlaset( &uplo, &m, &n, &alpha, &beta, a, &lda );
if( info < 0 ) {
info = info - 1;
}
} else if( matrix_layout == LAPACK_ROW_MAJOR ) {
lapack_int lda_t = MAX(1,m);
double* a_t = NULL;

View File

@ -47,7 +47,7 @@ lapack_int LAPACKE_dorgtr( int matrix_layout, char uplo, lapack_int n, double* a
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -4;
}
if( LAPACKE_d_nancheck( n-1, tau, 1 ) ) {

View File

@ -0,0 +1,82 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native high-level C interface to LAPACK function dorgtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
/*
 * High-level driver for dorgtsqr_row.
 *
 * Validates the layout flag, optionally screens A and T for NaNs, obtains
 * the workspace size from the middle-level routine (lwork = -1 query),
 * allocates the workspace and runs the computation.
 *
 * Returns the info value of the middle-level routine, -1 for an invalid
 * matrix_layout, -6 / -8 when the NaN check rejects a / t, or
 * LAPACK_WORK_MEMORY_ERROR when the workspace cannot be allocated.
 */
lapack_int LAPACKE_dorgtsqr_row( int matrix_layout, lapack_int m, lapack_int n,
                                 lapack_int mb, lapack_int nb,
                                 double* a, lapack_int lda,
                                 const double* t, lapack_int ldt )
{
    lapack_int status = 0;
    lapack_int lwork = -1;
    double* scratch = NULL;
    double optimal_size;

    if( !( matrix_layout == LAPACK_COL_MAJOR ||
           matrix_layout == LAPACK_ROW_MAJOR ) ) {
        LAPACKE_xerbla( "LAPACKE_dorgtsqr_row", -1 );
        return -1;
    }
#ifndef LAPACK_DISABLE_NAN_CHECK
    if( LAPACKE_get_nancheck() ) {
        /* NaN screening of the input matrices (can be switched off at run time) */
        if( LAPACKE_dge_nancheck( matrix_layout, m, n, a, lda ) ) {
            return -6;
        }
        if( LAPACKE_dge_nancheck( matrix_layout, nb, n, t, ldt ) ) {
            return -8;
        }
    }
#endif
    /* Workspace query: lwork is still -1 here */
    status = LAPACKE_dorgtsqr_row_work( matrix_layout, m, n, mb, nb,
                                        a, lda, t, ldt, &optimal_size, lwork );
    if( status == 0 ) {
        lwork = (lapack_int)optimal_size;
        scratch = (double*)LAPACKE_malloc( sizeof(double) * lwork );
        if( scratch != NULL ) {
            /* Real computation with the freshly allocated workspace */
            status = LAPACKE_dorgtsqr_row_work( matrix_layout, m, n, mb, nb,
                                                a, lda, t, ldt, scratch, lwork );
            LAPACKE_free( scratch );
        } else {
            status = LAPACK_WORK_MEMORY_ERROR;
        }
    }
    /* Only the allocation failure is reported from this level */
    if( status == LAPACK_WORK_MEMORY_ERROR ) {
        LAPACKE_xerbla( "LAPACKE_dorgtsqr_row", status );
    }
    return status;
}

View File

@ -0,0 +1,108 @@
/*****************************************************************************
Copyright (c) 2020, Intel Corp.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of Intel Corporation nor the names of its contributors
may be used to endorse or promote products derived from this software
without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************
* Contents: Native middle-level C interface to LAPACK function dorgtsqr_row
* Author: Intel Corporation
*****************************************************************************/
#include "lapacke_utils.h"
/*
 * Middle-level wrapper around LAPACK_dorgtsqr_row.
 *
 * matrix_layout  LAPACK_COL_MAJOR or LAPACK_ROW_MAJOR.
 * m, n, mb, nb   forwarded unchanged to the LAPACK routine.
 * a, lda         m-by-n matrix, transposed in and out for row-major callers.
 * t, ldt         nb-by-n input matrix (read-only), transposed in for
 *                row-major callers.
 * work, lwork    caller-provided workspace; lwork == -1 requests a size query.
 *
 * Column-major input is passed straight through. Row-major input is copied
 * into column-major temporaries, processed, and A is copied back.
 *
 * Returns the LAPACK info value; negative values are decremented by one
 * because this interface has the extra leading matrix_layout argument.
 * Returns -1 for an invalid layout, -7 / -9 for too-small lda / ldt in
 * row-major mode, and LAPACK_TRANSPOSE_MEMORY_ERROR if a temporary cannot
 * be allocated.
 */
lapack_int LAPACKE_dorgtsqr_row_work( int matrix_layout, lapack_int m, lapack_int n,
                                      lapack_int mb, lapack_int nb,
                                      double* a, lapack_int lda,
                                      const double* t, lapack_int ldt,
                                      double* work, lapack_int lwork )
{
    lapack_int info = 0;
    if( matrix_layout == LAPACK_COL_MAJOR ) {
        /* Call LAPACK function and adjust info */
        LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a, &lda, t, &ldt,
                             work, &lwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
    } else if( matrix_layout == LAPACK_ROW_MAJOR ) {
        /* All declarations sit at the top of the block so the file also
         * builds with compilers that reject declarations after statements. */
        lapack_int lda_t = MAX(1,m);
        lapack_int ldt_t = MAX(1,nb);
        double* a_t = NULL;
        double* t_t = NULL;
        /* Check leading dimension(s) */
        if( lda < n ) {
            info = -7;
            LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
            return info;
        }
        if( ldt < n ) {
            info = -9;
            LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
            return info;
        }
        /* Query optimal working array(s) size if requested */
        if( lwork == -1 ) {
            LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a, &lda_t, t, &ldt_t,
                                 work, &lwork, &info );
            return (info < 0) ? (info - 1) : info;
        }
        /* Allocate memory for temporary array(s) */
        a_t = (double*)LAPACKE_malloc( sizeof(double) * lda_t * MAX(1,n) );
        if( a_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_0;
        }
        t_t = (double*)LAPACKE_malloc( sizeof(double) * ldt_t * MAX(1,n) );
        if( t_t == NULL ) {
            info = LAPACK_TRANSPOSE_MEMORY_ERROR;
            goto exit_level_1;
        }
        /* Transpose input matrices */
        LAPACKE_dge_trans( matrix_layout, m, n, a, lda, a_t, lda_t );
        /* t_t must be filled from the caller's T (t/ldt), not from A:
         * sourcing it from A would hand LAPACK the leading rows of A in
         * place of the T input. */
        LAPACKE_dge_trans( matrix_layout, nb, n, t, ldt, t_t, ldt_t );
        /* Call LAPACK function and adjust info */
        LAPACK_dorgtsqr_row( &m, &n, &mb, &nb, a_t, &lda_t, t_t, &ldt_t,
                             work, &lwork, &info );
        if( info < 0 ) {
            info = info - 1;
        }
        /* Transpose output matrices (T is input-only, so only A goes back) */
        LAPACKE_dge_trans( LAPACK_COL_MAJOR, m, n, a_t, lda_t, a, lda );
        /* Release memory and exit */
        LAPACKE_free( t_t );
exit_level_1:
        LAPACKE_free( a_t );
exit_level_0:
        if( info == LAPACK_TRANSPOSE_MEMORY_ERROR ) {
            LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
        }
    } else {
        info = -1;
        LAPACKE_xerbla( "LAPACKE_dorgtsqr_row_work", info );
    }
    return info;
}

View File

@ -51,7 +51,7 @@ lapack_int LAPACKE_dormtr( int matrix_layout, char side, char uplo, char trans,
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
r = LAPACKE_lsame( side, 'l' ) ? m : n;
if( LAPACKE_dge_nancheck( matrix_layout, r, r, a, lda ) ) {
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, r, a, lda ) ) {
return -7;
}
if( LAPACKE_dge_nancheck( matrix_layout, m, n, c, ldc ) ) {

View File

@ -43,7 +43,7 @@ lapack_int LAPACKE_dsyconv( int matrix_layout, char uplo, char way, lapack_int n
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -5;
}
}

View File

@ -72,7 +72,7 @@ lapack_int LAPACKE_dsyev_work( int matrix_layout, char jobz, char uplo,
info = info - 1;
}
/* Transpose output matrices */
if ( jobz == 'V') {
if ( jobz == 'V' || jobz == 'v' ) {
LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
} else {
LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );

View File

@ -76,7 +76,7 @@ lapack_int LAPACKE_dsyevd_2stage_work( int matrix_layout, char jobz, char uplo,
info = info - 1;
}
/* Transpose output matrices */
if ( jobz == 'V') {
if ( jobz == 'V' || jobz == 'v' ) {
LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
} else {
LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );

View File

@ -76,7 +76,7 @@ lapack_int LAPACKE_dsyevd_work( int matrix_layout, char jobz, char uplo,
info = info - 1;
}
/* Transpose output matrices */
if ( jobz == 'V') {
if ( jobz == 'V' || jobz == 'v' ) {
LAPACKE_dge_trans( LAPACK_COL_MAJOR, n, n, a_t, lda_t, a, lda );
} else {
LAPACKE_dsy_trans( LAPACK_COL_MAJOR, uplo, n, a_t, lda_t, a, lda );

View File

@ -47,7 +47,7 @@ lapack_int LAPACKE_dsygst( int matrix_layout, lapack_int itype, char uplo,
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -5;
}
if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) {
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
return -7;
}
}

View File

@ -48,10 +48,10 @@ lapack_int LAPACKE_dsygv( int matrix_layout, lapack_int itype, char jobz,
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -6;
}
if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) {
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
return -8;
}
}

View File

@ -48,10 +48,10 @@ lapack_int LAPACKE_dsygv_2stage( int matrix_layout, lapack_int itype, char jobz,
#ifndef LAPACK_DISABLE_NAN_CHECK
if( LAPACKE_get_nancheck() ) {
/* Optionally check input matrices for NaNs */
if( LAPACKE_dge_nancheck( matrix_layout, n, n, a, lda ) ) {
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, a, lda ) ) {
return -6;
}
if( LAPACKE_dge_nancheck( matrix_layout, n, n, b, ldb ) ) {
if( LAPACKE_dsy_nancheck( matrix_layout, uplo, n, b, ldb ) ) {
return -8;
}
}

Some files were not shown because too many files have changed in this diff Show More