Merge branch 'shivam-develop' into shivam-Locks

This commit is contained in:
shivammonaka 2024-02-29 11:58:14 +05:30
commit d49ebc54e1
1821 changed files with 269629 additions and 52620 deletions

177
.cirrus.yml Normal file
View File

@ -0,0 +1,177 @@
macos_instance:
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest
#task:
# name: AppleM1/LLVM
# compile_script:
# - brew install llvm
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang
#task:
# name: AppleM1/LLVM/ILP64
# compile_script:
# - brew install llvm
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang INTERFACE64=1
#task:
# name: AppleM1/LLVM/CMAKE
# compile_script:
# - brew install llvm
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
# - mkdir build
# - cd build
# - cmake -DTARGET=VORTEX -DCMAKE_C_COMPILER=clang -DBUILD_SHARED_LIBS=ON ..
# - make -j 4
#task:
# name: AppleM1/GCC/MAKE/OPENMP
# compile_script:
# - brew install gcc@11
# - export PATH=/opt/homebrew/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/lib"
# - export CPPFLAGS="-I/opt/homebrew/include"
# - make CC=gcc-11 FC=gfortran-11 USE_OPENMP=1
macos_instance:
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest
task:
name: AppleM1/LLVM x86_64 xbuild
compile_script:
- #brew install llvm
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
- export ARCHS="i386 x86_64"
- export ARCHS_STANDARD="i386 x86_64"
- export ARCHS_STANDARD_32_64_BIT="i386 x86_64"
- export ARCHS_STANDARD_64_BIT=x86_64
- export ARCHS_STANDARD_INCLUDING_64_BIT="i386 x86_64"
- export ARCHS_UNIVERSAL_IPHONE_OS="i386 x86_64"
- export VALID_ARCHS="i386 x86_64"
- xcrun --sdk macosx --show-sdk-path
- xcodebuild -version
- export CC=/Applications/Xcode-14.0.0.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode-14.0.0.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX12.3.sdk -arch x86_64"
- make TARGET=CORE2 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
always:
config_artifacts:
path: "*conf*"
type: text/plain
# lib_artifacts:
# path: "libopenblas*"
# type: application/octet-streamm
macos_instance:
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest
task:
name: AppleM1/LLVM armv8-ios xbuild
compile_script:
- #brew install llvm
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
- export CC=/Applications/Xcode-14.0.0.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode-14.0.0.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.0.sdk -arch arm64 -miphoneos-version-min=10.0"
- make TARGET=ARMV8 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 CROSS=1
always:
config_artifacts:
path: "*conf*"
type: text/plain
macos_instance:
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest
task:
name: AppleM1/LLVM armv7-androidndk xbuild
compile_script:
- #brew install android-ndk
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
- find /System/Volumes/Data/opt/homebrew/Caskroom/android-ndk/25b -name "armv7a-linux-androideabi*-ranlib"
- #export CC=/Applications/Xcode-13.4.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
- #export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode-13.4.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.0.sdk -arch arm64 -miphoneos-version-min=10.0"
- export CC=/System/Volumes/Data/opt/homebrew/Caskroom/android-ndk/25b/AndroidNDK8937393.app/Contents/NDK/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi23-clang
- make TARGET=ARMV7 ARM_SOFTFP_ABI=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
always:
config_artifacts:
path: "*conf*"
type: text/plain
task:
name: NeoverseN1
arm_container:
image: node:latest
compile_script:
- make
task:
name: NeoverseN1-ILP64
arm_container:
image: node:latest
compile_script:
- make INTERFACE64=1
task:
name: NeoverseN1-OMP
arm_container:
image: node:latest
cpu: 8
compile_script:
- make USE_OPENMP=1
FreeBSD_task:
name: FreeBSD-gcc12
freebsd_instance:
image_family: freebsd-13-2
install_script:
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
compile_script:
- ls -l /usr/local/lib
- gmake CC=gcc
FreeBSD_task:
name: freebsd-gcc12-ilp64
freebsd_instance:
image_family: freebsd-13-2
install_script:
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
compile_script:
- ls -l /usr/local/lib
- gmake CC=gcc INTERFACE64=1
FreeBSD_task:
name: FreeBSD-clang-openmp
freebsd_instance:
image_family: freebsd-13-2
install_script:
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
- ln -s /usr/local/lib/gcc12/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
compile_script:
- gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
#task:
# name: Windows/LLVM16 --- too slow ---
# windows_container:
# image: cirrusci/windowsservercore:cmake-2021.12.07
# install_script:
# - choco list --localonly
# - choco install -y llvm
# - # choco install -y cmake --installargs '"ADD_CMAKE_TO_PATH=System"'
# - choco install -y ninja
# - refreshenv
# - cd "c:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Auxiliary/Build"
# - vcvarsall x64
# - cd "C:\Users\ContainerAdministrator\AppData\Local\Temp\cirrus-ci-build"
# - cmake -S . -B build -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release
# - cd build
# - cmake --build .
# - ctest

16
.cirun.yml Normal file
View File

@ -0,0 +1,16 @@
# Self-Hosted Github Action Runners on AWS via Cirun.io
# Reference: https://docs.cirun.io/reference/yaml
runners:
- name: "aws-runner-graviton"
# Cloud Provider: AWS
cloud: "aws"
region: "us-east-1"
# Cheapest VM on AWS
instance_type: "c7g.large"
# Ubuntu-22.04, ami image
machine_image: "ami-0a0c8eebcdd6dcbd0"
preemptible: false
# Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
# So that this runner is created for running the workflow
labels:
- "cirun-aws-runner-graviton"

149
.github/workflows/apple_m.yml vendored Normal file
View File

@ -0,0 +1,149 @@
name: apple m
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: macos-14
strategy:
fail-fast: false
matrix:
build: [cmake, make]
fortran: [gfortran]
openmp: [0, 1]
ilp64: [0, 1]
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
elif [ "$RUNNER_OS" == "macOS" ]; then
sysctl -a | grep machdep.cpu
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get install -y gfortran cmake ccache libtinfo5
elif [ "$RUNNER_OS" == "macOS" ]; then
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
brew reinstall gcc
brew install coreutils cmake ccache
brew install llvm
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}
- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
elif [ "$RUNNER_OS" = "macOS" ]; then
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
echo "/opt/homebrew/opt/llvm/bin" >>$GITHUB_PATH
echo "" >>$GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Build OpenBLAS
run: |
export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
export CC="/opt/homebrew/opt/llvm/bin/clang"
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=${{matrix.openmp}} INTERFACE64=${{matrix.ilp64}} FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
export LDFLAGS="$LDFLAGS -Wl,-ld_classic"
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DUSE_OPENMP=${{matrix.openmp}} \
-DINTERFACE64=${{matrix.ilp64}} \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac
- name: Show ccache status
continue-on-error: true
run: ccache -s
- name: Run tests
timeout-minutes: 60
run: |
case "${{ matrix.build }}" in
"make")
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
echo "::group::Tests in 'test' directory"
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'ctest' directory"
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'utest' directory"
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
;;
"cmake")
cd build && ctest
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac

139
.github/workflows/arm64_graviton.yml vendored Normal file
View File

@ -0,0 +1,139 @@
name: arm64 graviton cirun
on:
push:
branches:
- develop
- release-**
pull_request:
branches:
- develop
- release-**
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}"
strategy:
fail-fast: false
matrix:
fortran: [gfortran]
build: [cmake, make]
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt update
sudo apt-get install -y gfortran cmake ccache libtinfo5
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}
- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Build OpenBLAS
run: |
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac
- name: Show ccache status
continue-on-error: true
run: ccache -s
- name: Run tests
timeout-minutes: 60
run: |
case "${{ matrix.build }}" in
"make")
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
echo "::group::Tests in 'test' directory"
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'ctest' directory"
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'utest' directory"
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
;;
"cmake")
cd build && ctest
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac

126
.github/workflows/c910v.yml vendored Normal file
View File

@ -0,0 +1,126 @@
name: c910v qemu test
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
env:
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1698113812618
toolchain_file_name: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0-20231018.tar.gz
strategy:
fail-fast: false
matrix:
include:
- target: RISCV64_GENERIC
triple: riscv64-linux-gnu
apt_triple: riscv64-linux-gnu
opts: NO_SHARED=1 TARGET=RISCV64_GENERIC
- target: C910V
triple: riscv64-unknown-linux-gnu
apt_triple: riscv64-linux-gnu
opts: NO_SHARED=1 TARGET=C910V
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: install build deps
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
gcc-${{ matrix.apt_triple }} gfortran-${{ matrix.apt_triple }} libgomp1-riscv64-cross
- name: checkout qemu
uses: actions/checkout@v3
with:
repository: T-head-Semi/qemu
path: qemu
ref: 1e692ebb43d396c52352406323fc782c1ac99a42
- name: build qemu
run: |
# Force use c910v qemu-user
wget https://github.com/revyos/qemu/commit/5164bca5a4bcde4534dc1a9aa3a7f619719874cf.patch
cd qemu
patch -p1 < ../5164bca5a4bcde4534dc1a9aa3a7f619719874cf.patch
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system
make -j$(nproc)
make install
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}
- name: Configure ccache
run: |
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: build OpenBLAS
run: |
wget ${xuetie_toolchain}/${toolchain_file_name}
tar -xvf ${toolchain_file_name} -C /opt
export PATH="/opt/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0/bin:$PATH"
make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
qemu-riscv64 ./utest/openblas_utest
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat

View File

@ -2,11 +2,16 @@ name: continuous build
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ${{ matrix.os }}
strategy:
@ -37,6 +42,7 @@ jobs:
- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get update
sudo apt-get install -y gfortran cmake ccache libtinfo5
elif [ "$RUNNER_OS" == "macOS" ]; then
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
@ -146,45 +152,59 @@ jobs:
msys2:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: windows-latest
strategy:
fail-fast: false
matrix:
msystem: [MINGW64, MINGW32, CLANG64]
msystem: [UCRT64, MINGW32, CLANG64, CLANG32]
idx: [int32, int64]
build-type: [Release]
include:
- msystem: MINGW64
- msystem: UCRT64
idx: int32
target-prefix: mingw-w64-x86_64
fc-pkg: mingw-w64-x86_64-gcc-fortran
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
- msystem: MINGW32
idx: int32
target-prefix: mingw-w64-i686
fc-pkg: mingw-w64-i686-gcc-fortran
fc-pkg: fc
- msystem: CLANG64
idx: int32
target-prefix: mingw-w64-clang-x86_64
fc-pkg: fc
# Compiling with Flang 16 seems to cause test errors on machines
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
no-avx512-flags: -DNO_AVX512=1
- msystem: CLANG32
idx: int32
target-prefix: mingw-w64-clang-i686
fc-pkg: cc
c-lapack-flags: -DC_LAPACK=ON
- msystem: MINGW64
- msystem: UCRT64
idx: int64
idx64-flags: -DBINARY=64 -DINTERFACE64=1
target-prefix: mingw-w64-x86_64
fc-pkg: mingw-w64-x86_64-gcc-fortran
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
- msystem: CLANG64
idx: int64
idx64-flags: -DBINARY=64 -DINTERFACE64=1
target-prefix: mingw-w64-clang-x86_64
c-lapack-flags: -DC_LAPACK=ON
- msystem: MINGW64
fc-pkg: fc
# Compiling with Flang 16 seems to cause test errors on machines
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
no-avx512-flags: -DNO_AVX512=1
- msystem: UCRT64
idx: int32
target-prefix: mingw-w64-x86_64
fc-pkg: mingw-w64-x86_64-gcc-fortran
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
build-type: None
exclude:
- msystem: MINGW32
idx: int64
- msystem: CLANG32
idx: int64
defaults:
run:
@ -209,7 +229,7 @@ jobs:
install: >-
base-devel
${{ matrix.target-prefix }}-cc
${{ matrix.fc-pkg }}
${{ matrix.target-prefix }}-${{ matrix.fc-pkg }}
${{ matrix.target-prefix }}-cmake
${{ matrix.target-prefix }}-ninja
${{ matrix.target-prefix }}-ccache
@ -217,14 +237,21 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v3
- name: Compilation cache
uses: actions/cache@v3
with:
# It looks like this path needs to be hard-coded.
path: C:/msys64/home/runneradmin/.ccache
- name: Prepare ccache
# Get cache location of ccache
# Create key that is used in action/cache/restore and action/cache/save steps
id: ccache-prepare
run: |
echo "ccachedir=$(cygpath -m $(ccache -k cache_dir))" >> $GITHUB_OUTPUT
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
key: ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}-${{ github.sha }}
echo "key=ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}-${{ github.sha }}" >> $GITHUB_OUTPUT
- name: Restore ccache
uses: actions/cache/restore@v3
with:
path: ${{ steps.ccache-prepare.outputs.ccachedir }}
key: ${{ steps.ccache-prepare.outputs.key }}
# Restore a matching ccache cache entry. Prefer same branch.
restore-keys: |
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}
@ -234,9 +261,10 @@ jobs:
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota.
run: |
which ccache
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 250M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
test -d ${{ steps.ccache-prepare.outputs.ccachedir }} || mkdir -p ${{ steps.ccache-prepare.outputs.ccachedir }}
echo "max_size = 250M" > ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
echo "compression = true" >> ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
ccache -p
ccache -s
echo $HOME
cygpath -w $HOME
@ -253,6 +281,7 @@ jobs:
-DTARGET=CORE2 \
${{ matrix.idx64-flags }} \
${{ matrix.c-lapack-flags }} \
${{ matrix.no-avx512-flags }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
@ -264,12 +293,33 @@ jobs:
continue-on-error: true
run: ccache -s
- name: Save ccache
# Save the cache after we are done (successfully) building
uses: actions/cache/save@v3
with:
path: ${{ steps.ccache-prepare.outputs.ccachedir }}
key: ${{ steps.ccache-prepare.outputs.key }}
- name: Run tests
id: run-ctest
timeout-minutes: 60
run: cd build && ctest
- name: Re-run tests
if: always() && (steps.run-ctest.outcome == 'failure')
timeout-minutes: 60
run: |
cd build
echo "::group::Re-run ctest"
ctest --rerun-failed --output-on-failure || true
echo "::endgroup::"
echo "::group::Log from these tests"
[ ! -f Testing/Temporary/LastTest.log ] || cat Testing/Temporary/LastTest.log
echo "::endgroup::"
cross_build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-22.04
strategy:
@ -295,6 +345,7 @@ jobs:
- name: Install Dependencies
run: |
sudo apt-get update
sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross
- name: Compilation cache

119
.github/workflows/loongarch64.yml vendored Normal file
View File

@ -0,0 +1,119 @@
name: loongarch64 qemu test
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- target: LOONGSONGENERIC
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
- target: LOONGSON3R5
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
- target: LOONGSON2K1000
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
- target: DYNAMIC_ARCH
triple: loongarch64-unknown-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Install APT deps
run: |
sudo add-apt-repository ppa:savoury1/virtualisation
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
qemu-user-static
- name: Download and install loongarch64-toolchain
run: |
wget https://github.com/sunhaiyong1978/CLFS-for-LoongArch/releases/download/8.1/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz
#wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz
tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt
- name: Set env
run: |
echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
echo "PATH=$GITHUB_WORKSPACE:/opt/cross-tools/bin:$PATH" >> $GITHUB_ENV
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}
- name: Configure ccache
run: |
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Disable utest dsdot:dsdot_n_1
run: |
echo -n > utest/test_dsdot.c
echo "Due to the qemu versions 7.2 causing utest cases to fail,"
echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."
- name: Build OpenBLAS
run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
- name: Test
run: |
qemu-loongarch64-static ./utest/openblas_utest
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat

View File

@ -2,11 +2,16 @@ name: mips64 qemu test
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
strategy:
fail-fast: false

View File

@ -18,11 +18,16 @@ on:
name: Nightly-Homebrew-Build
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
build-OpenBLAS-with-Homebrew:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: macos-latest
env:
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer

253
.github/workflows/riscv64_vector.yml vendored Normal file
View File

@ -0,0 +1,253 @@
name: riscv64 zvl256b qemu test
on: [push, pull_request]
concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
env:
triple: riscv64-unknown-linux-gnu
riscv_gnu_toolchain: https://github.com/riscv-collab/riscv-gnu-toolchain
riscv_gnu_toolchain_version: 13.2.0
riscv_gnu_toolchain_nightly_download_path: /releases/download/2024.02.02/riscv64-glibc-ubuntu-22.04-llvm-nightly-2024.02.02-nightly.tar.gz
strategy:
fail-fast: false
matrix:
include:
- target: RISCV64_ZVL128B
opts: TARGET=RISCV64_ZVL128B BINARY=64 ARCH=riscv64
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=128,elen=64
- target: RISCV64_ZVL256B
opts: TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64
steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: install build deps
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make \
libgomp1-riscv64-cross ccache
wget ${riscv_gnu_toolchain}/${riscv_gnu_toolchain_nightly_download_path}
tar -xvf $(basename ${riscv_gnu_toolchain_nightly_download_path}) -C /opt
- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}
- name: Configure ccache
run: |
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: build OpenBLAS libs
run: |
export PATH="/opt/riscv/bin:$PATH"
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
CC='ccache clang --rtlib=compiler-rt -target ${triple} --sysroot /opt/riscv/sysroot --gcc-toolchain=/opt/riscv/lib/gcc/riscv64-unknown-linux-gnu/${riscv_gnu_toolchain_version}/' \
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
RANLIB='ccache ${triple}-ranlib' \
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
HOSTCC=gcc HOSTFC=gfortran -j$(nproc)
- name: build OpenBLAS tests
run: |
export PATH="/opt/riscv/bin:$PATH"
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
CC='${triple}-gcc' \
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
RANLIB='ccache ${triple}-ranlib' \
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) tests
- name: build lapack-netlib tests
working-directory: ./lapack-netlib/TESTING
run: |
export PATH="/opt/riscv/bin:$PATH"
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
CC='${triple}-gcc' \
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
RANLIB='ccache ${triple}-ranlib' \
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) \
LIN/xlintsts LIN/xlintstc LIN/xlintstd LIN/xlintstz LIN/xlintstrfs \
LIN/xlintstrfc LIN/xlintstrfd LIN/xlintstrfz LIN/xlintstds \
LIN/xlintstzc EIG/xeigtsts EIG/xeigtstc EIG/xeigtstd EIG/xeigtstz \
- name: OpenBLAS tests
shell: bash
run: |
export PATH="/opt/riscv/bin:$PATH"
export QEMU_CPU=${{ matrix.qemu_cpu }}
rm -rf ./test_out
mkdir -p ./test_out
run_test() { local DIR=$1; local CMD=$2; local DATA=$3; local OUTPUT="./test_out/$DIR.$CMD"; \
echo "`pwd`/$DIR/$CMD $DIR/$DATA" >> $OUTPUT; \
if [[ -z $DATA ]]; then qemu-riscv64 ./$DIR/$CMD |& tee $OUTPUT ; \
else qemu-riscv64 ./$DIR/$CMD < ./$DIR/$DATA |& tee $OUTPUT ; fi ; \
RV=$? ; if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi \
}
run_test test cblat1 &
run_test test cblat2 cblat2.dat &
run_test test cblat3 cblat3.dat &
run_test test dblat1 &
run_test test dblat2 dblat2.dat &
run_test test dblat3 dblat3.dat &
run_test test sblat1 &
run_test test sblat2 sblat2.dat &
run_test test sblat3 sblat3.dat &
run_test test zblat1 &
run_test test zblat2 zblat2.dat &
run_test test zblat3 zblat3.dat &
run_test ctest xccblat1 &
run_test ctest xccblat2 cin2 &
run_test ctest xccblat3 cin3 &
run_test ctest xdcblat1 &
run_test ctest xdcblat2 din2 &
run_test ctest xdcblat3 din3 &
run_test ctest xscblat1 &
run_test ctest xscblat2 sin2 &
run_test ctest xscblat3 sin3 &
run_test ctest xzcblat1 &
run_test ctest xzcblat2 zin2 &
run_test ctest xzcblat3 zin3 &
wait
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi
- name: netlib tests
shell: bash
run: |
: # these take a very long time
echo "Skipping netlib tests in CI"
exit 0
: # comment out exit above to enable the tests
: # probably we want to identify a subset to run in CI
export PATH="/opt/riscv/bin:$PATH"
export QEMU_CPU=${{ matrix.qemu_cpu }}
rm -rf ./test_out
mkdir -p ./test_out
run_test() { local OUTPUT="./test_out/$1"; local DATA="./lapack-netlib/TESTING/$2"; local CMD="./lapack-netlib/TESTING/$3"; \
echo "$4" >> $OUTPUT; \
echo "$CMD" >> $OUTPUT; \
qemu-riscv64 $CMD < $DATA |& tee $OUTPUT; \
RV=$? ; if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi; \
if grep -q fail $OUTPUT ; then echo "*** FAIL: log contains 'fail'" >> $OUTPUT ; fi ; \
if grep -q rror $OUTPUT | grep -v -q "passed" | grep -v "largest error" ; then echo "*** FAIL: log contains 'error'" >> $OUTPUT ; fi \
}
run_test stest.out stest.in LIN/xlintsts "Testing REAL LAPACK linear equation routines" &
run_test ctest.out ctest.in LIN/xlintstc "Testing COMPLEX LAPACK linear equation routines" &
run_test dtest.out dtest.in LIN/xlintstd "Testing DOUBLE PRECISION LAPACK linear equation routines" &
run_test ztest.out ztest.in LIN/xlintstz "Testing COMPLEX16 LAPACK linear equation routines" &
run_test dstest.out dstest.in LIN/xlintstds "Testing SINGLE-DOUBLE PRECISION LAPACK prototype linear equation routines" &
run_test zctest.out zctest.in LIN/xlintstzc "Testing COMPLEX-COMPLEX16 LAPACK prototype linear equation routines" &
run_test stest_rfp.out stest_rfp.in LIN/xlintstrfs "Testing REAL LAPACK RFP prototype linear equation routines" &
run_test dtest_rfp.out dtest_rfp.in LIN/xlintstrfd "Testing DOUBLE PRECISION LAPACK RFP prototype linear equation routines" &
run_test ctest_rfp.out ctest_rfp.in LIN/xlintstrfc "Testing COMPLEX LAPACK RFP prototype linear equation routines" &
run_test ztest_rfp.out ztest_rfp.in LIN/xlintstrfz "Testing COMPLEX16 LAPACK RFP prototype linear equation routines" &
run_test snep.out nep.in EIG/xeigtsts "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test ssep.out sep.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test sse2.out se2.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test ssvd.out svd.in EIG/xeigtsts "SVD - Testing Singular Value Decomposition routines" &
run_test sec.out sec.in EIG/xeigtsts "SEC - Testing REAL Eigen Condition Routines" &
run_test sed.out sed.in EIG/xeigtsts "SEV - Testing REAL Nonsymmetric Eigenvalue Driver" &
run_test sgg.out sgg.in EIG/xeigtsts "SGG - Testing REAL Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test sgd.out sgd.in EIG/xeigtsts "SGD - Testing REAL Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test ssb.out ssb.in EIG/xeigtsts "SSB - Testing REAL Symmetric Eigenvalue Problem routines" &
run_test ssg.out ssg.in EIG/xeigtsts "SSG - Testing REAL Symmetric Generalized Eigenvalue Problem routines" &
run_test sbal.out sbal.in EIG/xeigtsts "SGEBAL - Testing the balancing of a REAL general matrix" &
run_test sbak.out sbak.in EIG/xeigtsts "SGEBAK - Testing the back transformation of a REAL balanced matrix" &
run_test sgbal.out sgbal.in EIG/xeigtsts "SGGBAL - Testing the balancing of a pair of REAL general matrices" &
run_test sgbak.out sgbak.in EIG/xeigtsts "SGGBAK - Testing the back transformation of a pair of REAL balanced matrices" &
run_test sbb.out sbb.in EIG/xeigtsts "SBB - Testing banded Singular Value Decomposition routines" &
run_test sglm.out glm.in EIG/xeigtsts "GLM - Testing Generalized Linear Regression Model routines" &
run_test sgqr.out gqr.in EIG/xeigtsts "GQR - Testing Generalized QR and RQ factorization routines" &
run_test sgsv.out gsv.in EIG/xeigtsts "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test scsd.out csd.in EIG/xeigtsts "CSD - Testing CS Decomposition routines" &
run_test slse.out lse.in EIG/xeigtsts "LSE - Testing Constrained Linear Least Squares routines" &
run_test cnep.out nep.in EIG/xeigtstc "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test csep.out sep.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test cse2.out se2.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test csvd.out svd.in EIG/xeigtstc "SVD - Testing Singular Value Decomposition routines" &
run_test cec.out cec.in EIG/xeigtstc "CEC - Testing COMPLEX Eigen Condition Routines" &
run_test ced.out ced.in EIG/xeigtstc "CES - Testing COMPLEX Nonsymmetric Schur Form Driver" &
run_test cgg.out cgg.in EIG/xeigtstc "CGG - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test cgd.out cgd.in EIG/xeigtstc "CGD - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test csb.out csb.in EIG/xeigtstc "CHB - Testing Hermitian Eigenvalue Problem routines" &
run_test csg.out csg.in EIG/xeigtstc "CSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
run_test cbal.out cbal.in EIG/xeigtstc "CGEBAL - Testing the balancing of a COMPLEX general matrix" &
run_test cbak.out cbak.in EIG/xeigtstc "CGEBAK - Testing the back transformation of a COMPLEX balanced matrix" &
run_test cgbal.out cgbal.in EIG/xeigtstc "CGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
run_test cgbak.out cgbak.in EIG/xeigtstc "CGGBAK - Testing the back transformation of a pair of COMPLEX balanced matrices" &
run_test cbb.out cbb.in EIG/xeigtstc "CBB - Testing banded Singular Value Decomposition routines" &
run_test cglm.out glm.in EIG/xeigtstc "GLM - Testing Generalized Linear Regression Model routines" &
run_test cgqr.out gqr.in EIG/xeigtstc "GQR - Testing Generalized QR and RQ factorization routines" &
run_test cgsv.out gsv.in EIG/xeigtstc "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test ccsd.out csd.in EIG/xeigtstc "CSD - Testing CS Decomposition routines" &
run_test clse.out lse.in EIG/xeigtstc "LSE - Testing Constrained Linear Least Squares routines" &
run_test dnep.out nep.in EIG/xeigtstd "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test dsep.out sep.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test dse2.out se2.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test dsvd.out svd.in EIG/xeigtstd "SVD - Testing Singular Value Decomposition routines" &
run_test dec.out dec.in EIG/xeigtstd "DEC - Testing DOUBLE PRECISION Eigen Condition Routines" &
run_test ded.out ded.in EIG/xeigtstd "DEV - Testing DOUBLE PRECISION Nonsymmetric Eigenvalue Driver" &
run_test dgg.out dgg.in EIG/xeigtstd "DGG - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test dgd.out dgd.in EIG/xeigtstd "DGD - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test dsb.out dsb.in EIG/xeigtstd "DSB - Testing DOUBLE PRECISION Symmetric Eigenvalue Problem routines" &
run_test dsg.out dsg.in EIG/xeigtstd "DSG - Testing DOUBLE PRECISION Symmetric Generalized Eigenvalue Problem routines" &
run_test dbal.out dbal.in EIG/xeigtstd "DGEBAL - Testing the balancing of a DOUBLE PRECISION general matrix" &
run_test dbak.out dbak.in EIG/xeigtstd "DGEBAK - Testing the back transformation of a DOUBLE PRECISION balanced matrix" &
run_test dgbal.out dgbal.in EIG/xeigtstd "DGGBAL - Testing the balancing of a pair of DOUBLE PRECISION general matrices" &
run_test dgbak.out dgbak.in EIG/xeigtstd "DGGBAK - Testing the back transformation of a pair of DOUBLE PRECISION balanced matrices" &
run_test dbb.out dbb.in EIG/xeigtstd "DBB - Testing banded Singular Value Decomposition routines" &
run_test dglm.out glm.in EIG/xeigtstd "GLM - Testing Generalized Linear Regression Model routines" &
run_test dgqr.out gqr.in EIG/xeigtstd "GQR - Testing Generalized QR and RQ factorization routines" &
run_test dgsv.out gsv.in EIG/xeigtstd "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test dcsd.out csd.in EIG/xeigtstd "CSD - Testing CS Decomposition routines" &
run_test dlse.out lse.in EIG/xeigtstd "LSE - Testing Constrained Linear Least Squares routines" &
run_test znep.out nep.in EIG/xeigtstz "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test zsep.out sep.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test zse2.out se2.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test zsvd.out svd.in EIG/xeigtstz "SVD - Testing Singular Value Decomposition routines" &
run_test zec.out zec.in EIG/xeigtstz "ZEC - Testing COMPLEX16 Eigen Condition Routines" &
run_test zed.out zed.in EIG/xeigtstz "ZES - Testing COMPLEX16 Nonsymmetric Schur Form Driver" &
run_test zgg.out zgg.in EIG/xeigtstz "ZGG - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test zgd.out zgd.in EIG/xeigtstz "ZGD - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test zsb.out zsb.in EIG/xeigtstz "ZHB - Testing Hermitian Eigenvalue Problem routines" &
run_test zsg.out zsg.in EIG/xeigtstz "ZSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
run_test zbal.out zbal.in EIG/xeigtstz "ZGEBAL - Testing the balancing of a COMPLEX16 general matrix" &
run_test zbak.out zbak.in EIG/xeigtstz "ZGEBAK - Testing the back transformation of a COMPLEX16 balanced matrix" &
run_test zgbal.out zgbal.in EIG/xeigtstz "ZGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
run_test zgbak.out zgbak.in EIG/xeigtstz "ZGGBAK - Testing the back transformation of a pair of COMPLEX16 balanced matrices" &
run_test zbb.out zbb.in EIG/xeigtstz "ZBB - Testing banded Singular Value Decomposition routines" &
run_test zglm.out glm.in EIG/xeigtstz "GLM - Testing Generalized Linear Regression Model routines" &
run_test zgqr.out gqr.in EIG/xeigtstz "GQR - Testing Generalized QR and RQ factorization routines" &
run_test zgsv.out gsv.in EIG/xeigtstz "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test zcsd.out csd.in EIG/xeigtstz "CSD - Testing CS Decomposition routines" &
run_test zlse.out lse.in EIG/xeigtstz "LSE - Testing Constrained Linear Least Squares routines" &
wait
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
python ./lapack-netlib/lapack_testing.py -d ./test_out -e > netlib_summary
TOTALS="$(grep 'ALL PRECISIONS' netlib_summary)"
NUMERICAL_ERRORS=-1
OTHER_ERRORS=-1
. <(awk '/ALL PRECISIONS/{printf "NUMERICAL_ERRORS=%s\nOTHER_ERRORS=%s\n", $5, $7}' netlib_summary
if (( NUMERICAL_ERRORS != 0 )) || (( OTHER_ERRORS != 0 )) ; then cat netlib_summary ; FAILURES=1 ; fi
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi

4
.gitignore vendored
View File

@ -14,6 +14,7 @@ lapack-3.4.2
lapack-3.4.2.tgz
lapack-netlib/make.inc
lapack-netlib/lapacke/include/lapacke_mangling.h
lapack-netlib/SRC/la_constants.mod
lapack-netlib/TESTING/testing_results.txt
lapack-netlib/INSTALL/test*
lapack-netlib/TESTING/xeigtstc
@ -46,6 +47,7 @@ config_last.h
getarch
getarch_2nd
utest/openblas_utest
utest/openblas_utest_ext
ctest/xccblat1
ctest/xccblat2
ctest/xccblat3
@ -71,6 +73,7 @@ test/SBLAT3.SUMM
test/ZBLAT2.SUMM
test/ZBLAT3.SUMM
test/SHBLAT3.SUMM
test/SBBLAT3.SUMM
test/cblat1
test/cblat2
test/cblat3
@ -81,6 +84,7 @@ test/sblat1
test/sblat2
test/sblat3
test/test_shgemm
test/test_sbgemm
test/zblat1
test/zblat2
test/zblat3

View File

@ -8,7 +8,7 @@ project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 23)
set(OpenBLAS_PATCH_VERSION 26.dev)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
@ -20,8 +20,12 @@ include(CMakePackageConfigHelpers)
#######
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF)
option(BUILD_LAPACK_DEPRECATED "When building LAPACK, include also some older, deprecated routines" ON)
option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON)
option(BUILD_BENCHMARKS "Build the collection of BLAS/LAPACK benchmarks" OFF)
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF)
option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF)
@ -38,6 +42,11 @@ option(USE_PERL "Use the older PERL scripts for build preparation instead of uni
option(NO_WARMUP "Do not run a benchmark on each startup just to find the best location for the memory buffer" ON)
option(FIXED_LIBNAME "Use a non-versioned name for the library and no symbolic linking to variant names" OFF)
set(LIBNAMEPREFIX "" CACHE STRING "Add a prefix to the openblas part of the library name" )
set(LIBNAMESUFFIX "" CACHE STRING "Add a suffix after the openblas part of the library name" )
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
else()
@ -94,7 +103,7 @@ message(WARNING "CMake support is experimental. It does not yet support all buil
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
set(OpenBLAS_LIBNAME openblas${SUFFIX64_UNDERSCORE})
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX}${SUFFIX64_UNDERSCORE})
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
@ -247,20 +256,21 @@ if (${CMAKE_SYSTEM_NAME} MATCHES "AIX|Android|Linux|FreeBSD|OpenBSD|NetBSD|Drago
endif()
endif()
if (APPLE AND DYNAMIC_ARCH AND BUILD_SHARED_LIBS)
# Seems that this hack doesn't required since macOS 11 Big Sur
if (APPLE AND BUILD_SHARED_LIBS AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
set (CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
if (NOT NOFORTRAN)
set (CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1)
set (CMAKE_Fortran_CREATE_SHARED_LIBRARY
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ar -ru libopenblas.a && exit 0' "
"sh -c 'ar -ru libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
"sh -c '${CMAKE_AR} -rs libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
"sh -c 'echo \"\" | ${CMAKE_Fortran_COMPILER} -o dummy.o -c -x f95-cpp-input - '"
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load dummy.o -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'"
"sh -c 'ls -l ${CMAKE_BINARY_DIR}/lib'")
else ()
set (CMAKE_C_CREATE_SHARED_LIBRARY
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ar -ru libopenblas.a && exit 0' "
"sh -c 'ar -ru libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
"sh -c '${CMAKE_AR} -rs libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'")
endif ()
endif()
@ -309,29 +319,36 @@ endif()
#if (MSVC OR NOT NOFORTRAN)
if (NOT NO_CBLAS)
if (NOT ONLY_CBLAS)
# Broken without fortran on unix
add_subdirectory(utest)
add_subdirectory(utest)
endif()
endif()
if (NOT NOFORTRAN)
if (NOT ONLY_CBLAS)
# Build test and ctest
add_subdirectory(test)
if (BUILD_TESTING)
endif()
if (BUILD_TESTING AND NOT BUILD_WITHOUT_LAPACK)
add_subdirectory(lapack-netlib/TESTING)
endif()
endif()
if(NOT NO_CBLAS)
if (NOT ONLY_CBLAS)
add_subdirectory(ctest)
endif()
endif()
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
add_subdirectory(cpp_thread_test)
endif()
if (NOT FIXED_LIBNAME)
set_target_properties(${OpenBLAS_LIBS} PROPERTIES
VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
SOVERSION ${OpenBLAS_MAJOR_VERSION}
)
endif()
if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
if (NOT MSVC)
target_link_libraries(${OpenBLAS_LIBNAME}_shared "-Wl,-allow-multiple-definition")
@ -398,21 +415,106 @@ if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif()
if (${BUILD_LAPACK_DEPRECATED})
set (BLD 1)
else ()
set (BLD 0)
endif()
if (${BUILD_BFLOAT16})
set (BBF16 1)
else ()
set (BBF16 0)
endif()
if (${BUILD_SINGLE})
set (BS 1)
else ()
set (BS 0)
endif()
if (${BUILD_DOUBLE})
set (BD 1)
else ()
set (BD 0)
endif()
if (${BUILD_COMPLEX})
set (BC 1)
else ()
set (BC 0)
endif()
if (${BUILD_COMPLEX16})
set (BZ 1)
else ()
set (BZ 0)
endif()
if (NOT USE_PERL)
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" "${BUILD_BFLOAT16}" "${BUILD_SINGLE}" "${BUILD_DOUBLE}" "${BUILD_COMPLEX}" "${BUILD_COMPLEX16}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
COMMENT "renaming symbols"
)
else()
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" "${BUILD_BFLOAT16}" "${BUILD_SINGLE}" "${BUILD_DOUBLE}" "${BUILD_COMPLEX}" "${BUILD_COMPLEX16}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
COMMENT "renaming symbols"
)
endif()
endif()
if (BUILD_BENCHMARKS)
#find_package(OpenMP REQUIRED)
file(GLOB SOURCES "benchmark/*.c")
if (NOT USE_OPENMP)
file(GLOB REMFILE "benchmark/smallscaling.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
endif()
if (BUILD_WITHOUT_LAPACK)
file(GLOB REMFILE "benchmark/cholesky.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/geev.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/gesv.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/getri.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/potrf.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/spmv.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/symv.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/linpack.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
endif()
if (NOT USE_GEMM3M)
file(GLOB REMFILE "benchmark/gemm3m.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
endif()
foreach(source ${SOURCES})
get_filename_component(name ${source} NAME_WE)
if ((NOT ${name} STREQUAL "zdot-intel") AND (NOT ${name} STREQUAL "cula_wrapper"))
set(defines DEFAULT COMPLEX DOUBLE "COMPLEX\;DOUBLE")
foreach(define ${defines})
set(target_name "benchmark_${name}")
if (NOT "${define}" STREQUAL "DEFAULT")
string(JOIN "_" define_str ${define})
set(target_name "${target_name}_${define_str}")
endif()
if ((NOT ${target_name} STREQUAL "benchmark_imax_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_imax_COMPLEX_DOUBLE") AND
(NOT ${target_name} STREQUAL "benchmark_imin_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_imin_COMPLEX_DOUBLE") AND
(NOT ${target_name} STREQUAL "benchmark_max_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_max_COMPLEX_DOUBLE") AND
(NOT ${target_name} STREQUAL "benchmark_min_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_min_COMPLEX_DOUBLE"))
add_executable(${target_name} ${source})
target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} )
# target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} OpenMP::OpenMP_C)
if (NOT "${define}" STREQUAL "DEFAULT")
target_compile_definitions(${target_name} PRIVATE ${define})
endif()
endif()
endforeach()
endif()
endforeach()
endif()
# Install project
@ -503,7 +605,7 @@ if(NOT NO_LAPACKE)
ADD_CUSTOM_TARGET(genlapacke
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
)
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
# Install pkg-config files
@ -511,9 +613,8 @@ configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
set(PN OpenBLAS)
set(CMAKECONFIG_INSTALL_DIR "share/cmake/${PN}${SUFFIX64}")
set(CMAKECONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PN}${SUFFIX64}")
configure_package_config_file(cmake/${PN}Config.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake"
INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR})

View File

@ -23,6 +23,9 @@
* Optimization on AMD Piledriver
* Optimization on Intel Haswell
* Chris Sidebottom <chris.sidebottom@arm.com>
* Optimizations and other improvements targeting AArch64
## Previous Developers
* Zaheer Chothia <zaheer.chothia@gmail.com>
@ -212,4 +215,11 @@ In chronological order:
* [2022-03] Support RISC-V Vector Intrinisc 1.0 version.
* Pablo Romero <https://github.com/pablorcum>
* [2022-08] Fix building from sources for QNX
* [2022-08] Fix building from sources for QNX
* Mark Seminatore <https://github.com/mseminatore>
* [2023-11-09] Improve Windows threading performance scaling
* [2024-02-09] Introduce MT_TRACE facility and improve code consistency
* Dirreke <https://github.com/mseminatore>
* [2024-01-16] Add basic support for the CSKY architecture

View File

@ -1,4 +1,195 @@
OpenBLAS ChangeLog
====================================================================
Version 0.3.26
2-Jan-2024
general:
- improved the version of openblas.pc that is created by the CMAKE build
- fixed a CMAKE-specific build problem on older versions of MacOS
- worked around linking problems on old versions of MacOS
- corrected installation location of the lapacke_mangling header in CMAKE builds
- added type declarations for complex variables to the MSVC-specific parts of the LAPACK header
- significantly sped up ?GESV for small problem sizes by introducing a lower bound for multithreading
- imported additions and corrections from the Reference-LAPACK project:
- added new LAPACK functions for truncated QR with pivoting (Reference-LAPACK PRs 891&941)
- handle miscalculation of minimum work array size in corner cases (Reference-LAPACK PR 942)
- fixed use of uninitialized variables in ?GEDMD and improved inline documentation (PR 959)
- fixed use of uninitialized variables (and consequential failures) in ?BBCSD (PR 967)
- added tests for the recently introduced Dynamic Mode Decomposition functions (PR 736)
- fixed several memory leaks in the LAPACK testsuite (PR 953)
- fixed counting of testsuite results by the Python script (PR 954)
x86-64:
- fixed computation of CASUM on SkylakeX and newer targets in the special
case that AVX512 is not supported by the compiler or operating environment
- fixed potential undefined behaviour in the CASUM/ZASUM kernels for AVX512 targets
- worked around a problem in the pre-AVX kernels for GEMV
- sped up the thread management code on MS Windows
arm64:
- fixed building of the LAPACK testsuite with Xcode 15 on Apple M1 and newer
- sped up the thread management code on MS Windows
- sped up SGEMM and DGEMM on Neoverse V1 and N1
- sped up ?DOT on SVE-capable targets
- reduced the number of targets in DYNAMIC_ARCH builds by eliminating functionally equivalent ones
- included support for Apple M1 and newer targets in DYNAMIC_ARCH builds
power:
- improved the SGEMM kernel for POWER10
- fixed compilation with (very) old versions of gcc
- fixed detection of old 32bit PPC targets in CMAKE-based builds
- added autodetection of the POWERPC 7400 subtype
- fixed CMAKE-based compilation for PPCG4 and PPC970 targets
loongarch64:
- added and improved optimized kernels for almost all BLAS functions
====================================================================
Version 0.3.25
12-Nov-2023
general:
- improved the error message shown on exceeding the maximum thread count
- improved the code to add supplementary thread buffers in case of overflow
- fixed a potential division by zero in ?ROTG
- improved the ?MATCOPY functions to accept zero-sized rows or columns
- corrected empty prototypes in function declarations
- cleaned up unused declarations in the f2c-converted versions of the LAPACK sources
- fixed compilation with the Cray CCE Compiler suite
- improved link line rewriting to avoid mixed libgomp/libomp builds with clang&gfortran
- worked around OPENMP builds with LLVM14's libomp hanging on FreeBSD
- improved the Makefiles to require less option duplication on "make install"
- imported the following changes from the upcoming release 3.12 of Reference-LAPACK
- deprecate utility functions ?GELQS and ?GEQRS (LAPACK PR 900)
- apply rounding up to workspace calculations done in floating point (LAPACK PR 904)
- avoid overflow in STGEX2/DTGEX2 (LAPACK PR 907)
- fix accumulation in ?LASSQ (LAPACK PR 909)
- fix handling of NaN values in ?GECON (LAPACK PR 926)
- avoid overflow in CBDSQR/ZBDSQR (LAPACK PR 927)
- fix poor vector orthogonalizations in ?ORBDB5/?UNBDB5 (LAPACK PR 928 & 930)
x86-64:
- fixed compile-time autodetection of AMD Ryzen3 and Ryzen4 cpus
- fixed capability-based fallback selection for unknown cpus in DYNAMIC_ARCH
- added AVX512 optimizations for ?ASUM on Sapphire Rapids and Cooper Lake
ARM64:
- fixed building on Apple with homebrew gcc
- fixed building with XCODE 15
- fixed building on A64FX and Cortex A710/X1/X2
- increased the default buffer size for recent ARM server cpus
POWER:
- fixed building with the IBM xlf 16.1.1 compiler
- fixed building with IBM XL C
- added support for DYNAMIC_ARCH builds with clang
- fixed union declaration in the BFLOAT16 test case
- enable optimizations for the AIX assembler on POWER10
LOONGARCH64:
- added an optimized SGEMV kernel
- added an optimized DTRSM kernel
====================================================================
Version 0.3.24
03-Sep-2023
general:
- declared the arguments of cblas_xerbla as const (in accordance with the reference implementation
and others, the previous discrepancy appears to have dated back to GotoBLAS)
- fixed the implementation of ?GEMMT that was added in 0.3.23
- made cpu-specific SWITCH_RATIO parameters for GEMM available to DYNAMIC_ARCH builds
- fixed application of SYMBOLSUFFIX in CMAKE builds
- fixed missing SSYCONVF function in the shared library
- fixed parallel build logic used with gmake
- added support for compilation with LLVM17, in particular its new Fortran compiler
- added support for CMAKE builds using the NVIDIA HPC compiler
- fixed INTERFACE64 builds with CMAKE and the f95 Fortran compiler
- fixed cross-build detection and management in c_check
- disabled building of the tests with CMAKE when ONLY_CBLAS is defined
- fixed several issues with the handling of runtime limits on the number of OPENMP threads
- corrected the error code returned by SGEADD/DGEADD when LDA is too small
- corrected the error code returned by IMATCOPY when LDB is too small
- updated ?NRM2 to support negative increment values (as introduced in release 3.10
of the reference BLAS)
- fixed OpenMP builds with CLANG for the case where libomp is not in a standard location
- fixed a potential overwrite of unrelated memory during thread initialisation on startup
- fixed a potential integer overflow in the multithreading threshold for ?SYMM/?SYRK
- fixed build of the LAPACKE interfaces for the LAPACK 3.11.0 ?TRSYL functions added in 0.3.22
- fixed installation of .cmake files in concurrent 32 and 64bit builds with CMAKE
- applied additions and corrections from the development branch of Reference-LAPACK:
- fixed actual arguments passed to a number of LAPACK functions (from Reference-LAPACK PR 885)
- fixed workspace query results in LAPACK ?SYTRF/?TRECV3 (from Reference-LAPACK PR 883)
- fixed derivation of the UPLO parameter in LAPACKE_?larfb (from Reference-LAPACK PR 878)
- fixed a crash in LAPACK ?GELSDD on NRHS=0 (from Reference-LAPACK PR 876)
- added new LAPACK utility functions CRSCL and ZRSCL (from Reference-LAPACK PR 839)
- corrected the order of eigenvalues for 2x2 matrices in ?STEMR (Reference-LAPACK PR 867)
- removed spurious reference to OpenMP variables outside OpenMP contexts (Reference-LAPACK PR 860)
- updated file comments on use of LAMBDA variable in LAPACK (Reference-LAPACK PR 852)
- fixed documentation of LAPACK SLASD0/DLASD0 (Reference-LAPACK PR 855)
- fixed confusing use of "minor" in LAPACK documentation (Reference-LAPACK PR 849)
- added new LAPACK functions ?GEDMD for dynamic mode decomposition (Reference-LAPACK PR 736)
- fixed potential stack overflows in the EIG part of the LAPACK testsuite (Reference-LAPACK PR 854)
- applied small improvements to the variants of Cholesky and QR functions (Reference-LAPACK PR 847)
- removed unused variables from LAPACK ?BDSQR (Reference-LAPACK PR 832)
- fixed a potential crash on allocation failure in LAPACKE SGEESX/DGEESX (Reference-LAPACK PR 836)
- added a quick return from SLARUV/DLARUV for N < 1 (Reference-LAPACK PR 837)
- updated function descriptions in LAPACK ?GEGS/?GEGV (Reference-LAPACK PR 831)
- improved algorithm description in ?GELSY (Reference-LAPACK PR 833)
- fixed scaling in LAPACK STGSNA/DTGSNA (Reference-LAPACK PR 830)
- fixed crash in LAPACKE_?geqrt with row-major data (Reference-LAPACK PR 768)
- added LAPACKE interfaces for C/ZUNHR_COL and S/DORHR_COL (Reference-LAPACK PR 827)
- added error exit tests for SYSV/SYTD2/GEHD2 to the testsuite (Reference-LAPACK PR 795)
- fixed typos in LAPACK source and comments (Reference-LAPACK PRs 809,811,812,814,820)
- adopt refactored ?GEBAL implementation (Reference-LAPACK PR 808)
x86_64:
- added cpu model autodetection for Intel Alder Lake N
- added activation of the AMX tile to the Sapphire Rapids SBGEMM kernel
- worked around miscompilations of GEMV/SYMV kernels by gcc's tree-vectorizer
- fixed compilation of Cooperlake and Sapphire Rapids kernels with CLANG
- fixed runtime detection of Cooperlake and Sapphire Rapids in DYNAMIC_ARCH
- fixed feature-based cputype fallback in DYNAMIC_ARCH
- added support for building the AVX512 kernels with the NVIDIA HPC compiler
- corrected ZAXPY result on old pre-AVX hardware for the INCX=0 case
- fixed a potential use of uninitialized variables in ZTRSM
ARM64:
- added cpu model autodetection for Apple M2
- fixed wrong results of CGEMM/CTRMM/DNRM2 under OSX (use of reserved register)
- added support for building the SVE kernels with the NVIDIA HPC compiler
- added support for building the SVE kernels with the Apple Clang compiler
- fixed compiler option handling for building the SVE kernels with LLVM
- implemented SWITCH_RATIO parameter for improved GEMM performance on Neoverse
- activated SVE SGEMM and DGEMM kernels for Neoverse V1
- improved performance of the SVE CGEMM and ZGEMM kernels on Neoverse V1
- improved kernel selection for the ARMV8SVE target and added it to DYNAMIC_ARCH
- fixed runtime check for SVE availability in DYNAMIC_ARCH builds to take OS or
container restrictions into account
- fixed a potential use of uninitialized variables in ZTRSM
- fix a potential misdetection of ARMV8 hardware as 32bit in CMAKE builds
LOONGARCH64:
- added ABI detection
- added support for cpu affinity handling
- fixed compilation with early versions of the Loongson toolchain
- added an optimized SGEMM kernel for 3A5000
- added optimized DGEMV kernels for 3A5000
- improved the performance of the DGEMM kernel for 3A5000
MIPS64:
- fixed miscompilation of TRMM kernels for the MIPS64_GENERIC target
POWER:
- fixed compiler warnings in the POWER10 SBGEMM kernel
RISCV:
- fixed application of the INTERFACE64 option when building with CMAKE
- fix a potential misdetection of RISCV hardware as 32bit in CMAKE builds
- fixed IDAMAX and DOT kernels for C910V
- fixed corner cases in the ROT and SWAP kernels for C910V
- fixed compilation of the C910V target with recent vendor compilers
====================================================================
Version 0.3.23
01-Apr-2023

View File

@ -11,7 +11,7 @@
operation is finished.
2. Simlar problem may happen under virtual machine. If supervisor
2. Similar problem may happen under virtual machine. If supervisor
allocates different cores for each scheduling, BLAS performnace
will be bad. This is because BLAS also utilizes all cache,
unexpected re-schedule for different core may result of heavy

15
Jenkinsfile vendored
View File

@ -1,9 +1,14 @@
node {
stage('Checkout') {
checkout
pipeline {
agent {
docker {
image 'osuosl/ubuntu-s390x'
}
}
stages {
stage('Build') {
sh("make")
steps {
sh 'make clean && make'
}
}
}
}

16
Jenkinsfile.pwr Normal file
View File

@ -0,0 +1,16 @@
pipeline {
agent {
docker {
image 'osuosl/ubuntu-ppc64le'
}
}
stages {
stage('Build') {
steps {
sh 'sudo apt update'
sh 'sudo apt install gfortran -y'
sh 'make clean && make'
}
}
}
}

View File

@ -1,5 +1,9 @@
TOPDIR = .
include ./Makefile.system
LNCMD = ln -fs
ifeq ($(FIXED_LIBNAME), 1)
LNCMD = true
endif
BLASDIRS = interface driver/level2 driver/level3 driver/others
@ -35,14 +39,18 @@ export NO_LAPACK
export C_LAPACK
endif
ifeq ($(F_COMPILER),CRAY)
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS))
else
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS))
endif
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test
.PHONY : all libs netlib $(RELA) test ctest shared install
.NOTPARALLEL : all libs $(RELA) prof lapack-test install blas-test
.NOTPARALLEL : shared
all :: libs netlib $(RELA) tests shared
all :: tests
@echo
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
@echo
@ -130,17 +138,17 @@ shared : libs netlib $(RELA)
ifneq ($(NO_SHARED), 1)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
@$(MAKE) -C exports so
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), Darwin)
@$(MAKE) -C exports dyn
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
endif
ifeq ($(OSNAME), WINNT)
@$(MAKE) -C exports dll
@ -148,9 +156,12 @@ endif
ifeq ($(OSNAME), CYGWIN_NT)
@$(MAKE) -C exports dll
endif
ifeq ($(OSNAME), AIX)
@$(MAKE) -C exports so
endif
endif
tests : libs netlib $(RELA) shared
tests : shared
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
touch $(LIBNAME)
ifndef NO_FBLAS
@ -206,16 +217,32 @@ ifeq ($(DYNAMIC_OLDER), 1)
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
endif
endif
@echo TARGET=$(CORE) >> Makefile.conf_last
ifdef USE_THREAD
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
endif
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
ifdef SMP
ifdef NUM_THREADS
@echo NUM_THREADS=$(NUM_THREADS) >> Makefile.conf_last
else
@echo NUM_THREADS=$(NUM_CORES) >> Makefile.conf_last
endif
endif
ifeq ($(USE_OPENMP),1)
@echo USE_OPENMP=1 >> Makefile.conf_last
endif
ifeq ($(INTERFACE64),1)
@echo INTERFACE64=1 >> Makefile.conf_last
endif
@echo THELIBNAME=$(LIBNAME) >> Makefile.conf_last
@echo THELIBSONAME=$(LIBSONAME) >> Makefile.conf_last
@-$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
@touch lib.grd
prof : prof_blas prof_lapack
prof_blas :
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
for d in $(SUBDIRS) ; \
do if test -d $$d; then \
$(MAKE) -C $$d prof || exit 1 ; \
@ -226,7 +253,7 @@ ifeq ($(DYNAMIC_ARCH), 1)
endif
blas :
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
for d in $(BLASDIRS) ; \
do if test -d $$d; then \
$(MAKE) -C $$d libs || exit 1 ; \
@ -234,7 +261,7 @@ blas :
done
hpl :
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
for d in $(BLASDIRS) ../laswp exports ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
@ -248,7 +275,7 @@ ifeq ($(DYNAMIC_ARCH), 1)
endif
hpl_p :
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
for d in $(SUBDIRS) ../laswp exports ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
@ -373,14 +400,15 @@ ifneq ($(CROSS), 1)
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
endif
lapack-runtest:
lapack-runtest: lapack-test
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING )
blas-test:
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out)
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out)

File diff suppressed because it is too large Load Diff

View File

@ -69,7 +69,7 @@ endif
# in GCC>=9
ifeq ($(CORE), NEOVERSEN1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
@ -92,26 +92,37 @@ endif
# in GCC>=10.4
ifeq ($(CORE), NEOVERSEV1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ10), 1)
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11)))
CCOMMON_OPT += -march=armv8.4-a+sve -mtune=neoverse-v1
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.4-a+sve
ifeq (1, $(ISCLANG))
CCOMMON_OPT += -mtune=cortex-x1
else
CCOMMON_OPT += -mtune=neoverse-v1
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
endif
else
CCOMMON_OPT += -march=armv8.4-a+sve -mtune=native
CCOMMON_OPT += -march=armv8.4-a+sve
ifneq ($(CROSS), 1)
CCOMMON_OPT += -mtune=native
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a -mtune=native
FCOMMON_OPT += -march=armv8.4-a
ifneq ($(CROSS), 1)
FCOMMON_OPT += -mtune=native
endif
endif
endif
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
else
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
@ -122,30 +133,36 @@ endif
# in GCC>=10.4
ifeq ($(CORE), NEOVERSEN2)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ10), 1)
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
ifneq ($(OSNAME), Darwin)
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
endif
else
CCOMMON_OPT += -march=armv8.5-a+sve -mtune=native
CCOMMON_OPT += -march=armv8.5-a+sve
ifneq ($(CROSS), 1)
CCOMMON_OPT += -mtune=native
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.5-a -mtune=native
FCOMMON_OPT += -march=armv8.5-a
ifneq ($(CROSS), 1)
FCOMMON_OPT += -mtune=native
endif
endif
endif
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
else
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
@ -155,7 +172,7 @@ endif
# Use a53 tunings because a55 is only available in GCC>=8.1
ifeq ($(CORE), CORTEXA55)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq ($(GCCVERSIONGTEQ8), 1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ8) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
@ -196,8 +213,13 @@ endif
endif
ifeq ($(CORE), THUNDERX3T110)
ifeq ($(GCCVERSIONGTEQ10), 1)
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.3-a
ifeq (0, $(ISCLANG))
CCOMMON_OPT += -mtune=thunderx3t110
else
CCOMMON_OPT += -mtune=thunderx2t99
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
endif
@ -225,9 +247,12 @@ endif
endif
endif
ifeq ($(GCCVERSIONGTEQ9), 1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
ifeq ($(CORE), EMAG8180)
CCOMMON_OPT += -march=armv8-a -mtune=emag
CCOMMON_OPT += -march=armv8-a
ifeq ($(ISCLANG), 0)
CCOMMON_OPT += -mtune=emag
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=emag
endif

4
Makefile.csky Normal file
View File

@ -0,0 +1,4 @@
ifeq ($(CORE), CK860FV)
CCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
FCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float -static
endif

View File

@ -2,6 +2,18 @@ TOPDIR = .
export GOTOBLAS_MAKEFILE = 1
-include $(TOPDIR)/Makefile.conf_last
include ./Makefile.system
LNCMD = ln -fs
ifdef THELIBNAME
LIBNAME=$(THELIBNAME)
LIBSONAME=$(THELIBSONAME)
endif
ifeq ($(FIXED_LIBNAME), 1)
LNCMD = true
endif
ifeq ($(INTERFACE64),1)
USE_64BITINT=1
endif
PREFIX ?= /opt/OpenBLAS
@ -91,7 +103,7 @@ ifneq ($(NO_STATIC),1)
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@install -m644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifneq ($(NO_SHARED),1)
@ -99,21 +111,21 @@ ifneq ($(NO_SHARED),1)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
@install -m755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), Darwin)
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@-install_name_tool -id "$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).$(MAJOR_VERSION).dylib" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
endif
ifeq ($(OSNAME), WINNT)
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@ -141,15 +153,15 @@ ifneq ($(NO_STATIC),1)
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifneq ($(NO_SHARED),1)
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
endif
@ -162,6 +174,8 @@ endif
@echo Generating $(LIBSONAMEBASE)$(SUFFIX64).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(PKGFILE)"
@echo 'libprefix='$(LIBNAMEPREFIX) >> "$(PKGFILE)"
@echo 'libnamesuffix='$(LIBNAMESUFFIX) >> "$(PKGFILE)"
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)"
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)"
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)"
@ -178,7 +192,7 @@ endif
ifneq ($(NO_SHARED),1)
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"

View File

@ -11,11 +11,23 @@ endif
ifeq ($(CORE), POWER10)
ifneq ($(C_COMPILER), PGI)
ifeq ($(C_COMPILER), GCC)
ifeq ($(GCCVERSIONGTEQ10), 1)
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave
else ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
endif
else
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
endif
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
endif
endif
endif
@ -38,19 +50,18 @@ CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
endif
ifneq ($(F_COMPILER), PGI)
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -fno-fast-math
FCOMMON_OPT += -O2 -frecursive -fno-fast-math -mcpu=power9 -mtune=power9
endif
ifeq ($(C_COMPILER), GCC)
ifeq ($(F_COMPILER), GFORTRAN)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
FCOMMON_OPT += -mcpu=power8 -mtune=power8
else
FCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
FCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
FCOMMON_OPT += -O2 -Mrecursive
@ -66,12 +77,16 @@ endif
ifneq ($(F_COMPILER), PGI)
ifeq ($(OSNAME), AIX)
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
endif
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
endif
endif
else
FCOMMON_OPT += -O2 -Mrecursive
@ -84,13 +99,20 @@ CCOMMON_OPT += -DUSE_OPENMP -fopenmp
else
CCOMMON_OPT += -DUSE_OPENMP -mp
endif
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -DUSE_OPENMP
else
ifneq ($(F_COMPILER), PGI)
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
else
FCOMMON_OPT += -DUSE_OPENMP -mp
endif
endif
endif
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -fno-integrated-as
endif
# workaround for C->FORTRAN ABI violation in LAPACKE
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -fno-optimize-sibling-calls
@ -125,8 +147,19 @@ endif
ifdef BINARY64
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX)
$(error Using GCC and XLF on AIX is not a supported combination.)
endif
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX)
$(error Using Clang and gFortran on AIX is not a supported combination.)
endif
ifeq ($(OSNAME), AIX)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mpowerpc64 -maix64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mpowerpc64 -maix64
endif

View File

@ -55,6 +55,26 @@ ifeq ($(TARGET), C910V)
TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d
endif
ifeq ($(TARGET), CK860FV)
TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
endif
ifeq ($(TARGET), x280)
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
endif
ifeq ($(TARGET), RISCV64_ZVL256B)
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
endif
ifeq ($(TARGET), RISCV64_ZVL128B)
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
endif
ifeq ($(TARGET), RISCV64_GENERIC)
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
endif
all: getarch_2nd
./getarch_2nd 0 >> $(TARGET_MAKE)
./getarch_2nd 1 >> $(TARGET_CONF)

View File

@ -2,3 +2,19 @@ ifeq ($(CORE), C910V)
CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920
FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static
endif
ifeq ($(CORE), x280)
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
endif
ifeq ($(CORE), RISCV64_ZVL256B)
CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
endif
ifeq ($(CORE), RISCV64_ZVL128B)
CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -static
endif
ifeq ($(CORE), RISCV64_GENERIC)
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d -static
endif

View File

@ -3,7 +3,12 @@
#
# This library's version
VERSION = 0.3.23
VERSION = 0.3.26.dev
# If you set this prefix, the library name will be lib$(LIBNAMESUFFIX)openblas.a
# and lib$(LIBNAMESUFFIX)openblas.so, with a matching soname in the shared library
#
# LIBNAMEPREFIX = scipy
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library

View File

@ -277,10 +277,6 @@ endif
ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1
# Determine if the assembler is GNU Assembler
HAVE_GAS := $(shell $(AS) -v < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?)
GETARCH_FLAGS += -DHAVE_GAS=$(HAVE_GAS)
# Generating Makefile.conf and config.h
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) DYNAMIC_ARCH=$(DYNAMIC_ARCH) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
@ -384,6 +380,11 @@ GCCMINORVERSIONGTEQ4 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
endif
ifeq ($(C_COMPILER), CLANG)
CLANGVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
CLANGVERSIONGTEQ12 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 12)
endif
#
# OS dependent settings
#
@ -392,11 +393,22 @@ ifeq ($(OSNAME), Darwin)
ifndef MACOSX_DEPLOYMENT_TARGET
ifeq ($(ARCH), arm64)
export MACOSX_DEPLOYMENT_TARGET=11.0
ifeq ($(C_COMPILER), GCC)
export NO_SVE = 1
endif
else
export MACOSX_DEPLOYMENT_TARGET=10.8
endif
endif
MD5SUM = md5 -r
XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.Xcode |awk '/version:/ {print $2}'|cut -d: -f2|cut -f1 -d.)
ifeq (x$(XCVER)x,xx)
XCVER = $(shell pkgutil --pkg-info=com.apple.pkg.CLTools_Executables |awk '/version:/ {print $2}'|cut -d: -f2|cut -f1 -d.)
endif
ifeq (x$(XCVER), x 15)
CCOMMON_OPT += -Wl,-ld_classic
FCOMMON_OPT += -Wl,-ld_classic
endif
endif
ifneq (,$(findstring $(OSNAME), FreeBSD OpenBSD DragonFly))
@ -597,6 +609,9 @@ endif
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -fopenmp
ifeq ($(F_COMPILER), GFORTRAN)
FEXTRALIB := $(subst -lgomp,-lomp,$(FEXTRALIB))
endif
endif
ifeq ($(C_COMPILER), INTEL)
@ -645,7 +660,7 @@ DYNAMIC_CORE += HASWELL ZEN
endif
ifneq ($(NO_AVX512), 1)
ifneq ($(NO_AVX2), 1)
DYNAMIC_CORE += SKYLAKEX COOPERLAKE
DYNAMIC_CORE += SKYLAKEX COOPERLAKE SAPPHIRERAPIDS
endif
endif
endif
@ -662,15 +677,12 @@ ifeq ($(ARCH), arm64)
DYNAMIC_CORE = ARMV8
DYNAMIC_CORE += CORTEXA53
DYNAMIC_CORE += CORTEXA57
DYNAMIC_CORE += CORTEXA72
DYNAMIC_CORE += CORTEXA73
DYNAMIC_CORE += NEOVERSEN1
ifneq ($(NO_SVE), 1)
DYNAMIC_CORE += NEOVERSEV1
DYNAMIC_CORE += NEOVERSEN2
DYNAMIC_CORE += ARMV8SVE
endif
DYNAMIC_CORE += CORTEXA55
DYNAMIC_CORE += FALKOR
DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99
DYNAMIC_CORE += TSV110
@ -744,7 +756,11 @@ DYNAMIC_CORE += POWER9
else
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
endif
ifeq ($(OSNAME), AIX)
LDVERSIONGTEQ35 := 1
else
LDVERSIONGTEQ35 := $(shell expr `$(CC) -Wl,--version 2> /dev/null | head -1 | cut -f2 -d "." | cut -f1 -d "-"` \>= 35)
endif
ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11)
DYNAMIC_CORE += POWER10
CCOMMON_OPT += -DHAVE_P10_SUPPORT
@ -857,6 +873,11 @@ endif
endif
endif
ifeq ($(ARCH), csky)
NO_BINARY_MODE = 1
BINARY_DEFINED = 1
endif
#
# C Compiler dependent settings
#
@ -932,8 +953,12 @@ BINARY_DEFINED = 1
endif
ifeq ($(ARCH), loongarch64)
CCOMMON_OPT += -march=loongarch64 -mabi=lp64
FCOMMON_OPT += -march=loongarch64 -mabi=lp64
LA64_ABI=$(shell $(CC) -mabi=lp64d -c $(TOPDIR)/cpuid_loongarch64.c -o /dev/null > /dev/null 2> /dev/null && echo lp64d)
ifneq ($(LA64_ABI), lp64d)
LA64_ABI=lp64
endif
CCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI)
FCOMMON_OPT += -march=loongarch64 -mabi=$(LA64_ABI)
endif
endif
@ -1082,8 +1107,9 @@ endif
endif
endif
ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(F_COMPILER), $(filter $(F_COMPILER),GFORTRAN FLANGNEW))
CCOMMON_OPT += -DF_INTERFACE_GFORT
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -Wall
# make single-threaded LAPACK calls thread-safe #1847
FCOMMON_OPT += -frecursive
@ -1097,6 +1123,7 @@ EXTRALIB += -lgfortran
endif
endif
endif
endif
ifdef NO_BINARY_MODE
ifeq ($(ARCH), $(filter $(ARCH),mips64))
ifdef BINARY64
@ -1152,6 +1179,10 @@ endif
ifeq ($(F_COMPILER), IBM)
CCOMMON_OPT += -DF_INTERFACE_IBM
FEXTRALIB += -lxlf90
ifeq ($(C_COMPILER), $(filter $(C_COMPILER),GCC CLANG))
FCOMMON_OPT += -qextname
endif
# FCOMMON_OPT += -qarch=440
ifdef BINARY64
FCOMMON_OPT += -q64
@ -1348,6 +1379,8 @@ ifeq ($(F_COMPILER), SUN)
FCOMMON_OPT += -pic
else ifeq ($(F_COMPILER), NAG)
FCOMMON_OPT += -PIC
else ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -qpic=large
else
FCOMMON_OPT += -fPIC
endif
@ -1483,16 +1516,28 @@ ifndef LIBSONAMEBASE
LIBSONAMEBASE = openblas
endif
ifndef LIBNAMEPREFIX
LIBNAMEPREFIX =
endif
SYMPREFIX=$(SYMBOLPREFIX)
ifeq ($(SYMBOLPREFIX),$(LIBNAMEPREFIX))
SYMPREFIX=
endif
SYMSUFFIX=$(SYMBOLSUFFIX)
ifeq ($(SYMBOLSUFFIX),$(LIBNAMESUFFIX))
SYMSUFFIX=
endif
ifndef LIBNAMESUFFIX
LIBNAMEBASE = $(SYMBOLPREFIX)$(LIBSONAMEBASE)$(SYMBOLSUFFIX)
LIBNAMEBASE = $(SYMPREFIX)$(LIBSONAMEBASE)$(SYMSUFFIX)
else
LIBNAMEBASE = $(SYMBOLPREFIX)$(LIBSONAMEBASE)$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
LIBNAMEBASE = $(SYMPREFIX)$(LIBSONAMEBASE)$(SYMSUFFIX)$(LIBNAMESUFFIX)
endif
ifeq ($(OSNAME), CYGWIN_NT)
LIBPREFIX = cyg$(LIBNAMEBASE)
LIBPREFIX = cyg$(LIBNAMEPREFIX)$(LIBNAMEBASE)
else
LIBPREFIX = lib$(LIBNAMEBASE)
LIBPREFIX = lib$(LIBNAMEPREFIX)$(LIBNAMEBASE)
endif
KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
@ -1600,9 +1645,11 @@ override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF)
ifeq ($(NEED_PIC), 1)
ifeq (,$(findstring PIC,$(FFLAGS)))
ifneq ($(F_COMPILER),IBM)
override FFLAGS += -fPIC
endif
endif
endif
#For LAPACK Fortran codes.
#Disable -fopenmp for LAPACK Fortran codes on Windows.
@ -1616,11 +1663,11 @@ endif
ifeq ($(F_COMPILER),NAG)
LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
endif
ifeq ($(F_COMPILER),CRAY)
LAPACK_FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
override FFLAGS := $(filter-out -msse3 -mssse3 -msse4.1 -mavx -mavx2 -mskylake-avx512 ,$(FFLAGS))
endif
LAPACK_CFLAGS = $(CFLAGS)
@ -1669,14 +1716,14 @@ LIBNAME_P = $(LIBPREFIX)p$(REVISION)_p.$(LIBSUFFIX)
endif
endif
ifeq ($(FIXED_LIBNAME),1)
LIBNAME = lib$(LIBNAMEPREFIX)$(LIBSONAMEBASE)$(LIBNAMESUFFIX).$(LIBSUFFIX)
LIBNAME_P = lib$(LIBNAMEPREFIX)$(LISOBNAMEBASE)$(LIBNAMESUFFIX)_p.$(LIBSUFFIX)
endif
LIBDLLNAME = $(LIBPREFIX).dll
IMPLIBNAME = lib$(LIBNAMEBASE).dll.a
ifneq ($(OSNAME), AIX)
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.so)
else
LIBSONAME = $(LIBNAME:.$(LIBSUFFIX)=.a)
endif
LIBDYNNAME = $(LIBNAME:.$(LIBSUFFIX)=.dylib)
LIBDEFNAME = $(LIBNAME:.$(LIBSUFFIX)=.def)
LIBEXPNAME = $(LIBNAME:.$(LIBSUFFIX)=.exp)
@ -1763,6 +1810,8 @@ export TARGET_CORE
export NO_AVX512
export NO_AVX2
export BUILD_BFLOAT16
export NO_LSX
export NO_LASX
export SBGEMM_UNROLL_M
export SBGEMM_UNROLL_N

View File

@ -75,18 +75,31 @@ endif
ifeq ($(CORE), COOPERLAKE)
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# cooperlake support was added in 10.1
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
CCOMMON_OPT += -march=cooperlake
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=cooperlake
endif
else # gcc not support, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
# cooperlake support was added in 10.1
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
CCOMMON_OPT += -march=cooperlake
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=cooperlake
endif
else # gcc not support, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
else ifeq ($(C_COMPILER), CLANG)
# cooperlake support was added in clang 9
ifeq ($(CLANGVERSIONGTEQ9), 1)
CCOMMON_OPT += -march=cooperlake
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=cooperlake
endif
else # not supported in clang, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
@ -104,18 +117,31 @@ endif
ifeq ($(CORE), SAPPHIRERAPIDS)
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# sapphire rapids support was added in 11
ifeq ($(GCCVERSIONGTEQ11), 1)
CCOMMON_OPT += -march=sapphirerapids
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=sapphirerapids
endif
else # gcc not support, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
# sapphire rapids support was added in 11
ifeq ($(GCCVERSIONGTEQ11), 1)
CCOMMON_OPT += -march=sapphirerapids
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=sapphirerapids
endif
else # gcc not support, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
else ifeq ($(C_COMPILER), CLANG)
# sapphire rapids support was added in clang 12
ifeq ($(CLANGVERSIONGTEQ12), 1)
CCOMMON_OPT += -march=sapphirerapids
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=sapphirerapids
endif
else # not supported in clang, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables

View File

@ -6,11 +6,15 @@ Travis CI: [![Build Status](https://travis-ci.com/xianyi/OpenBLAS.svg?branch=dev
AppVeyor: [![Build status](https://ci.appveyor.com/api/projects/status/09sohd35n8nkkx64/branch/develop?svg=true)](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
Drone CI: [![Build Status](https://cloud.drone.io/api/badges/xianyi/OpenBLAS/status.svg?branch=develop)](https://cloud.drone.io/xianyi/OpenBLAS/)
Cirrus CI: [![Build Status](https://api.cirrus-ci.com/github/xianyi/OpenBLAS.svg?branch=develop)](https://cirrus-ci.com/github/xianyi/OpenBLAS)
<!-- Drone CI: [![Build Status](https://cloud.drone.io/api/badges/xianyi/OpenBLAS/status.svg?branch=develop)](https://cloud.drone.io/xianyi/OpenBLAS/)-->
[![Build Status](https://dev.azure.com/xianyi/OpenBLAS/_apis/build/status/xianyi.OpenBLAS?branchName=develop)](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop)
OSUOSL POWERCI [![Build Status](https://powerci.osuosl.org/buildStatus/icon?job=OpenBLAS_gh%2Fdevelop)](http://powerci.osuosl.org/job/OpenBLAS_gh/job/develop/)
OSUOSL IBMZ-CI [![Build Status](http://ibmz-ci.osuosl.org/buildStatus/icon?job=OpenBLAS-Z%2Fdevelop)](http://ibmz-ci.osuosl.org/job/OpenBLAS-Z/job/develop/)
## Introduction
OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version.
@ -50,10 +54,15 @@ Building OpenBLAS requires the following to be installed:
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
The full target list is in the file `TargetList.txt`. For building with `cmake`, the
usual conventions apply, i.e. create a build directory either underneath the toplevel
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path
to the source tree and any build options you plan to set.
The full target list is in the file `TargetList.txt`, other build optionss are documented in Makefile.rule and
can either be set there (typically by removing the comment character from the respective line), or used on the
`make` command line.
Note that when you run `make install` after building, you need to repeat all command line options you provided to `make`
in the build step, as some settings like the supported maximum number of threads are automatically derived from the
build host by default, which might not be what you want.
For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any
build options you plan to set.
### Cross compile
@ -113,7 +122,7 @@ Use `PREFIX=` when invoking `make`, for example
```sh
make install PREFIX=your_installation_directory
```
(along with all options you added on the `make` command line in the preceding build step)
The default installation directory is `/opt/OpenBLAS`.
## Supported CPUs and Operating Systems
@ -133,7 +142,7 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
- **AMD ZEN**: Uses Haswell codes with some optimizations.
- **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen4)
#### MIPS32
@ -165,13 +174,16 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
- **TSV110**: Optimized some Level-3 helper functions
- **EMAG 8180**: preliminary support based on A57
- **Neoverse N1**: (AWS Graviton2) preliminary support
- **Apple Vortex**: preliminary support based on ARMV8
- **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS
- **Apple Vortex**: preliminary support based on ThunderX2/3
- **A64FX**: preliminary support, optimized Level-3 BLAS
- **ARMV8SVE**: any ARMV8 cpu with SVE extensions
#### PPC/PPC64
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
- **POWER10**:
- **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2.
#### IBM zEnterprise System
@ -184,20 +196,37 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
```sh
make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran
```
(also known to work on C906)
(also known to work on C906 as long as you use only single-precision functions - its instruction set support appears to be incomplete in double precision)
- **x280**: Level-3 BLAS and Level-1,2 are optimized by RISC-V Vector extension 1.0.
```sh
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran
```
- **ZVL???B**: Level-3 BLAS and Level-1,2 including vectorised kernels targeting generic RISCV cores with vector support with registers of at least the corresponding width; ZVL128B and ZVL256B are available.
e.g.:
```sh
make TARGET=RISCV64_ZVL256B CFLAGS="-DTARGET=RISCV64_ZVL256B" \
BINARY=64 ARCH=riscv64 CC='clang -target riscv64-unknown-linux-gnu' \
AR=riscv64-unknown-linux-gnu-ar AS=riscv64-unknown-linux-gnu-gcc \
LD=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran \
HOSTCC=gcc HOSTFC=gfortran -j
```
### Support for multiple targets in a single library
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows to specify an individual list of targets to include instead of the default.
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX, Cooper Lake, Sapphire Rapids. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows to specify an individual list of targets to include instead of the default.
`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano.
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus.
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus. If compiler support for SVE is available at build time, support for NeoverseN2, NeoverseV1 as well as generic ArmV8SVE targets is also enabled.
For **POWER**, the list encompasses POWER6, POWER8 and POWER9, on **ZARCH** it comprises Z13 and Z14.
For **POWER**, the list encompasses POWER6, POWER8 and POWER9. POWER10 is additionally available if a sufficiently recent compiler is used for the build.
on **ZARCH** it comprises Z13 and Z14 as well as generic zarch support.
The `TARGET` option can be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the
common code in the library, usually you will want to set this to the oldest model you expect to encounter.

View File

@ -118,8 +118,11 @@ Z13
Z14
10.RISC-V 64:
RISCV64_GENERIC
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
RISCV64_ZVL128B
C910V
x280
RISCV64_ZVL256B
11.LOONGARCH64:
LOONGSONGENERIC
@ -133,3 +136,7 @@ E2K
EV4
EV5
EV6
14.CSKY
CSKY
CK860FV

View File

@ -115,7 +115,7 @@ jobs:
mkdir build
cd build
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER="flang -I C:\Miniconda\Library\include\flang" -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
cmake --build . --config Release
ctest
@ -167,11 +167,10 @@ jobs:
- job: OSX_OpenMP_Clang
pool:
vmImage: 'macOS-11'
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
MACOSX_DEPLOYMENT_TARGET: 11.0
steps:
- script: |
brew update
@ -180,7 +179,7 @@ jobs:
- job: OSX_OpenMP_Clang_cmake
pool:
vmImage: 'macOS-11'
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
@ -210,7 +209,7 @@ jobs:
- job: OSX_Ifort_Clang
pool:
vmImage: 'macOS-11'
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/irc_nas/17643/m_HPCKit_p_2021.2.0.2903_offline.dmg
@ -271,14 +270,27 @@ jobs:
- script: |
make TARGET=ARMV7 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
- job: OSX_xbuild_DYNAMIC_ARM64
pool:
vmImage: 'macOS-11'
variables:
CC: /Applications/Xcode_12.5.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX11.3.sdk -arch arm64
steps:
- script: |
ls /Applications/Xcode_12.5.1.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
/Applications/Xcode_12.5.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -arch arm64 --print-supported-cpus
/Applications/Xcode_11.7.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang --version
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
- job: ALPINE_MUSL
pool:
vmImage: 'ubuntu-latest'
steps:
- script: |
wget https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.13.2/alpine-chroot-install \
&& echo '60c7e0b5d82e21d1a549fc9a46ba3b36688c09dc alpine-chroot-install' | sha1sum -c \
|| exit 1
wget https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.14.0/alpine-chroot-install \
&& echo 'ccbf65f85cdc351851f8ad025bb3e65bae4d5b06 alpine-chroot-install' | sha1sum -c \
|| exit 1
alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers sudo'
alpine make DYNAMIC_ARCH=1 BINARY=64

View File

@ -37,6 +37,12 @@ ESSL=/opt/ibm/lib
#LIBESSL = -lesslsmp $(ESSL)/libxlomp_ser.so.1 $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
LIBESSL = -lesslsmp $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
# x280 temporary workaround for gfortran
ifeq ($(TARGET), x280)
CCOMMON_OPT:=$(filter-out -mllvm --riscv-v-vector-bits-min=512,$(CCOMMON_OPT))
endif
ifneq ($(NO_LAPACK), 1)
GOTO_LAPACK_TARGETS=slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
@ -3436,4 +3442,4 @@ smallscaling: smallscaling.c ../$(LIBNAME)
clean ::
@rm -f *.goto *.mkl *.acml *.atlas *.veclib *.essl smallscaling
include $(TOPDIR)/Makefile.tail
include $(TOPDIR)/Makefile.tail

0
benchmark/spr.c Executable file → Normal file
View File

0
benchmark/spr2.c Executable file → Normal file
View File

View File

@ -1,5 +1,5 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
Copyright (c) 2014, 2023 The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@ -67,7 +67,7 @@ int main(int argc, char *argv[]){
int step = 1;
int loops = 1;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
double time1,timeg;
@ -77,7 +77,7 @@ int main(int argc, char *argv[]){
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c Loops = %d\n", from, to, step,uplo,trans,loops);
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){

View File

@ -127,7 +127,7 @@ int main(int argc, char *argv[]){
long long muls = n*(n+1)/2.0;
long long adds = (n - 1.0)*n/2.0;
fprintf(stderr, "%10d %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
fprintf(stderr, "%10d : %10.2f MFlops %10.6f sec\n", n,(muls+adds) / timeg * 1.e-6, timeg);
if(a != NULL){
free(a);
}

100
c_check
View File

@ -31,13 +31,17 @@ flags="$*"
cross_suffix=""
if [ "`dirname \"$compiler_name\"`" != '.' ]; then
cross_suffix="$cross_suffix`dirname \"$compiler_name\"`/"
if [ "`dirname "$compiler_name"`" != '.' ]; then
cross_suffix="$cross_suffix`dirname "$compiler_name"`/"
fi
bn=`basename $compiler_name`
cn=`echo $compiler_name | sed -e 's/ -.*//'`
bn=`basename "$cn"`
case "$bn" in
*-*) cross_suffix="$cross_suffix${bn%-*}-"
*-*) if [ "$bn" != '-' ]; then
cross_suffix="$cross_suffix${bn%-*}-"
fi
esac
compiler=""
@ -87,16 +91,25 @@ case "$data" in
*ARCH_ZARCH*) architecture=zarch ;;
*ARCH_RISCV64*) architecture=riscv64 ;;
*ARCH_LOONGARCH64*) architecture=loongarch64 ;;
*ARCH_CSKY*) architecture=csky ;;
esac
defined=0
if [ "$os" = "AIX" ]; then
case "$BINARY" in
32) compiler_name="$compiler_name -maix32" ;;
64) compiler_name="$compiler_name -maix64" ;;
esac
defined=1
if [ "$compiler" = "GCC" ]; then
case "$BINARY" in
32) compiler_name="$compiler_name -maix32" ;;
64) compiler_name="$compiler_name -maix64" ;;
esac
defined=1
else
case "$BINARY" in
32) compiler_name="$compiler_name -m32" ;;
64) compiler_name="$compiler_name -m64" ;;
esac
defined=1
fi
fi
case "$architecture" in
@ -164,7 +177,7 @@ fi
no_msa=0
if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then
tmpd="$(mktemp -d)"
tmpd=$(mktemp -d 2>/dev/null || mktemp -d -t 'OBC')
tmpf="$tmpd/a.c"
code='"addvi.b $w0, $w1, 1"'
msa_flags='-mmsa -mfp64 -mload-store-pairs'
@ -181,6 +194,35 @@ if [ "$architecture" = "mips" ] || [ "$architecture" = "mips64" ]; then
rm -rf "$tmpd"
fi
no_lsx=0
no_lasx=0
if [ "$architecture" = "loongarch64" ]; then
tmpd="$(mktemp -d)"
tmplsx="$tmpd/lsx.c"
codelsx='"vadd.b $vr0, $vr0, $vr0"'
lsx_flags='-march=loongarch64'
printf "void main(void){ __asm__ volatile(%s);}\n" "$codelsx" >> "$tmplsx"
args="$lsx_flags -o $tmplsx.o $tmplsx"
{
$compiler_name $flags $args >/dev/null 2>&1
} || {
no_lsx=1
}
tmplasx="$tmpd/lasx.c"
codelasx='"xvadd.b $xr0, $xr0, $xr0"'
lasx_flags='-march=loongarch64'
printf "void main(void){ __asm__ volatile(%s);}\n" "$codelasx" >> "$tmplasx"
args="$lasx_flags -o $tmplasx.o $tmplasx"
{
$compiler_name $flags $args >/dev/null 2>&1
} || {
no_lasx=1
}
rm -rf "$tmpd"
fi
case "$data" in
*ARCH_X86_64*) architecture=x86_64 ;;
*ARCH_X86*) architecture=x86 ;;
@ -195,6 +237,7 @@ case "$data" in
*ARCH_ARM*) architecture=arm ;;
*ARCH_ZARCH*) architecture=zarch ;;
*ARCH_LOONGARCH64*) architecture=loongarch64 ;;
*ARCH_CSKY*) architecture=csky ;;
esac
binformat='bin32'
@ -203,8 +246,9 @@ case "$data" in
esac
no_avx512=0
no_avx512bf=0
if [ "$architecture" = "x86" ] || [ "$architecture" = "x86_64" ]; then
tmpd=`mktemp -d`
tmpd=$(mktemp -d 2>/dev/null || mktemp -d -t 'OBC')
tmpf="$tmpd/a.c"
code='"vbroadcastss -4 * 4(%rsi), %zmm2"'
printf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(%s); }\n" "$code" >> "$tmpf"
@ -221,11 +265,30 @@ if [ "$architecture" = "x86" ] || [ "$architecture" = "x86_64" ]; then
}
rm -rf "$tmpd"
if [ "$no_avx512" -eq 0 ]; then
tmpd=$(mktemp -d 2>/dev/null || mktemp -d -t 'OBC')
tmpf="$tmpd/a.c"
code='"__m512 a= _mm512_dpbf16_ps(a, (__m512bh) _mm512_loadu_si512(%1]), (__m512bh) _mm512_loadu_si512(%2]));"'
printf "#include <immintrin.h>\n\nint main(void){ %s; }\n" "$code" >> "$tmpf"
if [ "$compiler" = "PGI" ]; then
args=" -tp cooperlake -c -o $tmpf.o $tmpf"
else
args=" -march=cooperlake -c -o $tmpf.o $tmpf"
fi
no_avx512bf=0
{
$compiler_name $flags $args >/dev/null 2>&1
} || {
no_avx512bf=1
}
rm -rf "$tmpd"
fi
fi
no_rv64gv=0
if [ "$architecture" = "riscv64" ]; then
tmpd=`mktemp -d`
tmpd=$(mktemp -d 2>/dev/null || mktemp -d -t 'OBC')
tmpf="$tmpd/a.c"
code='"vsetvli zero, zero, e8, m1\n"'
printf "int main(void){ __asm__ volatile(%s); }\n" "$code" >> "$tmpf"
@ -241,13 +304,16 @@ fi
no_sve=0
if [ "$architecture" = "arm64" ]; then
tmpd=`mktemp -d`
tmpd=$(mktemp -d 2>/dev/null || mktemp -d -t 'OBC')
tmpf="$tmpd/a.c"
printf "#include <arm_sve.h>\n\n int main(void){}\n">> "$tmpf"
args=" -march=armv8-a+sve -c -o $tmpf.o $tmpf"
no_sve=0
{
$compiler_name $flags $args >/dev/null 2>&1
} || {
args=" -Msve_intrinsics -c -o $tmpf.o $tmpf"
$compiler_name $flags $args >/dev/null 2>&1
} || {
no_sve=1
}
@ -257,7 +323,7 @@ fi
c11_atomics=0
case "$data" in
*HAVE_C11*)
tmpd=`mktemp -d`
tmpd=$(mktemp -d 2>/dev/null || mktemp -d -t 'OBC')
tmpf="$tmpd/a.c"
printf "#include <stdatomic.h>\nint main(void){}\n" >> "$tmpf"
args=" -c -o $tmpf.o $tmpf"
@ -365,6 +431,7 @@ done
[ "$makefile" = "-" ] && {
[ "$no_rv64gv" -eq 1 ] && printf "NO_RV64GV=1\n"
[ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n"
[ "$no_avx512bf" -eq 1 ] && printf "NO_AVX512BF16=1\n"
[ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
[ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n"
exit 0
@ -393,8 +460,11 @@ done
[ "$no_sve" -eq 1 ] && printf "NO_SVE=1\n"
[ "$no_rv64gv" -eq 1 ] && printf "NO_RV64GV=1\n"
[ "$no_avx512" -eq 1 ] && printf "NO_AVX512=1\n"
[ "$no_avx512bf" -eq 1 ] && printf "NO_AVX512BF16=1\n"
[ "$no_avx2" -eq 1 ] && printf "NO_AVX2=1\n"
[ "$oldgcc" -eq 1 ] && printf "OLDGCC=1\n"
[ "$no_lsx" -eq 1 ] && printf "NO_LSX=1\n"
[ "$no_lasx" -eq 1 ] && printf "NO_LASX=1\n"
} >> "$makefile"
os=`echo "$os" | tr '[[:lower:]]' '[[:upper:]]'/ `
@ -410,6 +480,8 @@ compiler=`echo "$compiler" | tr '[[:lower:]]' '[[:upper:]]' `
[ -n "$need_fu" ] && printf "#define FUNDERSCORE\t%s\n" "$need_fu"
[ "$no_msa" -eq 1 ] && printf "#define NO_MSA\t1\n"
[ "$c11_atomics" -eq 1 ] && printf "#define HAVE_C11\t1\n"
[ "$no_lsx" -eq 1 ] && printf "#define NO_LSX\t1\n"
[ "$no_lasx" -eq 1 ] && printf "#define NO_LASX\t1\n"
} >> "$config"

View File

@ -97,6 +97,7 @@ $architecture = arm64 if ($data =~ /ARCH_ARM64/);
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
$architecture = riscv64 if ($data =~ /ARCH_RISCV64/);
$architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/);
$architecture = csky if ($data =~ /ARCH_CSKY/);
$defined = 0;
@ -156,6 +157,11 @@ if ($architecture eq "loongarch64") {
$binary = 64;
}
if ($architecture eq "csky") {
$defined = 1;
$binary = 32;
}
if ($compiler eq "PGI") {
$compiler_name .= " -tp p7" if ($binary eq "32");
$compiler_name .= " -tp p7-64" if ($binary eq "64");
@ -232,6 +238,45 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) {
}
}
$no_lsx = 0;
$no_lasx = 0;
if (($architecture eq "loongarch64")) {
eval "use File::Temp qw(tempfile)";
if ($@){
warn "could not load PERL module File::Temp, so could not check LSX and LASX capatibility";
} else {
$tmplsx = new File::Temp( SUFFIX => '.c' , UNLINK => 1 );
$codelsx = '"vadd.b $vr0, $vr0, $vr0"';
$lsx_flags = "-march=loongarch64";
print $tmplsx "void main(void){ __asm__ volatile($codelsx); }\n";
$args = "$lsx_flags -o $tmplsx.o $tmplsx";
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null");
system(@cmd) == 0;
if ($? != 0) {
$no_lsx = 1;
} else {
$no_lsx = 0;
}
unlink("$tmplsx.o");
$tmplasx = new File::Temp( SUFFIX => '.c' , UNLINK => 1 );
$codelasx = '"xvadd.b $xr0, $xr0, $xr0"';
$lasx_flags = "-march=loongarch64";
print $tmplasx "void main(void){ __asm__ volatile($codelasx); }\n";
$args = "$lasx_flags -o $tmplasx.o $tmplasx";
my @cmd = ("$compiler_name $flags $args >/dev/null 2>/dev/null");
system(@cmd) == 0;
if ($? != 0) {
$no_lasx = 1;
} else {
$no_lasx = 0;
}
unlink("$tmplasx.o");
}
}
$architecture = x86 if ($data =~ /ARCH_X86/);
$architecture = x86_64 if ($data =~ /ARCH_X86_64/);
$architecture = e2k if ($data =~ /ARCH_E2K/);
@ -245,6 +290,7 @@ $architecture = arm if ($data =~ /ARCH_ARM/);
$architecture = arm64 if ($data =~ /ARCH_ARM64/);
$architecture = zarch if ($data =~ /ARCH_ZARCH/);
$architecture = loongarch64 if ($data =~ /ARCH_LOONGARCH64/);
$architecture = csky if ($data =~ /ARCH_CSKY/);
$binformat = bin32;
$binformat = bin64 if ($data =~ /BINARY_64/);
@ -424,6 +470,8 @@ print MAKEFILE "NO_RV64GV=1\n" if $no_rv64gv eq 1;
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
print MAKEFILE "NO_AVX2=1\n" if $no_avx2 eq 1;
print MAKEFILE "OLDGCC=1\n" if $oldgcc eq 1;
print MAKEFILE "NO_LSX=1\n" if $no_lsx eq 1;
print MAKEFILE "NO_LASX=1\n" if $no_lasx eq 1;
$os =~ tr/[a-z]/[A-Z]/;
$architecture =~ tr/[a-z]/[A-Z]/;
@ -437,6 +485,8 @@ print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64;
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne "";
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1;
print CONFFILE "#define HAVE_C11\t1\n" if $c11_atomics eq 1;
print CONFFILE "#define NO_LSX\t1\n" if $no_lsx eq 1;
print CONFFILE "#define NO_LASX\t1\n" if $no_lasx eq 1;
if ($os eq "LINUX") {

24
cblas.h
View File

@ -12,6 +12,7 @@ extern "C" {
/*Set the number of threads on runtime.*/
void openblas_set_num_threads(int num_threads);
void goto_set_num_threads(int num_threads);
int openblas_set_num_threads_local(int num_threads);
/*Get the number of threads on runtime.*/
int openblas_get_num_threads(void);
@ -100,6 +101,16 @@ CBLAS_INDEX cblas_idamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPE
CBLAS_INDEX cblas_icamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_izamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
float cblas_samax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
double cblas_damax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
float cblas_scamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
double cblas_dzamax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
float cblas_samin(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
double cblas_damin(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
float cblas_scamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
double cblas_dzamin(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_ismax(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_idmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx);
CBLAS_INDEX cblas_icmax(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx);
@ -115,6 +126,9 @@ void cblas_daxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double alpha, OPENBLAS
void cblas_caxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
void cblas_zaxpy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
void cblas_caxpyc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
void cblas_zaxpyc(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
void cblas_scopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST float *x, OPENBLAS_CONST blasint incx, float *y, OPENBLAS_CONST blasint incy);
void cblas_dcopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *x, OPENBLAS_CONST blasint incx, double *y, OPENBLAS_CONST blasint incy);
void cblas_ccopy(OPENBLAS_CONST blasint n, OPENBLAS_CONST void *x, OPENBLAS_CONST blasint incx, void *y, OPENBLAS_CONST blasint incy);
@ -289,6 +303,14 @@ void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
@ -350,7 +372,7 @@ void cblas_cher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBL
void cblas_zher2k(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE Trans, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, void *C, OPENBLAS_CONST blasint ldc);
void cblas_xerbla(blasint p, char *rout, char *form, ...);
void cblas_xerbla(blasint p, OPENBLAS_CONST char *rout, OPENBLAS_CONST char *form, ...);
/*** BLAS extensions ***/

View File

@ -44,9 +44,9 @@ endif ()
if (DYNAMIC_ARCH)
if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA55 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
if (${CMAKE_C_COMPILER_VERSION} VERSION_GREATER 9.99)
set(DYNAMIC_CORE "${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2")
set(DYNAMIC_CORE ${DYNAMIC_CORE} NEOVERSEV1 NEOVERSEN2 ARMV8SVE)
endif ()
if (DYNAMIC_LIST)
set(DYNAMIC_CORE ARMV8 ${DYNAMIC_LIST})
@ -82,7 +82,7 @@ if (DYNAMIC_ARCH)
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
endif ()
if (NOT NO_AVX512)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX COOPERLAKE)
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX COOPERLAKE SAPPHIRERAPIDS)
string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
endif ()
if (DYNAMIC_LIST)
@ -135,7 +135,7 @@ if (ARM64)
set(BINARY_DEFINED 1)
endif ()
if (${ARCH} STREQUAL "riscv64")
if (RISCV64)
set(NO_BINARY_MODE 1)
set(BINARY_DEFINED 1)
endif ()

View File

@ -36,9 +36,19 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LS
if (LOONGARCH64)
if (BINARY64)
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64")
CHECK_CXX_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI)
if(COMPILER_SUPPORT_LP64D_ABI)
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64d")
else()
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp64")
endif ()
else ()
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp32")
CHECK_CXX_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI)
if(COMPILER_SUPPORT_ILP32D_ABI)
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=ilp32d")
else()
set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=lp32")
endif ()
endif ()
set(BINARY_DEFINED 1)
endif ()
@ -65,6 +75,14 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
endif ()
endif ()
if (${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC")
if (POWER)
set(CCOMMON_OPT "${CCOMMON_OPT} -tp pwr8")
else ()
set(CCOMMON_OPT "${CCOMMON_OPT} -tp px")
endif ()
endif ()
if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE")
if (BINARY64)
set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
@ -172,22 +190,30 @@ endif ()
if (${CORE} STREQUAL NEOVERSEN2)
if (NOT DYNAMIC_ARCH)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
endif()
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
endif()
endif ()
endif ()
endif ()
if (${CORE} STREQUAL NEOVERSEV1)
if (NOT DYNAMIC_ARCH)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.4-a+sve -mtune=neoverse-v1")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8.2-a+sve")
endif()
endif()
endif ()
endif ()
@ -205,7 +231,11 @@ endif ()
if (${CORE} STREQUAL ARMV8SVE)
if (NOT DYNAMIC_ARCH)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (CCOMMON_OPT "${CCOMMON_OPT} -Msve_intrinsics -march=armv8-a+sve")
else ()
set (CCOMMON_OPT "${CCOMMON_OPT} -march=armv8-a+sve")
endif ()
endif ()
endif ()
@ -262,6 +292,27 @@ if (${CORE} STREQUAL POWER8)
endif ()
endif ()
# With -mcpu=970 added it compiles, but library is broken, at least on macOS. If someone
# tests on *BSD or Linux and adds this flag, please make sure it is not used for macOS case.
if (${CORE} STREQUAL PPC970)
if (NOT DYNAMIC_ARCH)
set (CCOMMON_OPT "${CCOMMON_OPT} -mtune=970 -maltivec -fno-fast-math")
endif ()
if (APPLE)
set (CCOMMON_OPT "${CCOMMON_OPT} -force_cpusubtype_ALL")
endif ()
endif ()
# -mcpu=G4 seems to work fine, but perhaps avoid it for the sake of consistency?
if (${CORE} STREQUAL PPCG4)
if (NOT DYNAMIC_ARCH)
set (CCOMMON_OPT "${CCOMMON_OPT} -mtune=G4 -maltivec -fno-fast-math")
endif ()
if (APPLE)
set (CCOMMON_OPT "${CCOMMON_OPT} -force_cpusubtype_ALL")
endif ()
endif ()
if (NOT DYNAMIC_ARCH)
if (HAVE_AVX2)
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2")

View File

@ -64,6 +64,7 @@ else ()
"#define NEEDBUNDERSCORE 1\n")
endif()
if (CMAKE_Fortran_COMPILER)
get_filename_component(F_COMPILER ${CMAKE_Fortran_COMPILER} NAME_WE)
string(TOUPPER ${F_COMPILER} F_COMPILER)
endif()

View File

@ -3,11 +3,9 @@
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets Fortran related variables.
if (${F_COMPILER} STREQUAL "FLANG")
if (${F_COMPILER} STREQUAL "FLANG" AND NOT CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
# This is for classic Flang. LLVM Flang is handled with gfortran below.
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FLANG")
if (BINARY64 AND INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
endif ()
if (USE_OPENMP)
set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
endif ()
@ -38,31 +36,56 @@ if (${F_COMPILER} STREQUAL "G95")
endif ()
endif ()
if (${F_COMPILER} STREQUAL "GFORTRAN")
if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
# ensure reentrancy of lapack codes
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -frecursive")
# work around ABI violation in passing string arguments from C
set(FCOMMON_OPT "${FCOMMON_OPT} -fno-optimize-sibling-calls")
#Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
if (NOT NO_LAPACK)
set(EXTRALIB "${EXTRALIB} -lgfortran")
if (NOT CMAKE_Fortran_COMPILER_ID STREQUAL "LLVMFlang")
# ensure reentrancy of lapack codes
set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -frecursive")
# work around ABI violation in passing string arguments from C
set(FCOMMON_OPT "${FCOMMON_OPT} -fno-optimize-sibling-calls")
if (NOT NO_LAPACK)
# Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
set(EXTRALIB "${EXTRALIB} -lgfortran")
endif ()
endif ()
if (NO_BINARY_MODE)
if (MIPS64)
if (BINARY64)
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
if (INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
endif ()
else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
endif ()
endif ()
if (LOONGARCH64)
if (BINARY64)
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64")
CHECK_CXX_COMPILER_FLAG("-mabi=lp64d" COMPILER_SUPPORT_LP64D_ABI)
if(COMPILER_SUPPORT_LP64D_ABI)
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64d")
else()
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp64")
endif ()
else ()
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32")
CHECK_CXX_COMPILER_FLAG("-mabi=ilp32d" COMPILER_SUPPORT_ILP32D_ABI)
if(COMPILER_SUPPORT_ILP32D_ABI)
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=ilp32d")
else()
set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=lp32")
endif ()
endif ()
endif ()
if (RISCV64)
if (BINARY64)
if (INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
endif ()
endif ()
endif ()
if (ARM64 AND INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
endif ()
else ()
if (BINARY64)
set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
@ -121,7 +144,7 @@ if (${F_COMPILER} STREQUAL "IBM")
endif ()
endif ()
if (${F_COMPILER} STREQUAL "PGI")
if (${F_COMPILER} STREQUAL "PGI" OR ${F_COMPILER} STREQUAL "PGF95")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI")
set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER")
if (BINARY64)

View File

@ -52,7 +52,7 @@ set(SLASRC
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
sgehd2.f sgehrd.f sgelq2.f sgelqf.f
sgels.f sgelsd.f sgelss.f sgelsy.f sgeql2.f sgeqlf.f
sgeqp3.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f sgerq2.f sgerqf.f
sgeqp3.f sgeqp3rk.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f sgerq2.f sgerqf.f
sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f sgetc2.f
sgetrf2.f sgetri.f
sggbak.f sggbal.f
@ -67,7 +67,7 @@ set(SLASRC
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
slansy.f slantb.f slantp.f slantr.f slanv2.f
slapll.f slapmt.f
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
slaqgb.f slaqge.f slaqp2.f slaqps.f slaqp2rk.f slaqp3rk.f slaqsb.f slaqsp.f slaqsy.f
slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
slarf.f slarfb.f slarfb_gett.f slarfg.f slarfgp.f slarft.f slarfx.f slarfy.f slargv.f
@ -124,7 +124,7 @@ set(SLASRC
ssbev_2stage.f ssbevx_2stage.f ssbevd_2stage.f ssygv_2stage.f
sgesvdq.f slaorhr_col_getrfnp.f
slaorhr_col_getrfnp2.f sorgtsqr.f sorgtsqr_row.f sorhr_col.f
slatrs3.f strsyl3.f sgelst.f)
slatrs3.f strsyl3.f sgelst.f sgedmd.f90 sgedmdq.f90)
set(SXLASRC sgesvxx.f sgerfsx.f sla_gerfsx_extended.f sla_geamv.f
sla_gercond.f sla_gerpvgrw.f ssysvxx.f ssyrfsx.f
@ -139,7 +139,7 @@ set(CLASRC
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
cgehd2.f cgehrd.f cgelq2.f cgelqf.f
cgels.f cgelsd.f cgelss.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
cgels.f cgelsd.f cgelss.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f cgeqp3rk.f
cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f cgerq2.f cgerqf.f
cgesc2.f cgesdd.f cgesvd.f cgesvdx.f
cgesvj.f cgejsv.f cgsvj0.f cgsvj1.f
@ -173,7 +173,7 @@ set(CLASRC
clanhb.f clanhe.f
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqp2rk.f claqp3rk.f claqsb.f
claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
claqz0.f claqz1.f claqz2.f claqz3.f
claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
@ -187,7 +187,7 @@ set(CLASRC
cposv.f cposvx.f cpotrf2.f cpotri.f cpstrf.f cpstf2.f
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
crot.f cspcon.f csprfs.f cspsv.f
crot.f crscl.f cspcon.f csprfs.f cspsv.f
cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
cstegr.f cstein.f csteqr.f csycon.f
csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f
@ -223,7 +223,7 @@ set(CLASRC
chbev_2stage.f chbevx_2stage.f chbevd_2stage.f chegv_2stage.f
cgesvdq.f claunhr_col_getrfnp.f claunhr_col_getrfnp2.f
cungtsqr.f cungtsqr_row.f cunhr_col.f
clatrs3.f ctrsyl3.f cgelst.f)
clatrs3.f ctrsyl3.f cgelst.f cgedmd.f90 cgedmdq.f90)
set(CXLASRC cgesvxx.f cgerfsx.f cla_gerfsx_extended.f cla_geamv.f
cla_gercond_c.f cla_gercond_x.f cla_gerpvgrw.f
@ -243,7 +243,7 @@ set(DLASRC
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
dgehd2.f dgehrd.f dgelq2.f dgelqf.f
dgels.f dgelsd.f dgelss.f dgelsy.f dgeql2.f dgeqlf.f
dgeqp3.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f dgerq2.f dgerqf.f
dgeqp3.f dgeqp3rk.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f dgerq2.f dgerqf.f
dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f dgetc2.f
dgetrf2.f dgetri.f
dggbak.f dggbal.f
@ -258,7 +258,7 @@ set(DLASRC
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
dlapll.f dlapmt.f
dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
dlaqgb.f dlaqge.f dlaqp2.f dlaqp2rk.f dlaqp3rk.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
dlarf.f dlarfb.f dlarfb_gett.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f dlarfy.f
@ -316,7 +316,7 @@ set(DLASRC
dsbev_2stage.f dsbevx_2stage.f dsbevd_2stage.f dsygv_2stage.f
dcombssq.f dgesvdq.f dlaorhr_col_getrfnp.f
dlaorhr_col_getrfnp2.f dorgtsqr.f dorgtsqr_row.f dorhr_col.f
dlatrs3.f dtrsyl3.f dgelst.f)
dlatrs3.f dtrsyl3.f dgelst.f dgedmd.f90 dgedmdq.f90)
set(DXLASRC dgesvxx.f dgerfsx.f dla_gerfsx_extended.f dla_geamv.f
dla_gercond.f dla_gerpvgrw.f dsysvxx.f dsyrfsx.f
@ -331,7 +331,7 @@ set(ZLASRC
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
zgehd2.f zgehrd.f zgelq2.f zgelqf.f
zgels.f zgelsd.f zgelss.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
zgels.f zgelsd.f zgelss.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f zgeqp3rk.f
zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvx.f
zgesvj.f zgejsv.f zgsvj0.f zgsvj1.f
@ -367,7 +367,7 @@ set(ZLASRC
zlanhe.f
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqp2rk.f zlaqp3rk.f zlaqps.f zlaqsb.f
zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
zlarcm.f zlarf.f zlarfb.f zlarfb_gett.f
@ -381,7 +381,7 @@ set(ZLASRC
zposv.f zposvx.f zpotrf2.f zpotri.f zpotrs.f zpstrf.f zpstf2.f
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
zrot.f zspcon.f zsprfs.f zspsv.f
zrot.f zrscl.f zspcon.f zsprfs.f zspsv.f
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
zstegr.f zstein.f zsteqr.f zsycon.f
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f
@ -419,7 +419,7 @@ set(ZLASRC
zhbev_2stage.f zhbevx_2stage.f zhbevd_2stage.f zhegv_2stage.f
zgesvdq.f zlaunhr_col_getrfnp.f zlaunhr_col_getrfnp2.f
zungtsqr.f zungtsqr_row.f zunhr_col.f
zlatrs3.f ztrsyl3.f zgelst.f)
zlatrs3.f ztrsyl3.f zgelst.f zgedmd.f90 zgedmdq.f90)
set(ZXLASRC zgesvxx.f zgerfsx.f zla_gerfsx_extended.f zla_geamv.f
zla_gercond_c.f zla_gercond_x.f zla_gerpvgrw.f zsysvxx.f zsyrfsx.f
@ -436,19 +436,25 @@ if(USE_XBLAS)
set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC})
endif()
if(BUILD_LAPACK_DEPRECATED)
list(APPEND SLASRC DEPRECATED/sgegs.f DEPRECATED/sgegv.f
DEPRECATED/sgelqs.f DEPRECATED/sgeqrs.f
DEPRECATED/sgeqpf.f DEPRECATED/sgelsx.f DEPRECATED/sggsvd.f
DEPRECATED/sggsvp.f DEPRECATED/slahrd.f DEPRECATED/slatzm.f DEPRECATED/stzrqf.f)
list(APPEND DLASRC DEPRECATED/dgegs.f DEPRECATED/dgegv.f
DEPRECATED/dgelqs.f DEPRECATED/dgeqrs.f
DEPRECATED/dgeqpf.f DEPRECATED/dgelsx.f DEPRECATED/dggsvd.f
DEPRECATED/dggsvp.f DEPRECATED/dlahrd.f DEPRECATED/dlatzm.f DEPRECATED/dtzrqf.f)
list(APPEND CLASRC DEPRECATED/cgegs.f DEPRECATED/cgegv.f
DEPRECATED/cgelqs.f DEPRECATED/cgeqrs.f
DEPRECATED/cgeqpf.f DEPRECATED/cgelsx.f DEPRECATED/cggsvd.f
DEPRECATED/cggsvp.f DEPRECATED/clahrd.f DEPRECATED/clatzm.f DEPRECATED/ctzrqf.f)
list(APPEND ZLASRC DEPRECATED/zgegs.f DEPRECATED/zgegv.f
DEPRECATED/zgelqs.f DEPRECATED/zgeqrs.f
DEPRECATED/zgeqpf.f DEPRECATED/zgelsx.f DEPRECATED/zggsvd.f
DEPRECATED/zggsvp.f DEPRECATED/zlahrd.f DEPRECATED/zlatzm.f DEPRECATED/ztzrqf.f)
message(STATUS "Building deprecated routines")
endif()
set(DSLASRC spotrs.f)
@ -551,7 +557,7 @@ set(SLASRC
sgebrd.c sgecon.c sgeequ.c sgees.c sgeesx.c sgeev.c sgeevx.c
sgehd2.c sgehrd.c sgelq2.c sgelqf.c
sgels.c sgelsd.c sgelss.c sgelsy.c sgeql2.c sgeqlf.c
sgeqp3.c sgeqr2.c sgeqr2p.c sgeqrf.c sgeqrfp.c sgerfs.c sgerq2.c sgerqf.c
sgeqp3.c sgeqp3rk.c sgeqr2.c sgeqr2p.c sgeqrf.c sgeqrfp.c sgerfs.c sgerq2.c sgerqf.c
sgesc2.c sgesdd.c sgesvd.c sgesvdx.c sgesvx.c sgetc2.c
sgetrf2.c sgetri.c
sggbak.c sggbal.c
@ -565,7 +571,7 @@ set(SLASRC
slangb.c slange.c slangt.c slanhs.c slansb.c slansp.c
slansy.c slantb.c slantp.c slantr.c slanv2.c
slapll.c slapmt.c
slaqgb.c slaqge.c slaqp2.c slaqps.c slaqsb.c slaqsp.c slaqsy.c
slaqgb.c slaqge.c slaqp2.c slaqp2rk.c slaqp3rk.c slaqps.c slaqsb.c slaqsp.c slaqsy.c
slaqr0.c slaqr1.c slaqr2.c slaqr3.c slaqr4.c slaqr5.c
slaqtr.c slar1v.c slar2v.c ilaslr.c ilaslc.c
slarf.c slarfb.c slarfb_gett.c slarfg.c slarfgp.c slarft.c slarfx.c slarfy.c slargv.c
@ -622,7 +628,7 @@ set(SLASRC
ssbev_2stage.c ssbevx_2stage.c ssbevd_2stage.c ssygv_2stage.c
sgesvdq.c slaorhr_col_getrfnp.c
slaorhr_col_getrfnp2.c sorgtsqr.c sorgtsqr_row.c sorhr_col.c
slatrs3.c strsyl3.c sgelst.c)
slatrs3.c strsyl3.c sgelst.c sgedmd.c sgedmdq.c)
set(SXLASRC sgesvxx.c sgerfsx.c sla_gerfsx_extended.c sla_geamv.c
sla_gercond.c sla_gerpvgrw.c ssysvxx.c ssyrfsx.c
@ -637,7 +643,7 @@ set(CLASRC
cgbtf2.c cgbtrf.c cgbtrs.c cgebak.c cgebal.c cgebd2.c cgebrd.c
cgecon.c cgeequ.c cgees.c cgeesx.c cgeev.c cgeevx.c
cgehd2.c cgehrd.c cgelq2.c cgelqf.c
cgels.c cgelsd.c cgelss.c cgelsy.c cgeql2.c cgeqlf.c cgeqp3.c
cgels.c cgelsd.c cgelss.c cgelsy.c cgeql2.c cgeqlf.c cgeqp3.c cgeqp3rk.c
cgeqr2.c cgeqr2p.c cgeqrf.c cgeqrfp.c cgerfs.c cgerq2.c cgerqf.c
cgesc2.c cgesdd.c cgesvd.c cgesvdx.c
cgesvj.c cgejsv.c cgsvj0.c cgsvj1.c
@ -671,7 +677,7 @@ set(CLASRC
clanhb.c clanhe.c
clanhp.c clanhs.c clanht.c clansb.c clansp.c clansy.c clantb.c
clantp.c clantr.c clapll.c clapmt.c clarcm.c claqgb.c claqge.c
claqhb.c claqhe.c claqhp.c claqp2.c claqps.c claqsb.c
claqhb.c claqhe.c claqhp.c claqp2.c claqp2rk.c claqp3rk.c claqps.c claqsb.c
claqr0.c claqr1.c claqr2.c claqr3.c claqr4.c claqr5.c
claqsp.c claqsy.c clar1v.c clar2v.c ilaclr.c ilaclc.c
clarf.c clarfb.c clarfb_gett.c clarfg.c clarfgp.c clarft.c
@ -684,7 +690,7 @@ set(CLASRC
cposv.c cposvx.c cpotrf2.c cpotri.c cpstrf.c cpstf2.c
cppcon.c cppequ.c cpprfs.c cppsv.c cppsvx.c cpptrf.c cpptri.c cpptrs.c
cptcon.c cpteqr.c cptrfs.c cptsv.c cptsvx.c cpttrf.c cpttrs.c cptts2.c
crot.c cspcon.c csprfs.c cspsv.c
crot.c crscl.c cspcon.c csprfs.c cspsv.c
cspsvx.c csptrf.c csptri.c csptrs.c csrscl.c cstedc.c
cstegr.c cstein.c csteqr.c csycon.c
csyrfs.c csysv.c csysvx.c csytf2.c csytrf.c csytri.c
@ -720,7 +726,7 @@ set(CLASRC
chbev_2stage.c chbevx_2stage.c chbevd_2stage.c chegv_2stage.c
cgesvdq.c claunhr_col_getrfnp.c claunhr_col_getrfnp2.c
cungtsqr.c cungtsqr_row.c cunhr_col.c
clatrs3.c ctrsyl3.c cgelst.c)
clatrs3.c ctrsyl3.c cgelst.c cgedmd.c cgedmdq.c)
set(CXLASRC cgesvxx.c cgerfsx.c cla_gerfsx_extended.c cla_geamv.c
cla_gercond_c.c cla_gercond_x.c cla_gerpvgrw.c
@ -740,7 +746,7 @@ set(DLASRC
dgebrd.c dgecon.c dgeequ.c dgees.c dgeesx.c dgeev.c dgeevx.c
dgehd2.c dgehrd.c dgelq2.c dgelqf.c
dgels.c dgelsd.c dgelss.c dgelsy.c dgeql2.c dgeqlf.c
dgeqp3.c dgeqr2.c dgeqr2p.c dgeqrf.c dgeqrfp.c dgerfs.c dgerq2.c dgerqf.c
dgeqp3.c dgeqp3rk.c dgeqr2.c dgeqr2p.c dgeqrf.c dgeqrfp.c dgerfs.c dgerq2.c dgerqf.c
dgesc2.c dgesdd.c dgesvd.c dgesvdx.c dgesvx.c dgetc2.c
dgetrf2.c dgetri.c
dggbak.c dggbal.c
@ -754,7 +760,7 @@ set(DLASRC
dlangb.c dlange.c dlangt.c dlanhs.c dlansb.c dlansp.c
dlansy.c dlantb.c dlantp.c dlantr.c dlanv2.c
dlapll.c dlapmt.c
dlaqgb.c dlaqge.c dlaqp2.c dlaqps.c dlaqsb.c dlaqsp.c dlaqsy.c
dlaqgb.c dlaqge.c dlaqp2.c dlaqp2rk.c dlaqp3rk.c dlaqps.c dlaqsb.c dlaqsp.c dlaqsy.c
dlaqr0.c dlaqr1.c dlaqr2.c dlaqr3.c dlaqr4.c dlaqr5.c
dlaqtr.c dlar1v.c dlar2v.c iladlr.c iladlc.c
dlarf.c dlarfb.c dlarfb_gett.c dlarfg.c dlarfgp.c dlarft.c dlarfx.c dlarfy.c
@ -812,7 +818,7 @@ set(DLASRC
dsbev_2stage.c dsbevx_2stage.c dsbevd_2stage.c dsygv_2stage.c
dcombssq.c dgesvdq.c dlaorhr_col_getrfnp.c
dlaorhr_col_getrfnp2.c dorgtsqr.c dorgtsqr_row.c dorhr_col.c
dlatrs3.c dtrsyl3.c dgelst.c)
dlatrs3.c dtrsyl3.c dgelst.c dgedmd.c dgedmdq.c)
set(DXLASRC dgesvxx.c dgerfsx.c dla_gerfsx_extended.c dla_geamv.c
dla_gercond.c dla_gerpvgrw.c dsysvxx.c dsyrfsx.c
@ -827,7 +833,7 @@ set(ZLASRC
zgbtf2.c zgbtrf.c zgbtrs.c zgebak.c zgebal.c zgebd2.c zgebrd.c
zgecon.c zgeequ.c zgees.c zgeesx.c zgeev.c zgeevx.c
zgehd2.c zgehrd.c zgelq2.c zgelqf.c
zgels.c zgelsd.c zgelss.c zgelsy.c zgeql2.c zgeqlf.c zgeqp3.c
zgels.c zgelsd.c zgelss.c zgelsy.c zgeql2.c zgeqlf.c zgeqp3.c zgeqp3rk.c
zgeqr2.c zgeqr2p.c zgeqrf.c zgeqrfp.c zgerfs.c zgerq2.c zgerqf.c
zgesc2.c zgesdd.c zgesvd.c zgesvdx.c zgesvx.c
zgesvj.c zgejsv.c zgsvj0.c zgsvj1.c
@ -862,7 +868,7 @@ set(ZLASRC
zlanhe.c
zlanhp.c zlanhs.c zlanht.c zlansb.c zlansp.c zlansy.c zlantb.c
zlantp.c zlantr.c zlapll.c zlapmt.c zlaqgb.c zlaqge.c
zlaqhb.c zlaqhe.c zlaqhp.c zlaqp2.c zlaqps.c zlaqsb.c
zlaqhb.c zlaqhe.c zlaqhp.c zlaqp2.c zlaqp2rk.c zlaqp3rk.c zlaqps.c zlaqsb.c
zlaqr0.c zlaqr1.c zlaqr2.c zlaqr3.c zlaqr4.c zlaqr5.c
zlaqsp.c zlaqsy.c zlar1v.c zlar2v.c ilazlr.c ilazlc.c
zlarcm.c zlarf.c zlarfb.c zlarfb_gett.c
@ -876,7 +882,7 @@ set(ZLASRC
zposv.c zposvx.c zpotrf2.c zpotri.c zpotrs.c zpstrf.c zpstf2.c
zppcon.c zppequ.c zpprfs.c zppsv.c zppsvx.c zpptrf.c zpptri.c zpptrs.c
zptcon.c zpteqr.c zptrfs.c zptsv.c zptsvx.c zpttrf.c zpttrs.c zptts2.c
zrot.c zspcon.c zsprfs.c zspsv.c
zrot.c zrscl.c zspcon.c zsprfs.c zspsv.c
zspsvx.c zsptrf.c zsptri.c zsptrs.c zdrscl.c zstedc.c
zstegr.c zstein.c zsteqr.c zsycon.c
zsyrfs.c zsysv.c zsysvx.c zsytf2.c zsytrf.c zsytri.c
@ -913,7 +919,8 @@ set(ZLASRC
zheevd_2stage.c zheev_2stage.c zheevx_2stage.c zheevr_2stage.c
zhbev_2stage.c zhbevx_2stage.c zhbevd_2stage.c zhegv_2stage.c
zgesvdq.c zlaunhr_col_getrfnp.c zlaunhr_col_getrfnp2.c
zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c zgelst.c)
zungtsqr.c zungtsqr_row.c zunhr_col.c zlatrs3.c ztrsyl3.c zgelst.c
zgedmd.c zgedmdq.c)
set(ZXLASRC zgesvxx.c zgerfsx.c zla_gerfsx_extended.c zla_geamv.c
zla_gercond_c.c zla_gercond_x.c zla_gerpvgrw.c zsysvxx.c zsyrfsx.c
@ -930,19 +937,25 @@ if(USE_XBLAS)
set(ALLXOBJ ${SXLASRC} ${DXLASRC} ${CXLASRC} ${ZXLASRC})
endif()
if(BUILD_LAPACK_DEPRECATED)
list(APPEND SLASRC DEPRECATED/sgegs.c DEPRECATED/sgegv.c
DEPRECATED/sgelqs.c DEPRECATED/sgeqrs.c
DEPRECATED/sgeqpf.c DEPRECATED/sgelsx.c DEPRECATED/sggsvd.c
DEPRECATED/sggsvp.c DEPRECATED/slahrd.c DEPRECATED/slatzm.c DEPRECATED/stzrqf.c)
list(APPEND DLASRC DEPRECATED/dgegs.c DEPRECATED/dgegv.c
DEPRECATED/dgelqs.c DEPRECATED/dgeqrs.c
DEPRECATED/dgeqpf.c DEPRECATED/dgelsx.c DEPRECATED/dggsvd.c
DEPRECATED/dggsvp.c DEPRECATED/dlahrd.c DEPRECATED/dlatzm.c DEPRECATED/dtzrqf.c)
list(APPEND CLASRC DEPRECATED/cgegs.c DEPRECATED/cgegv.c
DEPRECATED/cgelqs.c DEPRECATED/cgeqrs.c
DEPRECATED/cgeqpf.c DEPRECATED/cgelsx.c DEPRECATED/cggsvd.c
DEPRECATED/cggsvp.c DEPRECATED/clahrd.c DEPRECATED/clatzm.c DEPRECATED/ctzrqf.c)
list(APPEND ZLASRC DEPRECATED/zgegs.c DEPRECATED/zgegv.c
DEPRECATED/zgelqs.c DEPRECATED/zgeqrs.c
DEPRECATED/zgeqpf.c DEPRECATED/zgelsx.c DEPRECATED/zggsvd.c
DEPRECATED/zggsvp.c DEPRECATED/zlahrd.c DEPRECATED/zlatzm.c DEPRECATED/ztzrqf.c)
message(STATUS "Building deprecated routines")
endif()
set(DSLASRC spotrs.c)

View File

@ -70,8 +70,6 @@ set(CSRC
lapacke_cgeqlf_work.c
lapacke_cgeqp3.c
lapacke_cgeqp3_work.c
lapacke_cgeqpf.c
lapacke_cgeqpf_work.c
lapacke_cgeqr.c
lapacke_cgeqr_work.c
lapacke_cgeqr2.c
@ -92,6 +90,10 @@ set(CSRC
lapacke_cgerqf_work.c
lapacke_cgesdd.c
lapacke_cgesdd_work.c
lapacke_cgedmd.c
lapacke_cgedmd_work.c
lapacke_cgedmdq.c
lapacke_cgedmdq_work.c
lapacke_cgesv.c
lapacke_cgesv_work.c
lapacke_cgesvd.c
@ -144,12 +146,8 @@ set(CSRC
lapacke_cggqrf_work.c
lapacke_cggrqf.c
lapacke_cggrqf_work.c
lapacke_cggsvd.c
lapacke_cggsvd_work.c
lapacke_cggsvd3.c
lapacke_cggsvd3_work.c
lapacke_cggsvp.c
lapacke_cggsvp_work.c
lapacke_cggsvp3.c
lapacke_cggsvp3_work.c
lapacke_cgtcon.c
@ -564,6 +562,8 @@ set(CSRC
lapacke_ctrsna_work.c
lapacke_ctrsyl.c
lapacke_ctrsyl_work.c
lapacke_ctrsyl3.c
lapacke_ctrsyl3_work.c
lapacke_ctrtri.c
lapacke_ctrtri_work.c
lapacke_ctrtrs.c
@ -596,6 +596,8 @@ set(CSRC
lapacke_cungtr_work.c
lapacke_cungtsqr_row.c
lapacke_cungtsqr_row_work.c
lapacke_cunhr_col.c
lapacke_cunhr_col_work.c
lapacke_cunmbr.c
lapacke_cunmbr_work.c
lapacke_cunmhr.c
@ -695,8 +697,6 @@ set(DSRC
lapacke_dgeqlf_work.c
lapacke_dgeqp3.c
lapacke_dgeqp3_work.c
lapacke_dgeqpf.c
lapacke_dgeqpf_work.c
lapacke_dgeqr.c
lapacke_dgeqr_work.c
lapacke_dgeqr2.c
@ -717,6 +717,10 @@ set(DSRC
lapacke_dgerqf_work.c
lapacke_dgesdd.c
lapacke_dgesdd_work.c
lapacke_dgedmd.c
lapacke_dgedmd_work.c
lapacke_dgedmdq.c
lapacke_dgedmdq_work.c
lapacke_dgesv.c
lapacke_dgesv_work.c
lapacke_dgesvd.c
@ -771,12 +775,8 @@ set(DSRC
lapacke_dggqrf_work.c
lapacke_dggrqf.c
lapacke_dggrqf_work.c
lapacke_dggsvd.c
lapacke_dggsvd_work.c
lapacke_dggsvd3.c
lapacke_dggsvd3_work.c
lapacke_dggsvp.c
lapacke_dggsvp_work.c
lapacke_dggsvp3.c
lapacke_dggsvp3_work.c
lapacke_dgtcon.c
@ -874,6 +874,8 @@ set(DSRC
lapacke_dorgtr_work.c
lapacke_dorgtsqr_row.c
lapacke_dorgtsqr_row_work.c
lapacke_dorhr_col.c
lapacke_dorhr_col_work.c
lapacke_dormbr.c
lapacke_dormbr_work.c
lapacke_dormhr.c
@ -1186,6 +1188,8 @@ set(DSRC
lapacke_dtrsna_work.c
lapacke_dtrsyl.c
lapacke_dtrsyl_work.c
lapacke_dtrsyl3.c
lapacke_dtrsyl3_work.c
lapacke_dtrtri.c
lapacke_dtrtri_work.c
lapacke_dtrtrs.c
@ -1275,8 +1279,6 @@ set(SSRC
lapacke_sgeqlf_work.c
lapacke_sgeqp3.c
lapacke_sgeqp3_work.c
lapacke_sgeqpf.c
lapacke_sgeqpf_work.c
lapacke_sgeqr.c
lapacke_sgeqr_work.c
lapacke_sgeqr2.c
@ -1297,6 +1299,10 @@ set(SSRC
lapacke_sgerqf_work.c
lapacke_sgesdd.c
lapacke_sgesdd_work.c
lapacke_sgedmd.c
lapacke_sgedmd_work.c
lapacke_sgedmdq.c
lapacke_sgedmdq_work.c
lapacke_sgesv.c
lapacke_sgesv_work.c
lapacke_sgesvd.c
@ -1351,12 +1357,8 @@ set(SSRC
lapacke_sggqrf_work.c
lapacke_sggrqf.c
lapacke_sggrqf_work.c
lapacke_sggsvd.c
lapacke_sggsvd_work.c
lapacke_sggsvd3.c
lapacke_sggsvd3_work.c
lapacke_sggsvp.c
lapacke_sggsvp_work.c
lapacke_sggsvp3.c
lapacke_sggsvp3_work.c
lapacke_sgtcon.c
@ -1453,6 +1455,8 @@ set(SSRC
lapacke_sorgtr_work.c
lapacke_sorgtsqr_row.c
lapacke_sorgtsqr_row_work.c
lapacke_sorhr_col.c
lapacke_sorhr_col_work.c
lapacke_sormbr.c
lapacke_sormbr_work.c
lapacke_sormhr.c
@ -1762,6 +1766,8 @@ set(SSRC
lapacke_strsna_work.c
lapacke_strsyl.c
lapacke_strsyl_work.c
lapacke_ctrsyl3.c
lapacke_ctrsyl3_work.c
lapacke_strtri.c
lapacke_strtri_work.c
lapacke_strtrs.c
@ -1849,8 +1855,6 @@ set(ZSRC
lapacke_zgeqlf_work.c
lapacke_zgeqp3.c
lapacke_zgeqp3_work.c
lapacke_zgeqpf.c
lapacke_zgeqpf_work.c
lapacke_zgeqr.c
lapacke_zgeqr_work.c
lapacke_zgeqr2.c
@ -1871,6 +1875,10 @@ set(ZSRC
lapacke_zgerqf_work.c
lapacke_zgesdd.c
lapacke_zgesdd_work.c
lapacke_zgedmd.c
lapacke_zgedmd_work.c
lapacke_zgedmdq.c
lapacke_zgedmdq_work.c
lapacke_zgesv.c
lapacke_zgesv_work.c
lapacke_zgesvd.c
@ -1925,12 +1933,8 @@ set(ZSRC
lapacke_zggqrf_work.c
lapacke_zggrqf.c
lapacke_zggrqf_work.c
lapacke_zggsvd.c
lapacke_zggsvd_work.c
lapacke_zggsvd3.c
lapacke_zggsvd3_work.c
lapacke_zggsvp.c
lapacke_zggsvp_work.c
lapacke_zggsvp3.c
lapacke_zggsvp3_work.c
lapacke_zgtcon.c
@ -2343,6 +2347,8 @@ set(ZSRC
lapacke_ztrsna_work.c
lapacke_ztrsyl.c
lapacke_ztrsyl_work.c
lapacke_ztrsyl3.c
lapacke_ztrsyl3_work.c
lapacke_ztrtri.c
lapacke_ztrtri_work.c
lapacke_ztrtrs.c
@ -2375,6 +2381,8 @@ set(ZSRC
lapacke_zungtr_work.c
lapacke_zungtsqr_row.c
lapacke_zungtsqr_row_work.c
lapacke_zunhr_col.c
lapacke_zunhr_col_work.c
lapacke_zunmbr.c
lapacke_zunmbr_work.c
lapacke_zunmhr.c
@ -2401,6 +2409,12 @@ set(ZSRC
lapacke_csyr_work.c
lapacke_ilaver.c
)
if (BUILD_LAPACK_DEPRECATED)
set(SRCS $SRCS lapacke_sgeqpf.c lapacke_sgeqpf_work.c lapacke_sggsvd.c lapacke_sggsvd_work.c lapacke_sggsvp.c lapacke_sggsvp_work.c)
set(SRCD $SRCD lapacke_dgeqpf.c lapacke_dgeqpf_work.c lapacke_dggsvd.c lapacke_dggsvd_work.c lapacke_dggsvp.c lapacke_dggsvp_work.c)
set(SRCC $SRCC lapacke_cgeqpf.c lapacke_cgeqpf_work.c lapacke_cggsvd.c lapacke_cggsvd_work.c lapacke_cggsvp.c lapacke_cggsvp_work.c)
set(SRCZ $SRCZ lapacke_zgeqpf.c lapacke_zgeqpf_work.c lapacke_zggsvd.c lapacke_zggsvd_work.c lapacke_zggsvp.c lapacke_zggsvp_work.c)
endif()
set(SRCX
lapacke_cgbrfsx.c lapacke_cporfsx.c lapacke_dgerfsx.c lapacke_sgbrfsx.c lapacke_ssyrfsx.c lapacke_zherfsx.c

View File

@ -1,11 +1,13 @@
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
libnameprefix=@LIBNAMEPREFIX@
libnamesuffix=@LIBNAMESUFFIX@
libsuffix=@SUFFIX64_UNDERSCORE@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
openblas_config=USE_64BITINT=@INTERFACE64@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@
Name: OpenBLAS
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
Version: @OPENBLAS_VERSION@
URL: https://github.com/xianyi/OpenBLAS
Libs: @OpenMP_C_FLAGS@ -L${libdir} -lopenblas${libsuffix}
Version: @OpenBLAS_VERSION@
URL: https://github.com/OpenMathLib/OpenBLAS
Libs: @OpenMP_C_FLAGS@ -L${libdir} -l${libnameprefix}openblas${libnamesuffix}${libsuffix}
Cflags: -I${includedir}

View File

@ -55,7 +55,7 @@ if (DEFINED TARGET)
endif ()
# On x86_64 build getarch with march=native. This is required to detect AVX512 support in getarch.
if (X86_64 AND NOT ${CMAKE_C_COMPILER_ID} STREQUAL "PGI")
if (X86_64 AND NOT (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" OR ${CMAKE_C_COMPILER_ID} STREQUAL "NVHPC"))
set(GETARCH_FLAGS "${GETARCH_FLAGS} -march=native")
endif ()
@ -280,7 +280,41 @@ if (DEFINED TARGET)
if (${TARGET} STREQUAL POWER8)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math")
endif()
if (${TARGET} STREQUAL NEOVERSEV1)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve_intrinsics -march=armv8.4-a+sve -mtune=neoverse-v1")
else ()
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.4-a+sve -mtune=neoverse-v1")
else ()
message(FATAL_ERROR "Compiler ${CMAKE_C_COMPILER} ${GCC_VERSION} does not support Neoverse V1.")
endif()
endif()
endif()
if (${TARGET} STREQUAL NEOVERSEN2)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
else ()
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.4 OR ${GCC_VERSION} VERSION_EQUAL 10.4)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2")
else ()
message(FATAL_ERROR "Compiler $${CMAKE_C_COMPILER} {GCC_VERSION} does not support Neoverse N2.")
endif()
endif()
endif()
if (${TARGET} STREQUAL ARMV8SVE)
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI" AND NOT NO_SVE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -Msve-intrinsics -march=armv8.2-a+sve")
else ()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=armv8.2-a+sve")
endif()
endif()
endif()
if (DEFINED BINARY)
message(STATUS "Compiling a ${BINARY}-bit binary.")
endif ()
@ -467,10 +501,11 @@ set(CCOMMON_OPT "${CCOMMON_OPT} -DBLAS3_MEM_ALLOC_THRESHOLD=${BLAS3_MEM_ALLOC_TH
endif()
endif()
endif()
set(LIBPREFIX "lib${LIBNAMEPREFIX}openblas")
if (DEFINED LIBNAMESUFFIX)
set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}")
else ()
set(LIBPREFIX "libopenblas")
set(LIBPREFIX "${LIBNAMEPREFIX}_${LIBNAMESUFFIX}")
endif ()
if (NOT DEFINED SYMBOLPREFIX)
@ -645,6 +680,10 @@ else ()
endif ()
endif ()
if (DEFINED FIXED_LIBNAME)
set (LIBNAME "${LIBPREFIX}.${LIBSUFFIX}")
set (LIBNAME "${LIBPREFIX}_p.${LIBSUFFIX}")
endif()
set(LIBDLLNAME "${LIBPREFIX}.dll")
set(LIBSONAME "${LIBNAME}.${LIBSUFFIX}.so")

View File

@ -38,13 +38,15 @@ if(CMAKE_CL_64 OR MINGW64)
endif()
elseif(MINGW OR (MSVC AND NOT CMAKE_CROSSCOMPILING))
set(X86 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc.*|power.*|Power.*")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "ppc.*|power.*|Power.*" OR (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND CMAKE_OSX_ARCHITECTURES MATCHES "ppc.*"))
set(POWER 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "mips64.*")
set(MIPS64 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "loongarch64.*")
set(LOONGARCH64 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64.*")
set(RISCV64 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*" OR (CMAKE_SYSTEM_NAME MATCHES "Darwin" AND CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*"))
if (NOT BINARY)
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
set(X86_64 1)
@ -60,7 +62,7 @@ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
endif()
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*")
set(X86 1)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*)")
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64.*|AARCH64.*|arm64.*|ARM64.*|armv8.*)")
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8")
set(ARM64 1)
else()
@ -107,7 +109,7 @@ else()
endif ()
if (NOT BINARY)
if (X86_64 OR ARM64 OR POWER OR MIPS64 OR LOONGARCH64)
if (X86_64 OR ARM64 OR MIPS64 OR LOONGARCH64 OR RISCV64 OR (POWER AND NOT (CMAKE_OSX_ARCHITECTURES STREQUAL "ppc")))
set(BINARY 64)
else ()
set(BINARY 32)

View File

@ -87,6 +87,15 @@ macro(ParseMakefileVars MAKEFILE_IN)
#message(STATUS "skipping ${makefile_line}")
continue ()
endif ()
# Example 1: SBGEMM_SMALL_M_PERMIT =
# Unset the variable
string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*$" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
set(var_name ${CMAKE_MATCH_1})
unset(${var_name})
endif()
string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
if (NOT "${line_match}" STREQUAL "")
#message(STATUS "match on ${line_match}")

View File

@ -396,7 +396,7 @@ typedef int blasint;
#endif
/***
To alloc job_t on heap or statck.
To alloc job_t on heap or stack.
please https://github.com/xianyi/OpenBLAS/issues/246
***/
#if defined(OS_WINDOWS)
@ -482,6 +482,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#include "common_e2k.h"
#endif
#ifdef ARCH_CSKY
#include "common_csky.h"
#endif
#ifndef ASSEMBLER
#ifdef OS_WINDOWSSTORE
typedef char env_var_t[MAX_PATH];
@ -525,7 +529,7 @@ static inline unsigned long long rpcc(void){
#endif // !RPCC_DEFINED
#if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__)
static void __inline blas_lock(volatile BLASULONG *address){
static __inline void blas_lock(volatile BLASULONG *address){
do {
while (*address) {YIELDING;};

View File

@ -45,7 +45,7 @@
#define WMB asm("wmb")
#define RMB asm("mb")
static void __inline blas_lock(unsigned long *address){
static __inline void blas_lock(unsigned long *address){
#ifndef __DECC
unsigned long tmp1, tmp2;
asm volatile(

View File

@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8)
static void __inline blas_lock(volatile BLASULONG *address){
static __inline void blas_lock(volatile BLASULONG *address){
int register ret;

View File

@ -55,7 +55,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef ASSEMBLER
static void __inline blas_lock(volatile BLASULONG *address){
static __inline void blas_lock(volatile BLASULONG *address){
BLASULONG ret;
@ -162,7 +162,11 @@ REALNAME:
#define HUGE_PAGESIZE ( 4 << 20)
#ifndef BUFFERSIZE
#if defined(NEOVERSEN1) || defined(NEOVERSEN2) || defined(NEOVERSEV1) || defined(A64FX) || defined(ARMV8SVE)
#define BUFFER_SIZE (32 << 22)
#else
#define BUFFER_SIZE (32 << 20)
#endif
#else
#define BUFFER_SIZE (32 << BUFFERSIZE)
#endif

56
common_csky.h Normal file
View File

@ -0,0 +1,56 @@
/*****************************************************************************
Copyright (c) 2011-2015, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#ifndef COMMON_CSKY
#define COMMON_CSKY
#define MB __sync_synchronize()
#define WMB __sync_synchronize()
#define RMB __sync_synchronize()
#define INLINE inline
#ifndef ASSEMBLER
static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
}
#endif
#define BUFFER_SIZE ( 32 << 20)
#define SEEK_ADDRESS
#endif

View File

@ -498,6 +498,15 @@ void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double *
void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *,
xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *);
void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *,
float *, blasint *, float *, blasint *, float *, float *, blasint *);
void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *,
double *, blasint *, double *, blasint *, double *, double *, blasint *);
void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *,
float *, blasint *, float *, blasint *, float *, float *, blasint *);
void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *,
double *, blasint *, double *, blasint *, double *, double *, blasint *);
int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *,
float *, float *, blasint *, float *, blasint *,
float *, float *, blasint *);
@ -764,8 +773,8 @@ xdouble BLASFUNC(qlamc3)(xdouble *, xdouble *);
void BLASFUNC(saxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
void BLASFUNC(daxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
void BLASFUNC(caxpby) (blasint *, float *, float *, blasint *, float *, float *, blasint *);
void BLASFUNC(zaxpby) (blasint *, double *, double *, blasint *, double *, double *, blasint *);
void BLASFUNC(caxpby) (blasint *, void *, float *, blasint *, void *, float *, blasint *);
void BLASFUNC(zaxpby) (blasint *, void *, double *, blasint *, void *, double *, blasint *);
void BLASFUNC(somatcopy) (char *, char *, blasint *, blasint *, float *, float *, blasint *, float *, blasint *);
void BLASFUNC(domatcopy) (char *, char *, blasint *, blasint *, double *, double *, blasint *, double *, blasint *);

View File

@ -83,6 +83,19 @@ static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
}
#ifndef NO_AFFINITY
static inline int WhereAmI(void){
int ret = 0, counter = 0;
__asm__ volatile (
"rdtimel.w %[counter], %[id]"
: [id]"=r"(ret), [counter]"=r"(counter)
:
: "memory"
);
return ret;
}
#endif
#ifdef DOUBLE
#define GET_IMAGE(res) __asm__ __volatile__("fmov.d %0, $f2" : "=f"(res) : : "memory")
#else
@ -106,12 +119,50 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define MOV fmov.d
#define CMOVT fsel
#define MTC movgr2fr.d
#define MTG movfr2gr.d
#define FABS fabs.d
#define FMIN fmin.d
#define FMINA fmina.d
#define FMAX fmax.d
#define FMAXA fmaxa.d
#define CMPEQ fcmp.ceq.d
#define CMPLE fcmp.cle.d
#define CMPLT fcmp.clt.d
#define NEG fneg.d
#define FFINT ffint.d.l
#define XVFSUB xvfsub.d
#define XVFADD xvfadd.d
#define XVFMUL xvfmul.d
#define XVFMADD xvfmadd.d
#define XVFMIN xvfmin.d
#define XVFMINA xvfmina.d
#define XVFMAX xvfmax.d
#define XVFMAXA xvfmaxa.d
#define XVCMPEQ xvfcmp.ceq.d
#define XVCMPLE xvfcmp.cle.d
#define XVCMPLT xvfcmp.clt.d
#define XVMUL xvfmul.d
#define XVMSUB xvfmsub.d
#define XVNMSUB xvfnmsub.d
#define VFSUB vfsub.d
#define VFADD vfadd.d
#define VFMUL vfmul.d
#define VFMADD vfmadd.d
#define VFMIN vfmin.d
#define VFMINA vfmina.d
#define VFMAX vfmax.d
#define VFMAXA vfmaxa.d
#define VCMPEQ vfcmp.ceq.d
#define VCMPLE vfcmp.cle.d
#define VCMPLT vfcmp.clt.d
#define VMUL vfmul.d
#define VMSUB vfmsub.d
#define VNMSUB vfnmsub.d
#else
#define LD fld.s
#define ST fst.s
#define MADD fmadd.s
@ -124,11 +175,48 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define MOV fmov.s
#define CMOVT fsel
#define MTC movgr2fr.w
#define MTG movfr2gr.s
#define FABS fabs.s
#define FMIN fmin.s
#define FMINA fmina.s
#define FMAX fmax.s
#define FMAXA fmaxa.s
#define CMPEQ fcmp.ceq.s
#define CMPLE fcmp.cle.s
#define CMPLT fcmp.clt.s
#define NEG fneg.s
#define FFINT ffint.s.l
#define XVFSUB xvfsub.s
#define XVFADD xvfadd.s
#define XVFMUL xvfmul.s
#define XVFMADD xvfmadd.s
#define XVFMIN xvfmin.s
#define XVFMINA xvfmina.s
#define XVFMAX xvfmax.s
#define XVFMAXA xvfmaxa.s
#define XVCMPEQ xvfcmp.ceq.s
#define XVCMPLE xvfcmp.cle.s
#define XVCMPLT xvfcmp.clt.s
#define XVMUL xvfmul.s
#define XVMSUB xvfmsub.s
#define XVNMSUB xvfnmsub.s
#define VFSUB vfsub.s
#define VFADD vfadd.s
#define VFMUL vfmul.s
#define VFMADD vfmadd.s
#define VFMIN vfmin.s
#define VFMINA vfmina.s
#define VFMAX vfmax.s
#define VFMAXA vfmaxa.s
#define VCMPEQ vfcmp.ceq.s
#define VCMPLE vfcmp.cle.s
#define VCMPLT vfcmp.clt.s
#define VMUL vfmul.s
#define VMSUB vfmsub.s
#define VNMSUB vfnmsub.s
#endif /* defined(DOUBLE) */
#if defined(__64BIT__) && defined(USE64BITINT)

View File

@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@ -45,12 +46,14 @@
typedef struct {
int dtb_entries;
int switch_ratio;
int offsetA, offsetB, align;
#if BUILD_BFLOAT16 == 1
int sbgemm_p, sbgemm_q, sbgemm_r;
int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
int sbgemm_align_k;
int need_amxtile_permission; // 0 default, 1 for device support amx.
void (*sbstobf16_k) (BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
void (*sbdtobf16_k) (BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);

View File

@ -91,7 +91,7 @@
void *qalloc(int flags, size_t bytes);
static void INLINE blas_lock(volatile unsigned long *address){
static INLINE void blas_lock(volatile unsigned long *address){
long int ret, val = 1;

View File

@ -91,8 +91,26 @@ static inline int blas_quickdivide(blasint x, blasint y){
#define BUFFER_SIZE ( 32 << 20)
#define SEEK_ADDRESS
#if defined(C910V)
#include <riscv_vector.h>
#if defined(C910V) || defined(RISCV64_ZVL256B) || defined(RISCV64_ZVL128B) || defined(x280)
# include <riscv_vector.h>
#endif
#if defined( __riscv_xtheadc ) && defined( __riscv_v ) && ( __riscv_v <= 7000 )
// t-head toolchain uses obsolete rvv intrinsics, can't build for C910V without this
#define RISCV_0p10_INTRINSICS
#define RISCV_RVV(x) x
#else
#define RISCV_RVV(x) __riscv_ ## x
#endif
#if defined(C910V) || defined(RISCV64_ZVL256B)
# if !defined(DOUBLE)
# define EXTRACT_FLOAT(v) RISCV_RVV(vfmv_f_s_f32m1_f32)(v)
# else
# define EXTRACT_FLOAT(v) RISCV_RVV(vfmv_f_s_f64m1_f64)(v)
# endif
#else
# define EXTRACT_FLOAT(v) (v[0])
#endif
#endif

View File

@ -45,7 +45,7 @@
#ifndef ASSEMBLER
static void __inline blas_lock(volatile unsigned long *address){
static __inline void blas_lock(volatile unsigned long *address){
long int ret = 1;

View File

@ -53,7 +53,6 @@ extern void goto_set_num_threads(int nthreads);
/* Global Parameter */
extern int blas_cpu_number;
extern int blas_num_threads;
extern int blas_num_threads_set;
extern int blas_omp_linked;
#define BLAS_LEGACY 0x8000U
@ -112,8 +111,9 @@ typedef struct blas_queue {
struct blas_queue *next;
#if defined( __WIN32__) || defined(__CYGWIN32__) || defined(_WIN32) || defined(__CYGWIN__)
CRITICAL_SECTION lock;
HANDLE finish;
// CRITICAL_SECTION lock;
// HANDLE finish;
volatile int finished;
#else
pthread_mutex_t lock;
pthread_cond_t finished;
@ -136,27 +136,32 @@ typedef struct blas_queue {
#ifdef SMP_SERVER
extern int blas_server_avail;
extern int blas_omp_number_max;
extern int blas_omp_threads_local;
static __inline int num_cpu_avail(int level) {
#ifdef USE_OPENMP
int openmp_nthreads;
if (blas_num_threads_set == 0)
openmp_nthreads=omp_get_max_threads();
else
openmp_nthreads=blas_cpu_number;
if (omp_in_parallel()) openmp_nthreads = blas_omp_threads_local;
#endif
#ifndef USE_OPENMP
if (blas_cpu_number == 1
#endif
#ifdef USE_OPENMP
if (openmp_nthreads == 1 || omp_in_parallel()
#else
if (openmp_nthreads == 1
#endif
) return 1;
#ifdef USE_OPENMP
if (blas_cpu_number != openmp_nthreads) {
if (openmp_nthreads > blas_omp_number_max){
#ifdef DEBUG
fprintf(stderr,"WARNING - more OpenMP threads requested (%d) than available (%d)\n",openmp_nthreads,blas_omp_number_max);
#endif
openmp_nthreads = blas_omp_number_max;
}
if (blas_cpu_number != openmp_nthreads) {
goto_set_num_threads(openmp_nthreads);
}
#endif
@ -189,27 +194,27 @@ int exec_blas(BLASLONG num_cpu, blas_param_t *param, void *buffer);
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int threads);
void *c, BLASLONG ldc, int (*function)(void), int threads);
int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int gemm_thread_m (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *, void *, BLASLONG);
int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int gemm_thread_n (int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT*, FLOAT*, BLASLONG), void *, void *, BLASLONG);
int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int gemm_thread_mn(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG);
int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG, BLASLONG);
int gemm_thread_variable(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *, void *, BLASLONG, BLASLONG);
int trsm_thread(int mode, BLASLONG m, BLASLONG n,
double alpha_r, double alpha_i,
void *a, BLASLONG lda,
void *c, BLASLONG ldc, int (*function)(), void *buffer);
void *c, BLASLONG ldc, int (*function)(void), void *buffer);
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(), void *, void *, BLASLONG);
int syrk_thread(int mode, blas_arg_t *, BLASLONG *, BLASLONG *, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void*, void*, BLASLONG);
int getrf_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k,
void *offsetA, BLASLONG lda,
void *offsetB, BLASLONG jb,
void *ipiv, BLASLONG offset, int (*function)(), void *buffer);
void *ipiv, BLASLONG offset, int (*function)(void), void *buffer);
#endif /* ENDIF ASSEMBLER */

View File

@ -54,7 +54,7 @@
#define __volatile__
#endif
static void __inline blas_lock(volatile BLASULONG *address){
static __inline void blas_lock(volatile BLASULONG *address){
int ret;

View File

@ -70,7 +70,7 @@
#define RMB
#endif
static void __inline blas_lock(volatile BLASULONG *address){
static __inline void blas_lock(volatile BLASULONG *address){
#ifndef C_MSVC

View File

@ -45,7 +45,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#ifndef ASSEMBLER
/*
static void __inline blas_lock(volatile BLASULONG *address){
static __inline void blas_lock(volatile BLASULONG *address){
BLASULONG ret;

View File

@ -267,8 +267,10 @@ int detect(void)
}
#else
#ifdef __APPLE__
sysctlbyname("hw.cpufamily",&value,&length,NULL,0);
if (value ==131287967|| value == 458787763 ) return CPU_VORTEX;
sysctlbyname("hw.cpufamily",&value64,&length64,NULL,0);
if (value64 ==131287967|| value64 == 458787763 ) return CPU_VORTEX; //A12/M1
if (value64 == 3660830781) return CPU_VORTEX; //A15/M2
if (value64 == 2271604202) return CPU_VORTEX; //A16/M3
#endif
return CPU_ARMV8;
#endif

View File

@ -32,6 +32,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/
#include <stdint.h>
#include <sys/auxv.h>
/* If LASX extension instructions supported,
* using core LOONGSON3R5
@ -46,9 +47,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CPU_LOONGSON3R5 1
#define CPU_LOONGSON2K1000 2
#define LOONGARCH_CFG2 0x02
#define LOONGARCH_LASX 1<<7
#define LOONGARCH_LSX 1<<6
#define LA_HWCAP_LSX (1U << 4)
#define LA_HWCAP_LASX (1U << 5)
static char *cpuname[] = {
"LOONGSONGENERIC",
@ -64,17 +64,11 @@ static char *cpuname_lower[] = {
int detect(void) {
#ifdef __linux
uint32_t reg = 0;
int hwcap = (int)getauxval(AT_HWCAP);
__asm__ volatile (
"cpucfg %0, %1 \n\t"
: "+&r"(reg)
: "r"(LOONGARCH_CFG2)
);
if (reg & LOONGARCH_LASX)
if (hwcap & LA_HWCAP_LASX)
return CPU_LOONGSON3R5;
else if (reg & LOONGARCH_LSX)
else if (hwcap & LA_HWCAP_LSX)
return CPU_LOONGSON2K1000;
else
return CPU_GENERIC;
@ -100,7 +94,9 @@ void get_subdirname(void) {
}
void get_cpuconfig(void) {
uint32_t hwcaps = 0;
int d = detect();
switch (d) {
case CPU_LOONGSON3R5:
printf("#define LOONGSON3R5\n");
@ -135,6 +131,10 @@ void get_cpuconfig(void) {
printf("#define L2_ASSOCIATIVE 16\n");
break;
}
hwcaps = (uint32_t)getauxval( AT_HWCAP );
if (hwcaps & LA_HWCAP_LSX) printf("#define HAVE_LSX\n");
if (hwcaps & LA_HWCAP_LASX) printf("#define HAVE_LASX\n");
}
void get_libname(void){

View File

@ -160,6 +160,7 @@ int detect(void){
infoCount = HOST_BASIC_INFO_COUNT;
host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, &infoCount);
if (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_7400) return CPUTYPE_PPCG4;
if (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_7450) return CPUTYPE_PPCG4;
if (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_970) return CPUTYPE_PPC970;

View File

@ -70,12 +70,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/
#define CPU_GENERIC 0
#define CPU_C910V 1
#define CPU_GENERIC 0
#define CPU_C910V 1
#define CPU_x280 2
#define CPU_RISCV64_ZVL256B 3
#define CPU_RISCV64_ZVL128B 4
static char *cpuname[] = {
"RISCV64_GENERIC",
"C910V"
"C910V",
"x280",
"CPU_RISCV64_ZVL256B",
"CPU_RISCV64_ZVL128B"
};
static char *cpuname_lower[] = {
"riscv64_generic",
"c910v",
"x280",
"riscv64_zvl256b",
"riscv64_zvl128b"
};
int detect(void){
@ -86,23 +100,29 @@ int detect(void){
char *pmodel = NULL, *pisa = NULL;
infile = fopen("/proc/cpuinfo", "r");
if (!infile)
return CPU_GENERIC;
while (fgets(buffer, sizeof(buffer), infile)){
if(!strncmp(buffer, "model name", 10)){
strcpy(model_buffer, buffer);
pmodel = strchr(isa_buffer, ':') + 1;
pmodel = strchr(model_buffer, ':');
if (pmodel)
pmodel++;
}
if(!strncmp(buffer, "isa", 3)){
strcpy(isa_buffer, buffer);
pisa = strchr(isa_buffer, '4') + 1;
pisa = strchr(isa_buffer, '4');
if (pisa)
pisa++;
}
}
fclose(infile);
if (!pmodel)
if (!pmodel || !pisa)
return(CPU_GENERIC);
if (strstr(pmodel, check_c910_str) && strchr(pisa, 'v'))
return CPU_C910V;
@ -140,5 +160,5 @@ void get_cpuconfig(void){
}
void get_libname(void){
printf("riscv64\n");
printf("%s", cpuname_lower[detect()]);
}

View File

@ -194,7 +194,7 @@ static C_INLINE void xgetbv(int op, int * eax, int * edx){
}
#endif
int support_avx(){
int support_avx(void){
#ifndef NO_AVX
int eax, ebx, ecx, edx;
int ret=0;
@ -212,7 +212,7 @@ int support_avx(){
#endif
}
int support_avx2(){
int support_avx2(void){
#ifndef NO_AVX2
int eax, ebx, ecx=0, edx;
int ret=0;
@ -228,7 +228,7 @@ int support_avx2(){
#endif
}
int support_avx512(){
int support_avx512(void){
#if !defined(NO_AVX) && !defined(NO_AVX512)
int eax, ebx, ecx, edx;
int ret=0;
@ -250,7 +250,7 @@ int support_avx512(){
#endif
}
int support_avx512_bf16(){
int support_avx512_bf16(void){
#if !defined(NO_AVX) && !defined(NO_AVX512)
int eax, ebx, ecx, edx;
int ret=0;
@ -271,7 +271,7 @@ int support_avx512_bf16(){
#define BIT_AMX_BF16 0x00400000
#define BIT_AMX_ENBD 0x00060000
int support_amx_bf16() {
int support_amx_bf16(void) {
#if !defined(NO_AVX) && !defined(NO_AVX512)
int eax, ebx, ecx, edx;
int ret=0;
@ -1479,6 +1479,8 @@ int get_cpuname(void){
else
return CPUTYPE_NEHALEM;
case 15: // Sapphire Rapids
if(support_amx_bf16())
return CPUTYPE_SAPPHIRERAPIDS;
if(support_avx512_bf16())
return CPUTYPE_COOPERLAKE;
if(support_avx512())
@ -1549,6 +1551,7 @@ int get_cpuname(void){
case 7: // Raptor Lake
case 10:
case 15:
case 14: // Alder Lake N
if(support_avx2())
return CPUTYPE_HASWELL;
if(support_avx())
@ -1657,7 +1660,13 @@ int get_cpuname(void){
else
return CPUTYPE_BARCELONA;
}
case 10: // Zen3
case 10: // Zen3/4
#ifndef NO_AVX512
if(support_avx512_bf16())
return CPUTYPE_COOPERLAKE;
if(support_avx512())
return CPUTYPE_SKYLAKEX;
#endif
if(support_avx())
#ifndef NO_AVX2
return CPUTYPE_ZEN;
@ -1845,7 +1854,8 @@ static char *cpuname[] = {
"ZEN",
"SKYLAKEX",
"DHYANA",
"COOPERLAKE"
"COOPERLAKE",
"SAPPHIRERAPIDS",
};
static char *lowercpuname[] = {
@ -1902,7 +1912,8 @@ static char *lowercpuname[] = {
"zen",
"skylakex",
"dhyana",
"cooperlake"
"cooperlake",
"sapphirerapids",
};
static char *corename[] = {
@ -1936,7 +1947,8 @@ static char *corename[] = {
"ZEN",
"SKYLAKEX",
"DHYANA",
"COOPERLAKE"
"COOPERLAKE",
"SAPPHIRERAPIDS",
};
static char *corename_lower[] = {
@ -1970,7 +1982,8 @@ static char *corename_lower[] = {
"zen",
"skylakex",
"dhyana",
"cooperlake"
"cooperlake",
"sapphirerapids",
};
@ -2276,16 +2289,18 @@ int get_coretype(void){
return CORE_NEHALEM;
}
if (model == 15) { // Sapphire Rapids
if(support_amx_bf16())
return CORE_SAPPHIRERAPIDS;
if(support_avx512_bf16())
return CPUTYPE_COOPERLAKE;
return CORE_COOPERLAKE;
if(support_avx512())
return CPUTYPE_SKYLAKEX;
return CORE_SKYLAKEX;
if(support_avx2())
return CPUTYPE_HASWELL;
return CORE_HASWELL;
if(support_avx())
return CPUTYPE_SANDYBRIDGE;
return CORE_SANDYBRIDGE;
else
return CPUTYPE_NEHALEM;
return CORE_NEHALEM;
}
break;
@ -2352,6 +2367,7 @@ int get_coretype(void){
case 7: // Raptor Lake
case 10:
case 15:
case 14: // Alder Lake N
#ifndef NO_AVX2
if(support_avx2())
return CORE_HASWELL;
@ -2428,6 +2444,12 @@ int get_coretype(void){
// Ryzen 2
default:
// Matisse,Renoir Ryzen2 models
#ifndef NO_AVX512
if(support_avx512_bf16())
return CORE_COOPERLAKE;
if(support_avx512())
return CORE_SKYLAKEX;
#endif
if(support_avx())
#ifndef NO_AVX2
return CORE_ZEN;

View File

@ -173,6 +173,10 @@ HAVE_C11
ARCH_E2K
#endif
#if defined(__csky__)
ARCH_CSKY
#endif
#if defined(__EMSCRIPTEN__)
ARCH_RISCV64
OS_WINDOWS

View File

@ -40,6 +40,10 @@ else()
c_${float_char}blas1.c)
endif()
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME})
if (USE_OPENMP AND (${CMAKE_Fortran_COMPILER_ID} STREQUAL GNU) AND (${CMAKE_C_COMPILER_ID} STREQUAL Clang))
string(REGEX REPLACE "-fopenmp" "" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
target_link_libraries(x${float_char}cblat1 omp pthread)
endif()
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
target_link_libraries(x${float_char}cblat1 m)
endif()
@ -65,6 +69,10 @@ else()
constant.c)
endif()
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME})
if (USE_OPENMP AND (${CMAKE_Fortran_COMPILER_ID} STREQUAL GNU) AND (${CMAKE_C_COMPILER_ID} STREQUAL Clang))
string(REGEX REPLACE "-fopenmp" "" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
target_link_libraries(x${float_char}cblat2 omp pthread)
endif()
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
target_link_libraries(x${float_char}cblat2 m)
endif()
@ -80,6 +88,17 @@ if (NOT NOFORTRAN)
auxiliary.c
c_xerbla.c
constant.c)
if (USE_GEMM3M)
if ((${float_char} STREQUAL "c") OR (${float_char} STREQUAL "z"))
add_executable(x${float_char}cblat3_3m
c_${float_char}blat3_3m.f
c_${float_char}blas3_3m.c
c_${float_char}3chke_3m.c
auxiliary.c
c_xerbla.c
constant.c)
endif()
endif()
else()
add_executable(x${float_char}cblat3
c_${float_char}blat3c.c
@ -88,12 +107,44 @@ else()
auxiliary.c
c_xerbla.c
constant.c)
if (USE_GEMM3M)
if ((${float_char} STREQUAL "c") OR (${float_char} STREQUAL "z"))
add_executable(x${float_char}cblat3_3m
c_${float_char}blat3c_3m.c
c_${float_char}blas3_3m.c
c_${float_char}3chke_3m.c
auxiliary.c
c_xerbla.c
constant.c)
endif()
endif()
endif()
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME})
if (USE_OPENMP AND (${CMAKE_Fortran_COMPILER_ID} STREQUAL GNU) AND (${CMAKE_C_COMPILER_ID} STREQUAL Clang))
string(REGEX REPLACE "-fopenmp" "" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
target_link_libraries(x${float_char}cblat3 omp pthread)
endif()
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
target_link_libraries(x${float_char}cblat3 m)
endif()
if (USE_GEMM3M)
if ((${float_char} STREQUAL "c") OR (${float_char} STREQUAL "z"))
target_link_libraries(x${float_char}cblat3_3m ${OpenBLAS_LIBNAME})
if (USE_OPENMP AND (${CMAKE_Fortran_COMPILER_ID} STREQUAL GNU) AND (${CMAKE_C_COMPILER_ID} STREQUAL Clang))
string(REGEX REPLACE "-fopenmp" "" CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS}")
target_link_libraries(x${float_char}cblat3 omp pthread)
endif()
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux" OR ${CMAKE_SYSTEM_NAME} MATCHES "FreeBSD" OR ${CMAKE_SYSTEM_NAME} MATCHES "QNX")
target_link_libraries(x${float_char}cblat3_3m m)
endif()
endif()
endif()
add_test(NAME "x${float_char}cblat3"
COMMAND ${test_helper} $<TARGET_FILE:x${float_char}cblat3> "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3")
if (USE_GEMM3M)
if ((${float_char} STREQUAL "c") OR (${float_char} STREQUAL "z"))
add_test(NAME "x${float_char}cblat3_3m"
COMMAND ${test_helper} $<TARGET_FILE:x${float_char}cblat3_3m> "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3_3m")
endif()
endif()
endforeach()

View File

@ -5,6 +5,24 @@
TOPDIR = ..
include $(TOPDIR)/Makefile.system
SUPPORT_GEMM3M = 0
ifeq ($(ARCH), x86)
SUPPORT_GEMM3M = 1
endif
ifeq ($(ARCH), x86_64)
SUPPORT_GEMM3M = 1
endif
ifeq ($(ARCH), ia64)
SUPPORT_GEMM3M = 1
endif
ifeq ($(ARCH), MIPS)
SUPPORT_GEMM3M = 1
endif
override CFLAGS += -DADD$(BU) -DCBLAS
ifeq ($(F_COMPILER),GFORTRAN)
override FFLAGS += -fno-tree-vectorize
@ -144,9 +162,15 @@ all3targets += xdcblat3
endif
ifeq ($(BUILD_COMPLEX),1)
all3targets += xccblat3
ifeq ($(SUPPORT_GEMM3M),1)
all3targets += xccblat3_3m
endif
endif
ifeq ($(BUILD_COMPLEX16),1)
all3targets += xzcblat3
ifeq ($(SUPPORT_GEMM3M),1)
all3targets += xzcblat3_3m
endif
endif
all3: $(all3targets)
@ -181,9 +205,9 @@ endif
endif
endif
all3_3m: xzcblat3_3m xccblat3_3m
ifeq ($(SUPPORT_GEMM3M),1)
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1)
ifeq ($(BUILD_COMPLEX),1)
OMP_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
endif
ifeq ($(BUILD_COMPLEX16),1)
@ -197,6 +221,7 @@ ifeq ($(BUILD_COMPLEX16),1)
OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
endif
endif
@ -208,12 +233,20 @@ FLDFLAGS = $(FFLAGS:-fPIC=) $(LDFLAGS)
ifeq ($(USE_OPENMP), 1)
ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(C_COMPILER), CLANG)
CEXTRALIB = -lomp
CEXTRALIB += -lomp
endif
endif
ifeq ($(F_COMPILER), NAG)
CEXTRALIB = -lgomp
endif
ifeq ($(F_COMPILER), IBM)
ifeq ($(C_COMPILER), GCC)
CEXTRALIB += -lgomp
endif
ifeq ($(C_COMPILER), CLANG)
CEXTRALIB += -lomp
endif
endif
endif
ifeq ($(BUILD_SINGLE),1)
@ -263,8 +296,10 @@ xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat2 c_cblat2.o $(ctestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
ifeq ($(SUPPORT_GEMM3M),1)
xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
else
xccblat1: $(ctestl1o) c_cblat1c.o $(TOPDIR)/$(LIBNAME)
$(CC) $(CFLAGS) -o xccblat1 c_cblat1c.o $(ctestl1o) $(LIB) $(CEXTRALIB) $(filter-out -lgfortran,$(EXTRALIB))
@ -272,6 +307,10 @@ xccblat2: $(ctestl2o) c_cblat2c.o $(TOPDIR)/$(LIBNAME)
$(CC) $(CFLAGS) -o xccblat2 c_cblat2c.o $(ctestl2o) $(LIB) $(CEXTRALIB) $(filter-out -lgfortran,$(EXTRALIB))
xccblat3: $(ctestl3o) c_cblat3c.o $(TOPDIR)/$(LIBNAME)
$(CC) $(CFLAGS) -o xccblat3 c_cblat3c.o $(ctestl3o) $(LIB) $(CEXTRALIB) $(filter-out -lgfortran,$(EXTRALIB))
ifeq ($(SUPPORT_GEMM3M),1)
xccblat3_3m: $(ctestl3o_3m) c_cblat3c_3m.o $(TOPDIR)/$(LIBNAME)
$(CC) $(CFLAGS) -o xccblat3_3m c_cblat3c_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
endif
endif
@ -285,8 +324,10 @@ xzcblat2: $(ztestl2o) c_zblat2.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat2 c_zblat2.o $(ztestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
ifeq ($(SUPPORT_GEMM3M),1)
xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
else
xzcblat1: $(ztestl1o) c_zblat1c.o $(TOPDIR)/$(LIBNAME)
$(CC) $(CFLAGS) -o xzcblat1 c_zblat1c.o $(ztestl1o) $(LIB) $(CEXTRALIB) $(filter-out -lgfortran,$(EXTRALIB))
@ -294,6 +335,10 @@ xzcblat2: $(ztestl2o) c_zblat2c.o $(TOPDIR)/$(LIBNAME)
$(CC) $(CFLAGS) -o xzcblat2 c_zblat2c.o $(ztestl2o) $(LIB) $(CEXTRALIB) $(filter-out -lgfortran,$(EXTRALIB))
xzcblat3: $(ztestl3o) c_zblat3c.o $(TOPDIR)/$(LIBNAME)
$(CC) $(CFLAGS) -o xzcblat3 c_zblat3c.o $(ztestl3o) $(LIB) $(CEXTRALIB) $(filter-out -lgfortran,$(EXTRALIB))
ifeq ($(SUPPORT_GEMM3M),1)
xzcblat3_3m: $(ztestl3o_3m) c_zblat3c_3m.o $(TOPDIR)/$(LIBNAME)
$(CC) $(CFLAGS) -o xzcblat3_3m c_zblat3c_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
endif
endif

View File

@ -96,7 +96,7 @@
INTEGER ICAMAXTEST
EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST
* .. External Subroutines ..
EXTERNAL CSCAL, CSSCALTEST, CTEST, ITEST1, STEST1
EXTERNAL CSCALTEST, CSSCALTEST, CTEST, ITEST1, STEST1
* .. Intrinsic Functions ..
INTRINSIC MAX
* .. Common blocks ..
@ -214,8 +214,8 @@
CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1),
+ STRUE4(NP1),SFAC)
ELSE IF (ICASE.EQ.8) THEN
* .. CSCAL ..
CALL CSCAL(N,CA,CX,INCX)
* .. CSCALTEST ..
CALL CSCALTEST(N,CA,CX,INCX)
CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX),
+ SFAC)
ELSE IF (ICASE.EQ.9) THEN
@ -236,14 +236,14 @@
*
INCX = 1
IF (ICASE.EQ.8) THEN
* CSCAL
* CSCALTEST
* Add a test for alpha equal to zero.
CA = (0.0E0,0.0E0)
DO 80 I = 1, 5
MWPCT(I) = (0.0E0,0.0E0)
MWPCS(I) = (1.0E0,1.0E0)
80 CONTINUE
CALL CSCAL(5,CA,CX,INCX)
CALL CSCALTEST(5,CA,CX,INCX)
CALL CTEST(5,CX,MWPCT,MWPCS,SFAC)
ELSE IF (ICASE.EQ.9) THEN
* CSSCALTEST

View File

@ -242,251 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
#if 0
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
#endif
/* Common Block Declarations */
@ -503,16 +258,16 @@ static integer c__1 = 1;
static integer c__5 = 5;
static real c_b43 = (float)1.;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
static real sfac = (float)9.765625e-4;
/* Local variables */
extern /* Subroutine */ int check1_(), check2_();
extern /* Subroutine */ int check1_(real*), check2_(real*);
static integer ic;
extern /* Subroutine */ int header_();
extern /* Subroutine */ int header_(void);
/* Test program for the COMPLEX Level 1 CBLAS. */
/* Based upon the original CBLAS test routine together with: */
@ -553,7 +308,7 @@ static real c_b43 = (float)1.;
} /* MAIN__ */
/* Subroutine */ int header_()
/* Subroutine */ int header_(void)
{
/* Initialized data */
@ -564,7 +319,7 @@ static real c_b43 = (float)1.;
/* Format strings */
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
integer s_wsfe(void), do_fio(void), e_wsfe(void);
/* .. Parameters .. */
/* .. Scalars in Common .. */
@ -577,8 +332,7 @@ static real c_b43 = (float)1.;
} /* header_ */
/* Subroutine */ int check1_(sfac)
real *sfac;
/* Subroutine */ int check1_(real* sfac)
{
/* Initialized data */
@ -683,15 +437,16 @@ real *sfac;
/* Local variables */
static integer i__;
extern /* Subroutine */ int ctest_();
extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*);
static complex mwpcs[5], mwpct[5];
extern /* Subroutine */ int itest1_(), stest1_();
extern /* Subroutine */ int itest1_(integer*, integer*), stest1_(real*,real*,real*,real*);
extern /* Subroutine */ int cscaltest_(), itest1_(), stest1_();
static complex cx[8];
extern real scnrm2test_();
extern real scnrm2test_(integer*, complex*, integer*);
static integer np1;
extern integer icamaxtest_();
extern /* Subroutine */ int csscaltest_();
extern real scasumtest_();
extern integer icamaxtest_(integer*, complex*, integer*);
extern /* Subroutine */ int csscaltest_(integer*, real*, complex*, integer*);
extern real scasumtest_(integer*, complex*, integer*);
static integer len;
/* .. Parameters .. */
@ -727,7 +482,7 @@ real *sfac;
stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac);
} else if (combla_1.icase == 8) {
/* .. CSCAL .. */
cscal_(&combla_1.n, &ca, cx, &combla_1.incx);
cscaltest_(&combla_1.n, &ca, cx, &combla_1.incx);
ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48],
&ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac);
} else if (combla_1.icase == 9) {
@ -761,7 +516,7 @@ real *sfac;
mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.;
/* L80: */
}
cscal_(&c__5, &ca, cx, &combla_1.incx);
cscaltest_(&c__5, &ca, cx, &combla_1.incx);
ctest_(&c__5, cx, mwpct, mwpcs, sfac);
} else if (combla_1.icase == 9) {
/* CSSCALTEST */
@ -808,8 +563,7 @@ real *sfac;
return 0;
} /* check1_ */
/* Subroutine */ int check2_(sfac)
real *sfac;
/* Subroutine */ int check2_(real* sfac)
{
/* Initialized data */
@ -981,10 +735,10 @@ real *sfac;
static complex cdot[1];
static integer lenx, leny, i__;
static complex ctemp;
extern /* Subroutine */ int ctest_();
extern /* Subroutine */ int ctest_(integer*, complex*, complex*, complex*, real*);
static integer ksize;
extern /* Subroutine */ int cdotctest_(), ccopytest_(), cdotutest_(),
cswaptest_(), caxpytest_();
extern /* Subroutine */ int cdotctest_(integer*, complex*, integer*, complex*, integer*,complex*), ccopytest_(integer*, complex*, integer*, complex*, integer*), cdotutest_(integer*, complex*, integer*, complex*, integer*, complex*),
cswaptest_(integer*, complex*, integer*, complex*, integer*), caxpytest_(integer*, complex*, complex*, integer*, complex*, integer*);
static integer ki, kn;
static complex cx[7], cy[7];
static integer mx, my;
@ -1067,9 +821,7 @@ real *sfac;
return 0;
} /* check2_ */
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
integer *len;
real *scomp, *strue, *ssize, *sfac;
/* Subroutine */ int stest_(integer* len, real* scomp, real* strue, real* ssize,real* sfac)
{
/* System generated locals */
integer i__1;
@ -1077,7 +829,7 @@ real *scomp, *strue, *ssize, *sfac;
/* Local variables */
static integer i__;
extern doublereal sdiff_();
extern doublereal sdiff_(real*, real*);
static real sd;
/* ********************************* STEST ************************** */
@ -1133,11 +885,10 @@ L40:
} /* stest_ */
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
real *scomp1, *strue1, *ssize, *sfac;
/* Subroutine */ int stest1_(real* scomp1, real* strue1, real* ssize, real* sfac)
{
static real scomp[1], strue[1];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*, real*, real*, real*, real*);
/* ************************* STEST1 ***************************** */
@ -1164,8 +915,7 @@ real *scomp1, *strue1, *ssize, *sfac;
return 0;
} /* stest1_ */
doublereal sdiff_(sa, sb)
real *sa, *sb;
doublereal sdiff_(real* sa, real* sb)
{
/* System generated locals */
real ret_val;
@ -1179,10 +929,7 @@ real *sa, *sb;
return ret_val;
} /* sdiff_ */
/* Subroutine */ int ctest_(len, ccomp, ctrue, csize, sfac)
integer *len;
complex *ccomp, *ctrue, *csize;
real *sfac;
/* Subroutine */ int ctest_(integer* len, complex* ccomp, complex* ctrue, complex* csize, real* sfac)
{
/* System generated locals */
integer i__1, i__2;
@ -1193,7 +940,7 @@ real *sfac;
/* Local variables */
static integer i__;
static real scomp[20], ssize[20], strue[20];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*, real*,real*,real*,real*);
/* **************************** CTEST ***************************** */
@ -1231,8 +978,7 @@ real *sfac;
return 0;
} /* ctest_ */
/* Subroutine */ int itest1_(icomp, itrue)
integer *icomp, *itrue;
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
{
/* Local variables */
static integer id;

View File

@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -396,7 +273,7 @@ static integer c_n1 = -1;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
@ -414,17 +291,21 @@ static logical c_false = FALSE_;
static logical same;
static integer ninc, nbet, ntra;
static logical rewi;
extern /* Subroutine */ int cchk1_(), cchk2_(), cchk3_(), cchk4_(),
cchk5_(), cchk6_();
extern /* Subroutine */ int cchk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, integer*, ftnlen);
extern /* Subroutine */ int cchk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, integer*, ftnlen);
extern /* Subroutine */ int cchk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
extern /* Subroutine */ int cchk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
extern /* Subroutine */ int cchk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
extern /* Subroutine */ int cchk6_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, complex*, integer*, integer*, integer*, integer*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, complex*, real*, complex*, integer*, ftnlen);
static complex a[4225] /* was [65][65] */;
static real g[65];
static integer i__, j, n;
static logical fatal;
static complex x[65], y[65], z__[130];
extern doublereal sdiff_();
extern doublereal sdiff_(real*, real*);
static logical trace;
static integer nidim;
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static char snaps[32], trans[1];
static integer isnum;
static logical ltest[17];
@ -438,11 +319,11 @@ static logical c_false = FALSE_;
static char snamet[12];
static real thresh;
static logical rorder;
extern /* Subroutine */ int cc2chke_();
extern /* Subroutine */ void cc2chke_(char*, ftnlen);
static integer layout;
static logical ltestt, tsterr;
static complex alf[7];
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static integer inc[7], nkb;
static complex bet[7];
static real eps, err;
@ -983,22 +864,7 @@ L240:
} /* MAIN__ */
/* Subroutine */ int cchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
complex *alf;
integer *nbet;
complex *bet;
integer *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
real *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, complex* alf, integer* nbet, complex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1015,10 +881,10 @@ ftnlen sname_len;
static integer incx, incy;
static logical full, tran, null;
static integer i__, m, n;
extern /* Subroutine */ int cmake_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
static complex alpha;
static logical isame[13];
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys;
@ -1026,14 +892,15 @@ ftnlen sname_len;
static integer ia, ib, ic;
static logical banded;
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
extern /* Subroutine */ int ccgbmv_(), ccgemv_();
extern logical lceres_();
extern /* Subroutine */ int ccgbmv_(integer*, char*, integer*, integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
extern /* Subroutine */ void ccgemv_(integer*, char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
static char ctrans[14];
static real errmax;
static complex transl;
static char transs[1];
static integer laa, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static complex als, bls;
static real err;
static integer iku, kls, kus;
@ -1448,22 +1315,7 @@ L140:
} /* cchk1_ */
/* Subroutine */ int cchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
complex *alf;
integer *nbet;
complex *bet;
integer *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
real *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, complex* alf, integer* nbet, complex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1481,10 +1333,10 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, k, n;
extern /* Subroutine */ int cmake_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
static complex alpha;
static logical isame[13];
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
@ -1495,13 +1347,14 @@ ftnlen sname_len;
static integer nc, ik, in;
static logical packed;
static integer nk, ks, ix, iy, ns, lx, ly;
extern /* Subroutine */ int cchbmv_(), cchemv_();
extern logical lceres_();
extern /* Subroutine */ int cchpmv_();
extern /* Subroutine */ void cchbmv_(integer*, char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
extern /* Subroutine */ void cchemv_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, ftnlen);
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cchpmv_(integer*, char*, integer*, complex*, complex*, complex*, integer*, complex*, complex*, integer*, ftnlen);
static real errmax;
static complex transl;
static integer laa, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static complex als, bls;
static real err;
@ -1906,19 +1759,7 @@ L130:
} /* cchk2_ */
/* Subroutine */ int cchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, xt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *xt;
real *g;
complex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* xt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1937,10 +1778,10 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1], cdiag[14];
static integer i__, k, n;
extern /* Subroutine */ int cmake_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
static char diags[1];
static logical isame[13];
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
@ -1950,17 +1791,19 @@ ftnlen sname_len;
static integer nc, ik, in;
static logical packed;
static integer nk, ks, ix, ns, lx;
extern logical lceres_();
extern /* Subroutine */ int cctbmv_(), cctbsv_();
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cctbmv_(integer*, char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cctbsv_(integer*, char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
static char ctrans[14];
extern /* Subroutine */ int cctpmv_();
extern /* Subroutine */ void cctpmv_(integer*, char*, char*, char*, integer*, complex*, complex*, integer*, ftnlen, ftnlen, ftnlen);
static real errmax;
extern /* Subroutine */ int cctrmv_(), cctpsv_();
extern /* Subroutine */ void cctrmv_(integer*, char*, char*, char*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cctpsv_(integer*, char*, char*, char*, integer*, complex*, complex*, integer*, ftnlen, ftnlen, ftnlen);
static complex transl;
extern /* Subroutine */ int cctrsv_();
extern /* Subroutine */ void cctrsv_(integer*, char*, char*, char*, integer*, complex*, integer*, complex*, integer*, ftnlen, ftnlen, ftnlen);
static char transs[1];
static integer laa, icd, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static integer ict, icu;
static real err;
@ -2418,21 +2261,7 @@ L130:
} /* cchk3_ */
/* Subroutine */ int cchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
complex *alf;
integer *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
real *g;
complex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
@ -2444,21 +2273,21 @@ ftnlen sname_len;
static integer incx, incy;
static logical null;
static integer i__, j, m, n;
extern /* Subroutine */ int cmake_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
static complex alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys, ia, nc, nd, im, in;
extern /* Subroutine */ int ccgerc_();
extern /* Subroutine */ void ccgerc_(integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*);
static integer ms, ix, iy, ns, lx, ly;
extern /* Subroutine */ int ccgeru_();
extern logical lceres_();
extern /* Subroutine */ void ccgeru_(integer*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*);
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
static real errmax;
static complex transl;
static integer laa, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static complex als;
static real err;
@ -2786,21 +2615,7 @@ L150:
} /* cchk4_ */
/* Subroutine */ int cchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
complex *alf;
integer *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
real *g;
complex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2818,10 +2633,12 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, j, n;
extern /* Subroutine */ int cmake_(), ccher_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void ccher_(integer*, char*, integer*, real*, complex*, integer*, complex*, integer*, ftnlen);
static complex alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int cchpr_(), cmvch_();
extern /* Subroutine */ void cchpr_(integer*, char*, integer*, real*, complex*, integer*, complex*, ftnlen);
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
@ -2832,11 +2649,11 @@ ftnlen sname_len;
static logical packed;
static integer ix, ns, lx;
static real ralpha;
extern logical lceres_();
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
static real errmax;
static complex transl;
static integer laa, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static real err;
/* Tests CHER and CHPR. */
@ -3160,21 +2977,7 @@ L130:
} /* cchk5_ */
/* Subroutine */ int cchk6_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
complex *alf;
integer *ninc, *inc, *nmax, *incmax;
complex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
real *g;
complex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int cchk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, complex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, complex* a, complex* aa, complex* as, complex* x, complex* xx, complex* xs, complex* y, complex* yy, complex* ys, complex* yt, real* g, complex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -3192,25 +2995,26 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, j, n;
extern /* Subroutine */ int cmake_();
extern /* Subroutine */ int cmake_(char*, char*, char*, integer*, integer*, complex*, integer*, complex*, integer*, integer*, integer*, logical*, complex*, ftnlen, ftnlen, ftnlen);
static complex alpha, w[2];
static logical isame[13];
extern /* Subroutine */ int cmvch_();
extern /* Subroutine */ int cmvch_(char*, integer*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, complex*, integer*, complex*, real*, complex*, real*, real*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
static integer incxs, incys;
static logical upper;
static char uplos[1];
extern /* Subroutine */ int ccher2_(), cchpr2_();
extern /* Subroutine */ void ccher2_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, integer*, ftnlen);
extern /* Subroutine */ void cchpr2_(integer*, char*, integer*, complex*, complex*, integer*, complex*, integer*, complex*, ftnlen);
static integer ia, ja, ic, nc, jj, lj, in;
static logical packed;
static integer ix, iy, ns, lx, ly;
extern logical lceres_();
extern logical lceres_(char*, char*, integer*, integer*, complex*, complex*, integer*, ftnlen, ftnlen);
static real errmax;
static complex transl;
static integer laa, lda;
extern logical lce_();
extern logical lce_(complex*, complex*, integer*);
static complex als;
static real err;
@ -3597,24 +3401,7 @@ L170:
} /* cchk6_ */
/* Subroutine */ int cmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
char *trans;
integer *m, *n;
complex *alpha, *a;
integer *nmax;
complex *x;
integer *incx;
complex *beta, *y;
integer *incy;
complex *yt;
real *g;
complex *yy;
real *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen trans_len;
/* Subroutine */ int cmvch_(char* trans, integer* m, integer* n, complex* alpha, complex* a, integer* nmax, complex* x, integer* incx, complex* beta, complex* y, integer* incy, complex* yt, real* g, complex* yy, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
{
/* System generated locals */
@ -3812,9 +3599,7 @@ L80:
} /* cmvch_ */
logical lce_(ri, rj, lr)
complex *ri, *rj;
integer *lr;
logical lce_(complex* ri, complex* rj, integer* lr)
{
/* System generated locals */
integer i__1, i__2, i__3;
@ -3861,13 +3646,7 @@ L30:
} /* lce_ */
logical lceres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
complex *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lceres_(char* type__, char* uplo, integer* m, integer* n, complex* aa, complex* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4;
@ -3960,9 +3739,7 @@ L80:
} /* lceres_ */
/* Complex */ VOID cbeg_( ret_val, reset)
complex * ret_val;
logical *reset;
/* Complex */ VOID cbeg_(complex* ret_val, logical* reset)
{
/* System generated locals */
real r__1, r__2;
@ -4023,8 +3800,7 @@ L10:
} /* cbeg_ */
doublereal sdiff_(x, y)
real *x, *y;
doublereal sdiff_(real* x, real* y)
{
/* System generated locals */
real ret_val;
@ -4044,19 +3820,7 @@ real *x, *y;
} /* sdiff_ */
/* Subroutine */ int cmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
ku, reset, transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
complex *a;
integer *nmax;
complex *aa;
integer *lda, *kl, *ku;
logical *reset;
complex *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int cmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, complex* a, integer* nmax, complex* aa, integer* lda, integer* kl, integer* ku, logical* reset, complex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
@ -4064,7 +3828,7 @@ ftnlen diag_len;
complex q__1, q__2;
/* Local variables */
extern /* Complex */ VOID cbeg_();
extern /* Complex */ VOID cbeg_(complex*, logical*);
static integer ibeg, iend, ioff;
static logical unit;
static integer i__, j;

View File

@ -242,130 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */

3942
ctest/c_cblat3c_3m.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -21,19 +21,6 @@ typedef float real;
typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
#define pCd(z) (*_pCd(z))
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
@ -242,124 +229,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* Common Block Declarations */
@ -375,16 +244,16 @@ struct {
static integer c__1 = 1;
static doublereal c_b34 = 1.;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
static doublereal sfac = 9.765625e-4;
/* Local variables */
extern /* Subroutine */ int check0_(), check1_(), check2_(), check3_();
extern /* Subroutine */ int check0_(doublereal*), check1_(doublereal*), check2_(doublereal*), check3_(doublereal*);
static integer ic;
extern /* Subroutine */ int header_();
extern /* Subroutine */ int header_(void);
/* Test program for the DOUBLE PRECISION Level 1 CBLAS. */
/* Based upon the original CBLAS test routine together with: */
@ -431,7 +300,7 @@ static doublereal c_b34 = 1.;
} /* MAIN__ */
/* Subroutine */ int header_()
/* Subroutine */ int header_(void)
{
/* Initialized data */
@ -450,8 +319,7 @@ static doublereal c_b34 = 1.;
} /* header_ */
/* Subroutine */ int check0_(sfac)
doublereal *sfac;
/* Subroutine */ int check0_(doublereal* sfac)
{
/* Initialized data */
@ -464,7 +332,7 @@ doublereal *sfac;
/* Local variables */
static integer k;
extern /* Subroutine */ int drotgtest_(), stest1_();
extern /* Subroutine */ int drotgtest_(doublereal*,doublereal*,doublereal*,doublereal*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*);
static doublereal sa, sb, sc, ss;
/* .. Parameters .. */
@ -509,8 +377,7 @@ L40:
return 0;
} /* check0_ */
/* Subroutine */ int check1_(sfac)
doublereal *sfac;
/* Subroutine */ int check1_(doublereal* sfac)
{
/* Initialized data */
@ -535,14 +402,14 @@ doublereal *sfac;
/* Local variables */
static integer i__;
extern doublereal dnrm2test_();
extern doublereal dnrm2test_(integer*, doublereal*, integer*);
static doublereal stemp[1], strue[8];
extern /* Subroutine */ int stest_(), dscaltest_();
extern doublereal dasumtest_();
extern /* Subroutine */ int itest1_(), stest1_();
extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dscaltest_(integer*,doublereal*,doublereal*,integer*);
extern doublereal dasumtest_(integer*,doublereal*,integer*);
extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*);
static doublereal sx[8];
static integer np1;
extern integer idamaxtest_();
extern integer idamaxtest_(integer*,doublereal*,integer*);
static integer len;
/* .. Parameters .. */
@ -603,8 +470,7 @@ doublereal *sfac;
return 0;
} /* check1_ */
/* Subroutine */ int check2_(sfac)
doublereal *sfac;
/* Subroutine */ int check2_(doublereal* sfac)
{
/* Initialized data */
@ -649,10 +515,10 @@ doublereal *sfac;
/* Local variables */
static integer lenx, leny;
extern doublereal ddottest_();
extern doublereal ddottest_(integer*,doublereal*,integer*,doublereal*,integer*);
static integer i__, j, ksize;
extern /* Subroutine */ int stest_(), dcopytest_(), dswaptest_(),
daxpytest_(), stest1_();
extern /* Subroutine */ int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), dcopytest_(integer*,doublereal*,integer*,doublereal*,integer*), dswaptest_(integer*,doublereal*,integer*,doublereal*,integer*),
daxpytest_(integer*,doublereal*,doublereal*,integer*,doublereal*,integer*), stest1_(doublereal*,doublereal*,doublereal*,doublereal*);
static integer ki, kn, mx, my;
static doublereal sx[7], sy[7], stx[7], sty[7];
@ -733,8 +599,7 @@ doublereal *sfac;
return 0;
} /* check2_ */
/* Subroutine */ int check3_(sfac)
doublereal *sfac;
/* Subroutine */ int check3_(doublereal* sfac)
{
/* Initialized data */
@ -753,9 +618,9 @@ doublereal *sfac;
;
/* Local variables */
extern /* Subroutine */ int drottest_();
extern /* Subroutine */ int drottest_(integer*,doublereal*,integer*,doublereal*,integer*,doublereal*,doublereal*);
static integer i__, k, ksize;
extern /* Subroutine */int stest_(), drotmtest_();
extern /* Subroutine */int stest_(integer*,doublereal*,doublereal*,doublereal*,doublereal*), drotmtest_(integer*,doublereal*,integer*,doublereal*,integer*,doublereal*);
static integer ki, kn;
static doublereal dparam[5], sx[10], sy[10], stx[10], sty[10];
@ -826,9 +691,7 @@ doublereal *sfac;
return 0;
} /* check3_ */
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
integer *len;
doublereal *scomp, *strue, *ssize, *sfac;
/* Subroutine */ int stest_(integer* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac)
{
/* System generated locals */
integer i__1;
@ -836,7 +699,7 @@ doublereal *scomp, *strue, *ssize, *sfac;
/* Local variables */
static integer i__;
extern doublereal sdiff_();
extern doublereal sdiff_(doublereal*,doublereal*);
static doublereal sd;
/* ********************************* STEST ************************** */
@ -892,11 +755,10 @@ L40:
} /* stest_ */
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
doublereal *scomp1, *strue1, *ssize, *sfac;
/* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac)
{
static doublereal scomp[1], strue[1];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*, doublereal*, doublereal*, doublereal*, doublereal*);
/* ************************* STEST1 ***************************** */
@ -923,8 +785,7 @@ doublereal *scomp1, *strue1, *ssize, *sfac;
return 0;
} /* stest1_ */
doublereal sdiff_(sa, sb)
doublereal *sa, *sb;
doublereal sdiff_(doublereal* sa, doublereal* sb)
{
/* System generated locals */
doublereal ret_val;
@ -938,8 +799,7 @@ doublereal *sa, *sb;
return ret_val;
} /* sdiff_ */
/* Subroutine */ int itest1_(icomp, itrue)
integer *icomp, *itrue;
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
{
/* Local variables */
static integer id;
@ -1188,4 +1048,4 @@ doublereal *dparam;
return 0;
} /* drotm_ */
#endif
#endif

View File

@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -395,7 +272,7 @@ static integer c_n1 = -1;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
@ -413,17 +290,21 @@ static logical c_false = FALSE_;
static logical same;
static integer ninc, nbet, ntra;
static logical rewi;
extern /* Subroutine */ int dchk1_(), dchk2_(), dchk3_(), dchk4_(),
dchk5_(), dchk6_();
extern /* Subroutine */ int dchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk6_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
static doublereal a[4225] /* was [65][65] */, g[65];
static integer i__, j;
extern doublereal ddiff_();
extern doublereal ddiff_(doublereal*, doublereal*);
static integer n;
static logical fatal;
static doublereal x[65], y[65], z__[130];
static logical trace;
static integer nidim;
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static char snaps[32], trans[1];
static integer isnum;
static logical ltest[16];
@ -437,11 +318,11 @@ static logical c_false = FALSE_;
static char snamet[12];
static doublereal thresh;
static logical rorder;
extern /* Subroutine */ int cd2chke_();
extern /* Subroutine */ void cd2chke_(char*, ftnlen);
static integer layout;
static logical ltestt, tsterr;
static doublereal alf[7];
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static integer inc[7], nkb;
static doublereal bet[7],eps,err;
char tmpchar;
@ -977,21 +858,7 @@ L240:
} /* MAIN__ */
/* Subroutine */ int dchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1007,10 +874,10 @@ ftnlen sname_len;
static integer incx, incy;
static logical full, tran, null;
static integer i__, m, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys;
@ -1018,13 +885,14 @@ ftnlen sname_len;
static integer ia, ib, ic;
static logical banded;
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
extern /* Subroutine */ int cdgbmv_(), cdgemv_();
extern logical lderes_();
extern /* Subroutine */ void cdgbmv_(integer*, char*, integer*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ void cdgemv_(integer*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static char ctrans[14];
static doublereal errmax, transl;
static char transs[1];
static integer laa, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, bls, err;
static integer iku, kls, kus;
@ -1429,21 +1297,7 @@ L140:
} /* dchk1_ */
/* Subroutine */ int dchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1460,10 +1314,10 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, k, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
@ -1474,12 +1328,13 @@ ftnlen sname_len;
static integer nc, ik, in;
static logical packed;
static integer nk, ks, ix, iy, ns, lx, ly;
extern logical lderes_();
extern /* Subroutine */ int cdsbmv_(), cdspmv_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cdsbmv_(integer*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ void cdspmv_(integer*, char*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
static doublereal errmax, transl;
extern /* Subroutine */ int cdsymv_();
extern /* Subroutine */ void cdsymv_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen);
static integer laa, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, bls, err;
@ -1882,17 +1737,7 @@ L130:
} /* dchk2_ */
/* Subroutine */ int dchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, xt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *xt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* xt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1911,10 +1756,10 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1], cdiag[14];
static integer i__, k, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static char diags[1];
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
@ -1924,16 +1769,19 @@ ftnlen sname_len;
static integer nc, ik, in;
static logical packed;
static integer nk, ks, ix, ns, lx;
extern logical lderes_();
extern /* Subroutine */ int cdtbmv_(), cdtbsv_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cdtbmv_(integer*, char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cdtbsv_(integer*, char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static char ctrans[14];
static doublereal errmax;
extern /* Subroutine */ int cdtpmv_(), cdtrmv_();
extern /* Subroutine */ void cdtpmv_(integer*, char*, char*, char*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cdtrmv_(integer*, char*, char*, char*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static doublereal transl;
extern /* Subroutine */ int cdtpsv_(), cdtrsv_();
extern /* Subroutine */ void cdtpsv_(integer*, char*, char*, char*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cdtrsv_(integer*, char*, char*, char*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static char transs[1];
static integer laa, icd, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static integer ict, icu;
static doublereal err;
@ -2388,19 +2236,7 @@ L130:
} /* dchk3_ */
/* Subroutine */ int dchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
@ -2411,17 +2247,18 @@ ftnlen sname_len;
static integer incx, incy;
static logical null;
static integer i__, j, m, n;
extern /* Subroutine */ int dmake_(), cdger_();
extern /* Subroutine */ void cdger_(integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, integer*);
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys, ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax, transl;
static integer laa, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, err;
@ -2727,19 +2564,7 @@ L150:
} /* dchk4_ */
/* Subroutine */ int dchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2757,25 +2582,25 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, j, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
extern /* Subroutine */ int cdspr_();
extern /* Subroutine */ void cdspr_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs;
extern /* Subroutine */ int cdsyr_();
extern /* Subroutine */ void cdsyr_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen);
static logical upper;
static char uplos[1];
static integer ia, ja, ic, nc, jj, lj, in;
static logical packed;
static integer ix, ns, lx;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax, transl;
static integer laa, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, err;
@ -3096,19 +2921,7 @@ L130:
} /* dchk5_ */
/* Subroutine */ int dchk6_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *ninc, *inc, *nmax, *incmax;
doublereal *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk6_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* x, doublereal* xx, doublereal* xs, doublereal* y, doublereal* yy, doublereal* ys, doublereal* yt, doublereal* g, doublereal* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -3125,24 +2938,25 @@ ftnlen sname_len;
static logical full, null;
static char uplo[1];
static integer i__, j, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char* , char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, integer*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha, w[2];
static logical isame[13];
extern /* Subroutine */ int dmvch_();
extern /* Subroutine */ int dmvch_(char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
static integer incxs, incys;
static logical upper;
static char uplos[1];
extern /* Subroutine */ int cdspr2_(), cdsyr2_();
extern /* Subroutine */ void cdspr2_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, ftnlen);
extern /* Subroutine */ void cdsyr2_(integer*, char*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen);
static integer ia, ja, ic, nc, jj, lj, in;
static logical packed;
static integer ix, iy, ns, lx, ly;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax, transl;
static integer laa, lda;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, err;
/* Tests DSYR2 and DSPR2. */
@ -3508,25 +3322,13 @@ L170:
} /* dchk6_ */
/* Subroutine */ int dmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
ku, reset, transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
doublereal *a;
integer *nmax;
doublereal *aa;
integer *lda, *kl, *ku;
logical *reset;
doublereal *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int dmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublereal* a, integer* nmax, doublereal* aa, integer* lda, integer* kl, integer* ku, logical* reset, doublereal* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
/* Local variables */
extern doublereal dbeg_();
extern doublereal dbeg_(logical* );
static integer ibeg, iend, ioff;
static logical unit;
static integer i__, j;
@ -3752,28 +3554,14 @@ ftnlen diag_len;
} /* dmake_ */
/* Subroutine */ int dmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
char *trans;
integer *m, *n;
doublereal *alpha, *a;
integer *nmax;
doublereal *x;
integer *incx;
doublereal *beta, *y;
integer *incy;
doublereal *yt, *g, *yy, *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen trans_len;
/* Subroutine */ int dmvch_(char* trans, integer* m, integer* n, doublereal* alpha, doublereal* a, integer* nmax, doublereal* x, integer* incx, doublereal* beta, doublereal* y, integer* incy, doublereal* yt, doublereal* g, doublereal* yy, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
doublereal d__1;
/* Builtin functions */
double sqrt();
double sqrt(double);
/* Local variables */
static doublereal erri;
@ -3902,9 +3690,7 @@ L70:
} /* dmvch_ */
logical lde_(ri, rj, lr)
doublereal *ri, *rj;
integer *lr;
logical lde_(doublereal* ri, doublereal* rj, integer* lr)
{
/* System generated locals */
integer i__1;
@ -3949,13 +3735,7 @@ L30:
} /* lde_ */
logical lderes_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
doublereal *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lderes_(char* type__, char* uplo, integer* m, integer* n, doublereal* aa, doublereal* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
@ -4042,8 +3822,7 @@ L80:
} /* lderes_ */
doublereal dbeg_(reset)
logical *reset;
doublereal dbeg_(logical* reset)
{
/* System generated locals */
doublereal ret_val;
@ -4094,8 +3873,7 @@ L10:
} /* dbeg_ */
doublereal ddiff_(x, y)
doublereal *x, *y;
doublereal ddiff_(doublereal* x, doublereal* y)
{
/* System generated locals */
doublereal ret_val;

View File

@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -393,7 +270,7 @@ static logical c_true = TRUE_;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program MAIN__() */ int main()
/* Main program MAIN__() */ int main(void)
{
/* Initialized data */
@ -403,25 +280,24 @@ static logical c_false = FALSE_;
integer i__1, i__2, i__3;
doublereal d__1;
/* Builtin functions */
integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(),
e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe();
integer f_clos();
/* Local variables */
static integer nalf, idim[9];
static logical same;
static integer nbet, ntra;
static logical rewi;
extern /* Subroutine */ int dchk1_(), dchk2_(), dchk3_(), dchk4_(),
dchk5_();
extern /* Subroutine */ int dchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int dchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, doublereal*, integer*, ftnlen);
/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* ab, doublereal* aa, doublereal* as, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, doublereal* w, integer* iorder, ftnlen sname_len);
static doublereal c__[4225] /* was [65][65] */, g[65];
static integer i__, j;
extern doublereal ddiff_();
extern doublereal ddiff_(doublereal*, doublereal*);
static integer n;
static logical fatal;
static doublereal w[130];
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical trace;
static integer nidim;
static char snaps[32];
@ -433,11 +309,11 @@ static logical c_false = FALSE_;
static char snamet[12], transa[1], transb[1];
static doublereal thresh;
static logical rorder;
extern /* Subroutine */ int cd3chke_();
extern /* Subroutine */ void cd3chke_(char*, ftnlen);
static integer layout;
static logical ltestt, tsterr;
static doublereal alf[7];
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal bet[7], eps, err;
char tmpchar;
@ -907,21 +783,7 @@ L230:
} /* MAIN__ */
/* Subroutine */ int dchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *nmax;
doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -931,29 +793,27 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5, i__6;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static doublereal beta;
static integer ldas, ldbs, ldcs;
static logical same, null;
static integer i__, k, m, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical isame[13], trana, tranb;
static integer nargs;
static logical reset;
extern /* Subroutine */ void dprcn1_();
extern /* Subroutine */ void dprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ia, ib, ma, mb, na, nb, nc, ik, im, in;
extern /* Subroutine */ int cdgemm_();
extern /* Subroutine */ void cdgemm_(integer*, char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static integer ks, ms, ns;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static char tranas[1], tranbs[1], transa[1], transb[1];
static doublereal errmax;
static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als, bls, err;
/* Tests DGEMM. */
@ -1283,23 +1143,8 @@ L130:
} /* dchk1_ */
/* Subroutine */ void dprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k,
alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *transa, *transb;
integer *m, *n, *k;
doublereal *alpha;
integer *lda, *ldb;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ void dprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char crc[14], cta[14], ctb[14];
@ -1328,21 +1173,7 @@ ftnlen transb_len;
} /* dprcn1_ */
/* Subroutine */ int dchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *nmax;
doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1353,8 +1184,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static doublereal beta;
@ -1364,21 +1193,21 @@ ftnlen sname_len;
static logical left, null;
static char uplo[1];
static integer i__, m, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical isame[13];
static char sides[1];
static integer nargs;
static logical reset;
static char uplos[1];
extern /* Subroutine */ void dprcn2_();
extern /* Subroutine */ void dprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ia, ib, na, nc, im, in, ms, ns;
extern logical lderes_();
extern /* Subroutine */ int cdsymm_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cdsymm_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax;
static integer laa, lbb, lda, lcc, ldb, ldc;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static integer ics;
static doublereal als, bls;
static integer icu;
@ -1692,23 +1521,8 @@ L120:
} /* dchk2_ */
/* Subroutine */ void dprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha,
lda, ldb, beta, ldc, sname_len, side_len, uplo_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo;
integer *m, *n;
doublereal *alpha;
integer *lda, *ldb;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
/* Subroutine */ void dprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char cs[14], cu[14], crc[14];
@ -1733,19 +1547,7 @@ ftnlen uplo_len;
} /* dprcn2_ */
/* Subroutine */ int dchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__,
iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *nmax;
doublereal *a, *aa, *as, *b, *bb, *bs, *ct, *g, *c__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* ct, doublereal* g, doublereal* c__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1766,24 +1568,24 @@ ftnlen sname_len;
static logical left, null;
static char uplo[1];
static integer i__, j, m, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
static char diags[1];
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical isame[13];
static char sides[1];
static integer nargs;
static logical reset;
static char uplos[1];
extern /* Subroutine */ void dprcn3_();
extern /* Subroutine */ void dprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, ftnlen, ftnlen, ftnlen, ftnlen, ftnlen);
static integer ia, na, nc, im, in, ms, ns;
extern logical lderes_();
extern /* Subroutine */ int cdtrmm_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cdtrmm_(integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
static char tranas[1], transa[1];
extern /* Subroutine */ int cdtrsm_();
extern /* Subroutine */ void cdtrsm_(integer*, char*, char*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
static doublereal errmax;
static integer laa, icd, lbb, lda, ldb;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static integer ics;
static doublereal als;
static integer ict, icu;
@ -2165,24 +1967,8 @@ L160:
} /* dchk3_ */
/* Subroutine */ void dprcn3_(nout, nc, sname, iorder, side, uplo, transa,
diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len,
transa_len, diag_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo, *transa, *diag;
integer *m, *n;
doublereal *alpha;
integer *lda, *ldb;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
ftnlen transa_len;
ftnlen diag_len;
/* Subroutine */ void dprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, doublereal* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cd[14], cs[14], cu[14], crc[14];
@ -2219,21 +2005,7 @@ ftnlen diag_len;
} /* dprcn3_ */
/* Subroutine */ int dchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *nmax;
doublereal *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* a, doublereal* aa, doublereal* as, doublereal* b, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2244,8 +2016,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static doublereal beta;
@ -2255,23 +2025,23 @@ ftnlen sname_len;
static logical tran, null;
static char uplo[1];
static integer i__, j, k, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical isame[13];
static integer nargs;
static logical reset;
static char trans[1];
static logical upper;
static char uplos[1];
extern /* Subroutine */ void dprcn4_();
extern /* Subroutine */ void dprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax;
extern /* Subroutine */ int cdsyrk_();
extern /* Subroutine */ void cdsyrk_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static char transs[1];
static integer laa, lda, lcc, ldc;
extern logical lde_();
extern logical lde_(doublereal*, doublereal*, integer*);
static doublereal als;
static integer ict, icu;
static doublereal err;
@ -2586,23 +2356,8 @@ L130:
} /* dchk4_ */
/* Subroutine */ void dprcn4_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublereal *alpha;
integer *lda;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ void dprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -2629,21 +2384,7 @@ ftnlen transa_len;
} /* dprcn4_ */
/* Subroutine */ int dchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs,
c__, cc, cs, ct, g, w, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublereal *alf;
integer *nbet;
doublereal *bet;
integer *nmax;
doublereal *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct, *g, *w;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int dchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublereal* alf, integer* nbet, doublereal* bet, integer* nmax, doublereal* ab, doublereal* aa, doublereal* as, doublereal* bb, doublereal* bs, doublereal* c__, doublereal* cc, doublereal* cs, doublereal* ct, doublereal* g, doublereal* w, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2653,8 +2394,6 @@ ftnlen sname_len;
/* System generated locals */
integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static integer jjab;
@ -2665,23 +2404,23 @@ ftnlen sname_len;
static logical tran, null;
static char uplo[1];
static integer i__, j, k, n;
extern /* Subroutine */ int dmake_();
extern /* Subroutine */ int dmake_(char*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, logical*, doublereal*, ftnlen, ftnlen, ftnlen);
static doublereal alpha;
extern /* Subroutine */ int dmmch_();
extern /* Subroutine */ int dmmch_(char*, char*, integer*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, doublereal*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical isame[13];
static integer nargs;
static logical reset;
static char trans[1];
static logical upper;
static char uplos[1];
extern /* Subroutine */ void dprcn5_();
extern /* Subroutine */ void dprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns;
extern logical lderes_();
extern logical lderes_(char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal errmax;
static char transs[1];
static integer laa, lbb, lda, lcc, ldb, ldc;
extern logical lde_();
extern /* Subroutine */ int cdsyr2k_();
extern logical lde_(doublereal*, doublereal*, integer*);
extern /* Subroutine */ void cdsyr2k_(integer*, char*, char*, integer*, integer*, doublereal*, doublereal*, integer*, doublereal*, integer*, doublereal*, doublereal*, integer*, ftnlen, ftnlen);
static doublereal als;
static integer ict, icu;
static doublereal err;
@ -3048,23 +2787,8 @@ L160:
} /* dchk5_ */
/* Subroutine */ void dprcn5_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublereal *alpha;
integer *lda, *ldb;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ void dprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -3091,25 +2815,13 @@ ftnlen transa_len;
} /* dprcn5_ */
/* Subroutine */ int dmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset,
transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
doublereal *a;
integer *nmax;
doublereal *aa;
integer *lda;
logical *reset;
doublereal *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int dmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublereal* a, integer* nmax, doublereal* aa, integer* lda, logical* reset, doublereal* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
/* Local variables */
extern doublereal dbeg_();
extern doublereal dbeg_(logical*);
static integer ibeg, iend;
static logical unit;
static integer i__, j;
@ -3241,25 +2953,7 @@ ftnlen diag_len;
} /* dmake_ */
/* Subroutine */ int dmmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb,
beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv,
transa_len, transb_len)
char *transa, *transb;
integer *m, *n, *kk;
doublereal *alpha, *a;
integer *lda;
doublereal *b;
integer *ldb;
doublereal *beta, *c__;
integer *ldc;
doublereal *ct, *g, *cc;
integer *ldcc;
doublereal *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ int dmmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, doublereal* alpha, doublereal* a, integer* lda, doublereal* b, integer* ldb, doublereal* beta, doublereal* c__, integer* ldc, doublereal* ct, doublereal* g, doublereal* cc, integer* ldcc, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len)
{
/* System generated locals */
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, cc_dim1,
@ -3267,8 +2961,7 @@ ftnlen transb_len;
doublereal d__1, d__2;
/* Builtin functions */
double sqrt();
integer s_wsfe(), e_wsfe(), do_fio();
double sqrt(double);
/* Local variables */
static doublereal erri;
@ -3432,9 +3125,7 @@ L150:
} /* dmmch_ */
logical lde_(ri, rj, lr)
doublereal *ri, *rj;
integer *lr;
logical lde_(doublereal* ri, doublereal* rj, integer* lr)
{
/* System generated locals */
integer i__1;
@ -3481,13 +3172,7 @@ L30:
} /* lde_ */
logical lderes_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
doublereal *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lderes_(char* type__, char* uplo, integer* m, integer* n, doublereal* aa, doublereal* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
@ -3576,8 +3261,7 @@ L80:
} /* lderes_ */
doublereal dbeg_(reset)
logical *reset;
doublereal dbeg_(logical* reset)
{
/* System generated locals */
doublereal ret_val;
@ -3629,8 +3313,7 @@ L10:
} /* dbeg_ */
doublereal ddiff_(x, y)
doublereal *x, *y;
doublereal ddiff_(doublereal* x, doublereal* y)
{
/* System generated locals */
doublereal ret_val;

View File

@ -21,19 +21,6 @@ typedef float real;
typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
#define pCd(z) (*_pCd(z))
typedef int logical;
typedef short int shortlogical;
typedef char logical1;
@ -242,250 +229,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
#if 0
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
#endif
/* Common Block Declarations */
@ -502,16 +245,16 @@ struct {
static integer c__1 = 1;
static real c_b34 = (float)1.;
/* Main program */ int main ()
/* Main program */ int main (void)
{
/* Initialized data */
static real sfac = (float)9.765625e-4;
/* Local variables */
extern /* Subroutine */ int check0_(), check1_(), check2_(), check3_();
extern /* Subroutine */ int check0_(real*), check1_(real*), check2_(real*), check3_(real*);
static integer ic;
extern /* Subroutine */ int header_();
extern /* Subroutine */ int header_(void);
/* Test program for the REAL Level 1 CBLAS. */
/* Based upon the original CBLAS test routine together with: */
@ -557,7 +300,7 @@ static real c_b34 = (float)1.;
exit(0);
} /* MAIN__ */
/* Subroutine */ int header_()
/* Subroutine */ int header_(void)
{
/* Initialized data */
@ -580,8 +323,7 @@ static real c_b34 = (float)1.;
} /* header_ */
/* Subroutine */ int check0_(sfac)
real *sfac;
/* Subroutine */ int check0_(real *sfac)
{
/* Initialized data */
@ -600,7 +342,7 @@ real *sfac;
/* Local variables */
static integer k;
extern /* Subroutine */ int srotgtest_(), stest1_();
extern /* Subroutine */ int srotgtest_(real*,real*,real*,real*), stest1_(real*,real*,real*,real*);
static real sa, sb, sc, ss;
/* .. Parameters .. */
@ -645,8 +387,7 @@ L40:
return 0;
} /* check0_ */
/* Subroutine */ int check1_(sfac)
real *sfac;
/* Subroutine */ int check1_(real* sfac)
{
/* Initialized data */
@ -692,14 +433,14 @@ real *sfac;
/* Local variables */
static integer i__;
extern real snrm2test_();
extern real snrm2test_(integer*,real*,integer*);
static real stemp[1], strue[8];
extern /* Subroutine */ int stest_(), sscaltest_();
extern real sasumtest_();
extern /* Subroutine */ int itest1_(), stest1_();
extern /* Subroutine */ int stest_(integer*, real*,real*,real*,real*), sscaltest_(integer*,real*,real*,integer*);
extern real sasumtest_(integer*,real*,integer*);
extern /* Subroutine */ int itest1_(integer*,integer*), stest1_(real*,real*,real*,real*);
static real sx[8];
static integer np1;
extern integer isamaxtest_();
extern integer isamaxtest_(integer*,real*,integer*);
static integer len;
@ -761,8 +502,7 @@ real *sfac;
return 0;
} /* check1_ */
/* Subroutine */ int check2_(sfac)
real *sfac;
/* Subroutine */ int check2_(real* sfac)
{
/* Initialized data */
@ -850,12 +590,12 @@ real *sfac;
/* Local variables */
static integer lenx, leny;
extern real sdottest_();
extern real sdottest_(integer*,real*,integer*,real*,integer*);
static integer i__, j, ksize;
extern /* Subroutine */ int stest_(), scopytest_(), sswaptest_(),
saxpytest_();
extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*), scopytest_(integer*,real*,integer*,real*,integer*), sswaptest_(integer*,real*,integer*,real*,integer*),
saxpytest_(integer*,real*,real*,integer*,real*,integer*);
static integer ki;
extern /* Subroutine */ int stest1_();
extern /* Subroutine */ int stest1_(real*,real*,real*,real*);
static integer kn, mx, my;
static real sx[7], sy[7], stx[7], sty[7];
@ -936,8 +676,7 @@ real *sfac;
return 0;
} /* check2_ */
/* Subroutine */ int check3_(sfac)
real *sfac;
/* Subroutine */ int check3_(real* sfac)
{
/* Initialized data */
@ -969,9 +708,9 @@ real *sfac;
1.17 };
/* Local variables */
extern /* Subroutine */ void srottest_();
extern /* Subroutine */ void srottest_(integer*,real*,integer*,real*,integer*,real*,real*);
static integer i__, k, ksize;
extern /* Subroutine */ int stest_(), srotmtest_();
extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*), srotmtest_(integer*,real*,integer*,real*,integer*,real*);
static integer ki, kn;
static real sx[19], sy[19], sparam[5], stx[19], sty[19];
@ -1042,16 +781,14 @@ real *sfac;
return 0;
} /* check3_ */
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
integer *len;
real *scomp, *strue, *ssize, *sfac;
/* Subroutine */ int stest_(integer* len, real* scomp, real* strue, real* ssize, real* sfac)
{
integer i__1;
real r__1, r__2, r__3, r__4, r__5;
/* Local variables */
static integer i__;
extern doublereal sdiff_();
extern doublereal sdiff_(real*,real*);
static real sd;
/* ********************************* STEST ************************** */
@ -1107,11 +844,10 @@ L40:
} /* stest_ */
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
real *scomp1, *strue1, *ssize, *sfac;
/* Subroutine */ int stest1_(real* scomp1, real* strue1, real* ssize, real* sfac)
{
static real scomp[1], strue[1];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*,real*,real*,real*,real*);
/* ************************* STEST1 ***************************** */
@ -1138,8 +874,7 @@ real *scomp1, *strue1, *ssize, *sfac;
return 0;
} /* stest1_ */
doublereal sdiff_(sa, sb)
real *sa, *sb;
doublereal sdiff_(real* sa, real* sb)
{
/* System generated locals */
real ret_val;
@ -1153,8 +888,7 @@ real *sa, *sb;
return ret_val;
} /* sdiff_ */
/* Subroutine */ int itest1_(icomp, itrue)
integer *icomp, *itrue;
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
{
/* Local variables */
static integer id;

View File

@ -242,255 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
#if 0
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -521,7 +272,7 @@ static integer c_n1 = -1;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
@ -539,16 +290,20 @@ static logical c_false = FALSE_;
static logical same;
static integer ninc, nbet, ntra;
static logical rewi;
extern /* Subroutine */ int schk1_(), schk2_(), schk3_(), schk4_(),
schk5_(), schk6_();
extern /* Subroutine */ int schk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, integer*, integer*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len);
static real a[4225] /* was [65][65] */, g[65];
static integer i__, j, n;
static logical fatal;
static real x[65], y[65], z__[130];
extern doublereal sdiff_();
extern doublereal sdiff_(real*, real*);
static logical trace;
static integer nidim;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static char snaps[32], trans[1];
static integer isnum;
static logical ltest[16];
@ -564,12 +319,12 @@ static logical c_false = FALSE_;
static logical rorder;
static integer layout;
static logical ltestt;
extern /* Subroutine */ int cs2chke_();
extern /* Subroutine */ int cs2chke_(char*, ftnlen);
static logical tsterr;
static real alf[7];
static integer inc[7], nkb;
static real bet[7];
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real eps, err;
char tmpchar;
@ -1098,21 +853,7 @@ L240:
} /* MAIN__ */
/* Subroutine */ int schk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, real* alf, integer* nbet, real* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1130,24 +871,25 @@ ftnlen sname_len;
static integer i__, m, n;
static real alpha;
static logical isame[13];
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static integer incxs, incys;
static char trans[1];
static integer ia, ib, ic;
static logical banded;
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
extern /* Subroutine */ int csgbmv_(), csgemv_();
extern /* Subroutine */ void csgbmv_(integer*, char*, integer*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ void csgemv_(integer*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
static char ctrans[14];
static real errmax;
extern logical lseres_();
extern logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len);
static real transl;
static char transs[1];
static integer laa, lda;
static real als, bls;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
static integer iku, kls, kus;
@ -1552,21 +1294,7 @@ L140:
} /* schk1_ */
/* Subroutine */ int schk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, real* alf, integer* nbet, real* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1585,9 +1313,9 @@ ftnlen sname_len;
static integer i__, k, n;
static real alpha;
static logical isame[13];
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs, incys;
@ -1598,13 +1326,14 @@ ftnlen sname_len;
static logical packed;
static integer nk, ks, ix, iy, ns, lx, ly;
static real errmax;
extern logical lseres_();
extern /* Subroutine */ int cssbmv_();
extern logical lseres_(char* , char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cssbmv_(integer*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
static real transl;
extern /* Subroutine */ int csspmv_(), cssymv_();
extern /* Subroutine */ void csspmv_(integer*, char*, integer*, real*, real*, real*, integer*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ void cssymv_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen);
static integer laa, lda;
static real als, bls;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SSYMV, SSBMV and SSPMV. */
@ -2003,17 +1732,7 @@ L130:
} /* schk2_ */
/* Subroutine */ int schk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, xt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *xt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* xt, real* g, real* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2034,9 +1753,9 @@ ftnlen sname_len;
static integer i__, k, n;
static char diags[1];
static logical isame[13];
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs;
@ -2047,14 +1766,17 @@ ftnlen sname_len;
static integer nk, ks, ix, ns, lx;
static char ctrans[14];
static real errmax;
extern logical lseres_();
extern /* Subroutine */ int cstbmv_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cstbmv_(integer*, char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
static real transl;
extern /* Subroutine */ int cstbsv_();
extern /* Subroutine */ void cstbsv_(integer*, char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
static char transs[1];
extern /* Subroutine */ int cstpmv_(), cstrmv_(), cstpsv_(), cstrsv_();
extern /* Subroutine */ void cstpmv_(integer*, char*, char*, char*, integer*, real*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cstrmv_(integer*, char*, char*, char*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cstpsv_(integer*, char*, char*, char*, integer*, real*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cstrsv_(integer*, char*, char*, char*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
static integer laa, icd, lda, ict, icu;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests STRMV, STBMV, STPMV, STRSV, STBSV and STPSV. */
@ -2508,19 +2230,7 @@ L130:
} /* schk3_ */
/* Subroutine */ int schk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6;
@ -2533,17 +2243,18 @@ ftnlen sname_len;
static integer i__, j, m, n;
static real alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int smake_(), csger_();
/* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void csger_(integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, integer*);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static integer incxs, incys, ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly;
static real errmax;
extern logical lseres_();
extern logical lseres_(char* , char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
static real transl;
static integer laa, lda;
static real als;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SGER. */
@ -2848,19 +2559,7 @@ L150:
} /* schk4_ */
/* Subroutine */ int schk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2880,25 +2579,25 @@ ftnlen sname_len;
static integer i__, j, n;
static real alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs;
extern /* Subroutine */ int csspr_();
extern /* Subroutine */ void csspr_(integer*, char*, integer*, real*, real*, integer*, real*, ftnlen);
static logical upper;
static char uplos[1];
extern /* Subroutine */ int cssyr_();
extern /* Subroutine */ void cssyr_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, ftnlen);
static integer ia, ja, ic, nc, jj, lj, in;
static logical packed;
static integer ix, ns, lx;
static real errmax;
extern logical lseres_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
static real transl;
static integer laa, lda;
static real als;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SSYR and SSPR. */
@ -3218,19 +2917,7 @@ L130:
} /* schk5_ */
/* Subroutine */ int schk6_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *ninc, *inc, *nmax, *incmax;
real *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt, *g, *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk6_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, real* a, real* aa, real* as, real* x, real* xx, real* xs, real* y, real* yy, real* ys, real* yt, real* g, real* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -3249,26 +2936,26 @@ ftnlen sname_len;
static integer i__, j, n;
static real alpha, w[2];
static logical isame[13];
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, integer*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int smvch_();
extern /* Subroutine */ int smvch_(char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, real*, real*, logical*, integer*, logical*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs, incys;
static logical upper;
static char uplos[1];
static integer ia, ja, ic;
extern /* Subroutine */ int csspr2_();
extern /* Subroutine */ void csspr2_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, ftnlen);
static integer nc, jj, lj, in;
static logical packed;
extern /* Subroutine */ int cssyr2_();
extern /* Subroutine */ void cssyr2_(integer*, char*, integer*, real*, real*, integer*, real*, integer*, real*, integer*, ftnlen);
static integer ix, iy, ns, lx, ly;
static real errmax;
extern logical lseres_();
extern logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len);
static real transl;
static integer laa, lda;
static real als;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SSYR2 and SSPR2. */
@ -3634,26 +3321,14 @@ L170:
} /* schk6_ */
/* Subroutine */ int smake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
ku, reset, transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
real *a;
integer *nmax;
real *aa;
integer *lda, *kl, *ku;
logical *reset;
real *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
{
/* Subroutine */ int smake_(char* type__, char* uplo, char* diag, integer* m, integer* n, real* a, integer* nmax, real* aa, integer* lda, integer* kl, integer* ku, logical* reset, real* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
/* Local variables */
static integer ibeg, iend;
extern doublereal sbeg_();
extern doublereal sbeg_(logical*);
static integer ioff;
static logical unit;
static integer i__, j;
@ -3879,28 +3554,14 @@ ftnlen diag_len;
} /* smake_ */
/* Subroutine */ int smvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
char *trans;
integer *m, *n;
real *alpha, *a;
integer *nmax;
real *x;
integer *incx;
real *beta, *y;
integer *incy;
real *yt, *g, *yy, *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen trans_len;
/* Subroutine */ int smvch_(char* trans, integer* m, integer* n, real* alpha, real* a, integer* nmax, real* x, integer* incx, real* beta, real* y, integer* incy, real* yt, real* g, real* yy, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
real r__1;
/* Builtin functions */
double sqrt();
double sqrt(double);
/* Local variables */
static real erri;
@ -4029,9 +3690,7 @@ L70:
} /* smvch_ */
logical lse_(ri, rj, lr)
real *ri, *rj;
integer *lr;
logical lse_(real* ri, real* rj, integer* lr)
{
/* System generated locals */
integer i__1;
@ -4076,13 +3735,7 @@ L30:
} /* lse_ */
logical lseres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
real *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen ltype_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
@ -4169,8 +3822,7 @@ L80:
} /* lseres_ */
doublereal sbeg_(reset)
logical *reset;
doublereal sbeg_(logical* reset)
{
/* System generated locals */
real ret_val;
@ -4221,8 +3873,7 @@ L10:
} /* sbeg_ */
doublereal sdiff_(x, y)
real *x, *y;
doublereal sdiff_(real* x, real* y)
{
/* System generated locals */
real ret_val;

View File

@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -393,7 +270,7 @@ static logical c_true = TRUE_;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program MAIN__() */ int main()
/* Main program MAIN__() */ int main(void)
{
/* Initialized data */
@ -402,26 +279,25 @@ static logical c_false = FALSE_;
/* System generated locals */
integer i__1, i__2, i__3;
real r__1;
/* Builtin functions */
integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(),
e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe();
integer f_clos();
/* Local variables */
static integer nalf, idim[9];
static logical same;
static integer nbet, ntra;
static logical rewi;
extern /* Subroutine */ int schk1_(), schk2_(), schk3_(), schk4_(),
schk5_();
extern /* Subroutine */ int schk1_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk2_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk3_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk4_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
extern /* Subroutine */ int schk5_(char*, real*, real*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, real*, integer*, real*, integer*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, real*, integer*, ftnlen);
static real c__[4225] /* was [65][65] */, g[65];
static integer i__, j, n;
static logical fatal;
static real w[130];
extern doublereal sdiff_();
extern doublereal sdiff_(real*, real*);
static logical trace;
static integer nidim;
extern /* Subroutine */ int smmch_();
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
static char snaps[32];
static integer isnum;
static logical ltest[6];
@ -433,9 +309,9 @@ static logical c_false = FALSE_;
static logical rorder;
static integer layout;
static logical ltestt, tsterr;
extern /* Subroutine */ int cs3chke_();
extern /* Subroutine */ void cs3chke_(char*, ftnlen);
static real alf[7], bet[7];
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real eps, err;
char tmpchar;
@ -899,21 +775,7 @@ L230:
} /* MAIN__ */
/* Subroutine */ int schk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *nmax;
real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk1_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -923,8 +785,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5, i__6;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static real beta;
@ -936,18 +796,17 @@ ftnlen sname_len;
static logical trana, tranb;
static integer nargs;
static logical reset;
extern /* Subroutine */ void sprcn1_();
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smmch_();
extern /* Subroutine */ void sprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
static integer ia, ib, ma, mb, na, nb, nc, ik, im, in, ks, ms, ns;
extern /* Subroutine */ int csgemm_();
extern /* Subroutine */ void csgemm_(integer*, char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
static char tranas[1], tranbs[1], transa[1], transb[1];
static real errmax;
extern logical lseres_();
extern logical lse_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern logical lse_(real*, real*, integer*);
static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc;
static real als, bls;
extern logical lse_();
static real err;
/* Tests SGEMM. */
@ -1278,23 +1137,8 @@ L130:
/* Subroutine */ void sprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k,
alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *transa, *transb;
integer *m, *n, *k;
real *alpha;
integer *lda, *ldb;
real *beta;
integer *ldc;
ftnlen sname_len;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ void sprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char crc[14], cta[14], ctb[14];
@ -1324,21 +1168,7 @@ ftnlen transb_len;
} /* sprcn1_ */
/* Subroutine */ int schk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *nmax;
real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk2_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1349,8 +1179,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static real beta;
@ -1368,15 +1196,15 @@ ftnlen sname_len;
static char uplos[1];
static integer ia, ib, na, nc, im, in, ms, ns;
static real errmax;
extern logical lseres_();
extern /* Subroutine */ int cssymm_();
extern void sprcn2_();
extern int smake_();
extern int smmch_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cssymm_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern void sprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
static integer laa, lbb, lda, lcc, ldb, ldc, ics;
static real als, bls;
static integer icu;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SSYMM. */
@ -1685,23 +1513,8 @@ L120:
} /* schk2_ */
/* Subroutine */ void sprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha,
lda, ldb, beta, ldc, sname_len, side_len, uplo_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo;
integer *m, *n;
real *alpha;
integer *lda, *ldb;
real *beta;
integer *ldc;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
/* Subroutine */ void sprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char cs[14], cu[14], crc[14];
@ -1726,19 +1539,7 @@ ftnlen uplo_len;
} /* sprcn2_ */
/* Subroutine */ int schk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__,
iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *nmax;
real *a, *aa, *as, *b, *bb, *bs, *ct, *g, *c__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk3_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* ct, real* g, real* c__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1751,8 +1552,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static char diag[1];
@ -1769,18 +1568,19 @@ ftnlen sname_len;
static integer nargs;
static logical reset;
static char uplos[1];
extern /* Subroutine */ void sprcn3_();
extern /* Subroutine */ void sprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, real*, integer*, integer*, ftnlen , ftnlen, ftnlen, ftnlen, ftnlen);
static integer ia, na, nc, im, in, ms, ns;
static char tranas[1], transa[1];
static real errmax;
extern int smake_();
extern int smmch_();
extern logical lseres_();
extern /* Subroutine */ int cstrmm_(), cstrsm_();
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cstrmm_(integer*, char*, char*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cstrsm_(integer*, char*, char*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
static integer laa, icd, lbb, lda, ldb, ics;
static real als;
static integer ict, icu;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests STRMM and STRSM. */
@ -2155,24 +1955,8 @@ L160:
} /* schk3_ */
/* Subroutine */ void sprcn3_(nout, nc, sname, iorder, side, uplo, transa,
diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len,
transa_len, diag_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo, *transa, *diag;
integer *m, *n;
real *alpha;
integer *lda, *ldb;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
ftnlen transa_len;
ftnlen diag_len;
/* Subroutine */ void sprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, real* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cd[14], cs[14], cu[14], crc[14];
@ -2210,21 +1994,7 @@ ftnlen diag_len;
} /* sprcn3_ */
/* Subroutine */ int schk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *nmax;
real *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct, *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk4_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* a, real* aa, real* as, real* b, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2235,8 +2005,6 @@ ftnlen sname_len;
integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
i__3, i__4, i__5;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static real beta;
@ -2253,18 +2021,18 @@ ftnlen sname_len;
static char trans[1];
static logical upper;
static char uplos[1];
extern /* Subroutine */ void sprcn4_();
extern /* Subroutine */ int smake_();
extern /* Subroutine */ int smmch_();
extern /* Subroutine */ void sprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
static integer ia, ib, jc, ma, na, nc, ik, in, jj, lj, ks, ns;
static real errmax;
extern logical lseres_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
static char transs[1];
extern /* Subroutine */ int cssyrk_();
extern /* Subroutine */ void cssyrk_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
static integer laa, lda, lcc, ldc;
static real als;
static integer ict, icu;
extern logical lse_();
extern logical lse_(real*, real*, integer*);
static real err;
/* Tests SSYRK. */
@ -2575,23 +2343,8 @@ L130:
} /* schk4_ */
/* Subroutine */ void sprcn4_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
real *alpha;
integer *lda;
real *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ void sprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, real* alpha, integer* lda, real* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -2619,21 +2372,7 @@ ftnlen transa_len;
} /* sprcn4_ */
/* Subroutine */ int schk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs,
c__, cc, cs, ct, g, w, iorder, sname_len)
char *sname;
real *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
real *alf;
integer *nbet;
real *bet;
integer *nmax;
real *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct, *g, *w;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int schk5_(char* sname, real* eps, real* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, real* alf, integer* nbet, real* bet, integer* nmax, real* ab, real* aa, real* as, real* bb, real* bs, real* c__, real* cc, real* cs, real* ct, real* g, real* w, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2643,8 +2382,6 @@ ftnlen sname_len;
/* System generated locals */
integer c_dim1, c_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7, i__8;
/* Builtin functions */
integer f_rew(), s_wsfe(), e_wsfe(), do_fio();
/* Local variables */
static integer jjab;
@ -2663,18 +2400,18 @@ ftnlen sname_len;
static logical upper;
static char uplos[1];
static integer ia, ib;
extern /* Subroutine */ void sprcn5_();
extern /* Subroutine */ void sprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, real*, integer*, integer*, real*, integer*, ftnlen, ftnlen, ftnlen);
static integer jc, ma, na, nc, ik, in, jj, lj, ks, ns;
static real errmax;
extern logical lseres_();
extern int smake_();
extern logical lseres_(char*, char*, integer*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ int smake_(char*, char*, char*, integer*, integer*, real*, integer*, real*, integer*, logical*, real*, ftnlen, ftnlen, ftnlen);
static char transs[1];
static integer laa, lbb, lda, lcc, ldb, ldc;
static real als;
static integer ict, icu;
extern /* Subroutine */ int cssyr2k_();
extern logical lse_();
extern int smmch_();
extern /* Subroutine */ void cssyr2k_(integer*, char*, char*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, ftnlen, ftnlen);
extern logical lse_(real*, real*, integer*);
extern /* Subroutine */ int smmch_(char*, char*, integer*, integer*, integer*, real*, real*, integer*, real*, integer*, real*, real*, integer*, real*, real*, real*, integer*, real*, real*, logical*, integer*, logical*, ftnlen, ftnlen);
static real err;
/* Tests SSYR2K. */
@ -3037,23 +2774,8 @@ L160:
} /* schk5_ */
/* Subroutine */ void sprcn5_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
real *alpha;
integer *lda, *ldb;
real *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ void sprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, real* alpha, integer* lda, integer* ldb, real* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Builtin functions */
integer s_wsfe(), do_fio(), e_wsfe();
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -3081,19 +2803,7 @@ ftnlen transa_len;
} /* sprcn5_ */
/* Subroutine */ int smake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset,
transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
real *a;
integer *nmax;
real *aa;
integer *lda;
logical *reset;
real *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int smake_(char* type__, char* uplo, char* diag, integer* m, integer* n, real* a, integer* nmax, real* aa, integer* lda, logical* reset, real* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2;
@ -3102,7 +2812,7 @@ ftnlen diag_len;
/* Local variables */
static integer ibeg, iend;
extern doublereal sbeg_();
extern doublereal sbeg_(logical*);
static logical unit;
static integer i__, j;
static logical lower, upper, gen, tri, sym;
@ -3233,25 +2943,7 @@ ftnlen diag_len;
} /* smake_ */
/* Subroutine */ int smmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb,
beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv,
transa_len, transb_len)
char *transa, *transb;
integer *m, *n, *kk;
real *alpha, *a;
integer *lda;
real *b;
integer *ldb;
real *beta, *c__;
integer *ldc;
real *ct, *g, *cc;
integer *ldcc;
real *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ int smmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, real* alpha, real* a, integer* lda, real* b, integer* ldb, real* beta, real* c__, integer* ldc, real* ct, real* g, real* cc, integer* ldcc, real* eps, real* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len)
{
/* System generated locals */
@ -3260,8 +2952,7 @@ ftnlen transb_len;
real r__1, r__2;
/* Builtin functions */
double sqrt();
integer s_wsfe(), e_wsfe(), do_fio();
double sqrt(double);
/* Local variables */
static real erri;
@ -3426,9 +3117,7 @@ L150:
} /* smmch_ */
logical lse_(ri, rj, lr)
real *ri, *rj;
integer *lr;
logical lse_(real* ri, real* rj, integer* lr)
{
/* System generated locals */
integer i__1;
@ -3475,13 +3164,7 @@ L30:
} /* lse_ */
logical lseres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
real *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lseres_(char* type__, char* uplo, integer* m, integer* n, real* aa, real* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2;
@ -3572,8 +3255,7 @@ L80:
} /* lseres_ */
doublereal sbeg_(reset)
logical *reset;
doublereal sbeg_(logical* reset)
{
/* System generated locals */
real ret_val;
@ -3625,8 +3307,7 @@ L10:
} /* sbeg_ */
doublereal sdiff_(x, y)
real *x, *y;
doublereal sdiff_(real* x, real* y)
{
/* System generated locals */
real ret_val;

View File

@ -242,250 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
#if 0
static inline void cdotc_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i]))._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i]))._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conjf(Cf(&x[i*incx]))._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += conjf(Cf(&x[i*incx]))._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i])) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conjf(Cf(&x[i*incx])) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotc_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i]))._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i]))._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += conj(Cd(&x[i*incx]))._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += conj(Cd(&x[i*incx]))._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i])) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += conj(Cd(&x[i*incx])) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
static inline void cdotu_(complex *z, integer *n_, complex *x, integer *incx_, complex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Fcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i])._Val[0] * Cf(&y[i])._Val[0];
zdotc._Val[1] += Cf(&x[i])._Val[1] * Cf(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cf(&x[i*incx])._Val[0] * Cf(&y[i*incy])._Val[0];
zdotc._Val[1] += Cf(&x[i*incx])._Val[1] * Cf(&y[i*incy])._Val[1];
}
}
pCf(z) = zdotc;
}
#else
_Complex float zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i]) * Cf(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cf(&x[i*incx]) * Cf(&y[i*incy]);
}
}
pCf(z) = zdotc;
}
#endif
static inline void zdotu_(doublecomplex *z, integer *n_, doublecomplex *x, integer *incx_, doublecomplex *y, integer *incy_) {
integer n = *n_, incx = *incx_, incy = *incy_, i;
#ifdef _MSC_VER
_Dcomplex zdotc = {0.0, 0.0};
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i])._Val[0] * Cd(&y[i])._Val[0];
zdotc._Val[1] += Cd(&x[i])._Val[1] * Cd(&y[i])._Val[1];
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc._Val[0] += Cd(&x[i*incx])._Val[0] * Cd(&y[i*incy])._Val[0];
zdotc._Val[1] += Cd(&x[i*incx])._Val[1] * Cd(&y[i*incy])._Val[1];
}
}
pCd(z) = zdotc;
}
#else
_Complex double zdotc = 0.0;
if (incx == 1 && incy == 1) {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i]) * Cd(&y[i]);
}
} else {
for (i=0;i<n;i++) { /* zdotc = zdotc + dconjg(x(i))* y(i) */
zdotc += Cd(&x[i*incx]) * Cd(&y[i*incy]);
}
}
pCd(z) = zdotc;
}
#endif
#endif
/* Common Block Declarations */
@ -502,16 +258,16 @@ static integer c__1 = 1;
static integer c__5 = 5;
static doublereal c_b43 = 1.;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
static doublereal sfac = 9.765625e-4;
/* Local variables */
extern /* Subroutine */ int check1_(), check2_();
extern /* Subroutine */ int check1_(doublereal*), check2_(doublereal*);
static integer ic;
extern /* Subroutine */ int header_();
extern /* Subroutine */ int header_(void);
/* Test program for the COMPLEX*16 Level 1 CBLAS. */
/* Based upon the original CBLAS test routine together with: */
@ -551,7 +307,7 @@ static doublereal c_b43 = 1.;
exit(0);
} /* MAIN__ */
/* Subroutine */ int header_()
/* Subroutine */ int header_(void)
{
/* Initialized data */
@ -570,8 +326,7 @@ static doublereal c_b43 = 1.;
} /* header_ */
/* Subroutine */ int check1_(sfac)
doublereal *sfac;
/* Subroutine */ int check1_(doublereal* sfac)
{
/* Initialized data */
@ -623,15 +378,15 @@ doublereal *sfac;
/* Local variables */
static integer i__;
extern /* Subroutine */ int ctest_();
extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*);
static doublecomplex mwpcs[5], mwpct[5];
extern /* Subroutine */ int zscaltest_(), itest1_(), stest1_();
extern /* Subroutine */ int zscaltest_(integer*, doublereal*, doublecomplex*, integer*), itest1_(integer*, integer*), stest1_(doublereal*, doublereal*, doublereal*, doublereal*);
static doublecomplex cx[8];
extern doublereal dznrm2test_();
extern doublereal dznrm2test_(integer*, doublecomplex*, integer*);
static integer np1;
extern /* Subroutine */ int zdscaltest_();
extern integer izamaxtest_();
extern doublereal dzasumtest_();
extern /* Subroutine */ int zdscaltest_(integer*, doublereal*, doublecomplex*, integer*);
extern integer izamaxtest_(integer*, doublecomplex*, integer*);
extern doublereal dzasumtest_(integer*, doublecomplex*, integer*);
static integer len;
/* .. Parameters .. */
@ -748,8 +503,7 @@ doublereal *sfac;
return 0;
} /* check1_ */
/* Subroutine */ int check2_(sfac)
doublereal *sfac;
/* Subroutine */ int check2_(doublereal* sfac)
{
/* Initialized data */
@ -834,14 +588,14 @@ doublereal *sfac;
/* Local variables */
static doublecomplex cdot[1];
static integer lenx, leny, i__;
extern /* Subroutine */ int ctest_();
extern /* Subroutine */ int ctest_(integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*);
static integer ksize;
static doublecomplex ztemp;
extern /* Subroutine */ int zdotctest_(), zcopytest_();
extern /* Subroutine */ int zdotctest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*), zcopytest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*);
static integer ki;
extern /* Subroutine */ int zdotutest_(), zswaptest_();
extern /* Subroutine */ int zdotutest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*), zswaptest_(integer*, doublecomplex*, integer*, doublecomplex*, integer*);
static integer kn;
extern /* Subroutine */ int zaxpytest_();
extern /* Subroutine */ int zaxpytest_(integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, integer*);
static doublecomplex cx[7], cy[7];
static integer mx, my;
@ -923,20 +677,18 @@ doublereal *sfac;
return 0;
} /* check2_ */
/* Subroutine */ int stest_(len, scomp, strue, ssize, sfac)
integer *len;
doublereal *scomp, *strue, *ssize, *sfac;
/* Subroutine */ int stest_(integer* len, doublereal* scomp, doublereal* strue, doublereal* ssize, doublereal* sfac)
{
/* System generated locals */
integer i__1;
doublereal d__1, d__2, d__3, d__4, d__5;
/* Builtin functions */
integer s_wsfe(), e_wsfe(), do_fio();
integer s_wsfe(void), e_wsfe(void), do_fio(void);
/* Local variables */
static integer i__;
extern doublereal sdiff_();
extern doublereal sdiff_(doublereal*, doublereal*);
static doublereal sd;
/* ********************************* STEST ************************** */
@ -992,11 +744,10 @@ L40:
} /* stest_ */
/* Subroutine */ int stest1_(scomp1, strue1, ssize, sfac)
doublereal *scomp1, *strue1, *ssize, *sfac;
/* Subroutine */ int stest1_(doublereal* scomp1, doublereal* strue1, doublereal* ssize, doublereal* sfac)
{
static doublereal scomp[1], strue[1];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*,doublereal*, doublereal*, doublereal*, doublereal*);
/* ************************* STEST1 ***************************** */
@ -1023,8 +774,7 @@ doublereal *scomp1, *strue1, *ssize, *sfac;
return 0;
} /* stest1_ */
doublereal sdiff_(sa, sb)
doublereal *sa, *sb;
doublereal sdiff_(doublereal* sa, doublereal* sb)
{
/* System generated locals */
doublereal ret_val;
@ -1038,10 +788,7 @@ doublereal *sa, *sb;
return ret_val;
} /* sdiff_ */
/* Subroutine */ int ctest_(len, ccomp, ctrue, csize, sfac)
integer *len;
doublecomplex *ccomp, *ctrue, *csize;
doublereal *sfac;
/* Subroutine */ int ctest_(integer* len, doublecomplex* ccomp, doublecomplex* ctrue, doublecomplex* csize, doublereal* sfac)
{
/* System generated locals */
integer i__1, i__2;
@ -1049,7 +796,7 @@ doublereal *sfac;
/* Local variables */
static integer i__;
static doublereal scomp[20], ssize[20], strue[20];
extern /* Subroutine */ int stest_();
extern /* Subroutine */ int stest_(integer*, doublereal*, doublereal*, doublereal*, doublereal*);
/* **************************** CTEST ***************************** */
@ -1087,8 +834,7 @@ doublereal *sfac;
return 0;
} /* ctest_ */
/* Subroutine */ int itest1_(icomp, itrue)
integer *icomp, *itrue;
/* Subroutine */ int itest1_(integer* icomp, integer* itrue)
{
static integer id;

View File

@ -242,129 +242,6 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* -- translated by f2c (version 20000121).
You must link the resulting object file with the libraries:
-lf2c -lm (in that order)
*/
/* Common Block Declarations */
@ -396,7 +273,7 @@ static integer c_n1 = -1;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program */ int main()
/* Main program */ int main(void)
{
/* Initialized data */
@ -414,19 +291,23 @@ static logical c_false = FALSE_;
static logical same;
static integer ninc, nbet, ntra;
static logical rewi;
extern /* Subroutine */ int zchk1_(), zchk2_(), zchk3_(), zchk4_(),
zchk5_(), zchk6_();
extern /* Subroutine */ int zchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int zchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int zchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ int zchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ int zchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ int zchk6_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
static doublecomplex a[4225] /* was [65][65] */;
static doublereal g[65];
static integer i__, j;
extern doublereal ddiff_();
extern doublereal ddiff_(doublereal*, doublereal*);
static integer n;
static logical fatal;
static doublecomplex x[65], y[65], z__[130];
static logical trace;
static integer nidim;
static char snaps[32], trans[1];
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer isnum;
static logical ltest[17];
static doublecomplex aa[4225];
@ -441,12 +322,12 @@ static logical c_false = FALSE_;
static logical rorder;
static integer layout;
static logical ltestt, tsterr;
extern /* Subroutine */ int cz2chke_();
extern /* Subroutine */ void cz2chke_(char*, ftnlen);
static doublecomplex alf[7];
static integer inc[7], nkb;
static doublecomplex bet[7];
static doublereal eps, err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
char tmpchar;
/* Test program for the DOUBLE PRECISION COMPLEX Level 2 Blas. */
@ -984,22 +865,7 @@ L240:
} /* MAIN__ */
/* Subroutine */ int zchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
doublereal *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1018,27 +884,27 @@ ftnlen sname_len;
static integer i__, m, n;
static doublecomplex alpha;
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys;
static char trans[1];
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer ia, ib, ic;
static logical banded;
static integer nc, nd, im, in, kl, ml, nk, nl, ku, ix, iy, ms, lx, ly, ns;
extern /* Subroutine */ int czgbmv_();
extern /* Subroutine */ void czgbmv_(integer*, char*, integer*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
static char ctrans[14];
extern /* Subroutine */ int czgemv_();
extern /* Subroutine */ void czgemv_(integer*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
static doublereal errmax;
static doublecomplex transl;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static char transs[1];
static integer laa, lda;
static doublecomplex als, bls;
static doublereal err;
static integer iku, kls;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
static integer kus;
@ -1451,22 +1317,7 @@ L140:
} /* zchk1_ */
/* Subroutine */ int zchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, nalf, alf, nbet, bet, ninc, inc, nmax,
incmax, a, aa, as, x, xx, xs, y, yy, ys, yt, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
doublereal *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1486,27 +1337,28 @@ ftnlen sname_len;
static integer i__, k, n;
static doublecomplex alpha;
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
static integer incxs, incys;
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static char uplos[1];
static integer ia, ib, ic;
static logical banded;
static integer nc, ik, in;
static logical packed;
static integer nk, ks, ix, iy, ns, lx, ly;
extern /* Subroutine */ int czhbmv_(), czhemv_();
extern /* Subroutine */ void czhbmv_(integer*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ void czhemv_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
static doublereal errmax;
static doublecomplex transl;
extern logical lzeres_();
extern /* Subroutine */ int czhpmv_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void czhpmv_(integer*, char*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen);
static integer laa, lda;
static doublecomplex als, bls;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests CHEMV, CHBMV and CHPMV. */
@ -1909,19 +1761,7 @@ L130:
} /* zchk2_ */
/* Subroutine */ int zchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nkb, kb, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, xt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nkb, *kb, *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *xt;
doublereal *g;
doublecomplex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nkb, integer* kb, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* xt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1942,13 +1782,13 @@ ftnlen sname_len;
static integer i__, k, n;
static char diags[1];
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
static integer incxs;
static char trans[1];
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static char uplos[1];
static logical banded;
static integer nc, ik, in;
@ -1957,14 +1797,17 @@ ftnlen sname_len;
static char ctrans[14];
static doublereal errmax;
static doublecomplex transl;
extern logical lzeres_();
extern /* Subroutine */ int cztbmv_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cztbmv_(integer*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
static char transs[1];
extern /* Subroutine */ int cztbsv_(), cztpmv_(), cztrmv_(), cztpsv_(),
cztrsv_();
extern /* Subroutine */ void cztbsv_(integer*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cztpmv_(integer*, char*, char*, char*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cztpsv_(integer*, char*, char*, char*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cztrmv_(integer*, char*, char*, char*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cztrsv_(integer*, char*, char*, char*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
static integer laa, icd, lda, ict, icu;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
@ -2422,21 +2265,7 @@ L130:
} /* zchk3_ */
/* Subroutine */ int zchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
doublereal *g;
doublecomplex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
@ -2450,21 +2279,21 @@ ftnlen sname_len;
static integer i__, j, m, n;
static doublecomplex alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
static logical reset;
static integer incxs, incys;
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static integer ia, nc, nd, im, in, ms, ix, iy, ns, lx, ly;
extern /* Subroutine */ int czgerc_();
extern /* Subroutine */ void czgerc_(integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*);
static doublereal errmax;
extern /* Subroutine */ int czgeru_();
extern /* Subroutine */ void czgeru_(integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*);
static doublecomplex transl;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lda;
static doublecomplex als;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
@ -2793,21 +2622,7 @@ L150:
} /* zchk4_ */
/* Subroutine */ int zchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
doublereal *g;
doublecomplex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2827,13 +2642,14 @@ ftnlen sname_len;
static integer i__, j, n;
static doublecomplex alpha, w[1];
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int czher_();
extern /* Subroutine */ void czher_(integer*, char*, integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen);
static logical reset;
static char cuplo[14];
static integer incxs;
extern /* Subroutine */ int czhpr_(), zmvch_();
extern /* Subroutine */ void czhpr_(integer*, char*, integer*, doublereal*, doublecomplex*, integer*, doublecomplex*, ftnlen);
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static logical upper;
static char uplos[1];
static integer ia, ja, ic, nc, jj, lj, in;
@ -2841,10 +2657,10 @@ ftnlen sname_len;
static integer ix, ns, lx;
static doublereal ralpha, errmax;
static doublecomplex transl;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lda;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZHER and ZHPR. */
@ -3167,21 +2983,7 @@ L130:
} /* zchk5_ */
/* Subroutine */ int zchk6_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, ninc, inc, nmax, incmax, a, aa, as, x,
xx, xs, y, yy, ys, yt, g, z__, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *ninc, *inc, *nmax, *incmax;
doublecomplex *a, *aa, *as, *x, *xx, *xs, *y, *yy, *ys, *yt;
doublereal *g;
doublecomplex *z__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk6_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* ninc, integer* inc, integer* nmax, integer* incmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* x, doublecomplex* xx, doublecomplex* xs, doublecomplex* y, doublecomplex* yy, doublecomplex* ys, doublecomplex* yt, doublereal* g, doublecomplex* z__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -3201,25 +3003,26 @@ ftnlen sname_len;
static integer i__, j, n;
static doublecomplex alpha, w[2];
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, integer*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
static logical reset;
static char cuplo[14];
static integer incxs, incys;
extern /* Subroutine */ int zmvch_();
extern /* Subroutine */ int zmvch_(char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen);
static logical upper;
static char uplos[1];
extern /* Subroutine */ int czher2_(), czhpr2_();
extern /* Subroutine */ void czher2_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ void czhpr2_(integer*, char*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, ftnlen);
static integer ia, ja, ic, nc, jj, lj, in;
static logical packed;
static integer ix, iy, ns, lx, ly;
static doublereal errmax;
static doublecomplex transl;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lda;
static doublecomplex als;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZHER2 and ZHPR2. */
@ -3604,24 +3407,7 @@ L170:
} /* zchk6_ */
/* Subroutine */ int zmvch_(trans, m, n, alpha, a, nmax, x, incx, beta, y,
incy, yt, g, yy, eps, err, fatal, nout, mv, trans_len)
char *trans;
integer *m, *n;
doublecomplex *alpha, *a;
integer *nmax;
doublecomplex *x;
integer *incx;
doublecomplex *beta, *y;
integer *incy;
doublecomplex *yt;
doublereal *g;
doublecomplex *yy;
doublereal *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen trans_len;
/* Subroutine */ int zmvch_(char* trans, integer* m, integer* n, doublecomplex* alpha, doublecomplex* a, integer* nmax, doublecomplex* x, integer* incx, doublecomplex* beta, doublecomplex* y, integer* incy, doublecomplex* yt, doublereal* g, doublecomplex* yy, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen trans_len)
{
/* System generated locals */
@ -3819,9 +3605,7 @@ L80:
} /* zmvch_ */
logical lze_(ri, rj, lr)
doublecomplex *ri, *rj;
integer *lr;
logical lze_(doublecomplex* ri, doublecomplex* rj, integer* lr)
{
/* System generated locals */
integer i__1, i__2, i__3;
@ -3868,13 +3652,7 @@ L30:
} /* lze_ */
logical lzeres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
doublecomplex *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lzeres_(char* type__, char* uplo, integer* m, integer* n, doublecomplex* aa, doublecomplex* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4;
@ -3967,9 +3745,7 @@ L80:
} /* lzeres_ */
/* Double Complex */ VOID zbeg_( ret_val, reset)
doublecomplex * ret_val;
logical *reset;
/* Double Complex */ VOID zbeg_( doublecomplex* ret_val, logical* reset)
{
/* System generated locals */
doublereal d__1, d__2;
@ -4030,8 +3806,7 @@ L10:
} /* zbeg_ */
doublereal ddiff_(x, y)
doublereal *x, *y;
doublereal ddiff_(doublereal* x, doublereal* y)
{
/* System generated locals */
doublereal ret_val;
@ -4051,19 +3826,7 @@ doublereal *x, *y;
} /* ddiff_ */
/* Subroutine */ int zmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, kl,
ku, reset, transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
doublecomplex *a;
integer *nmax;
doublecomplex *aa;
integer *lda, *kl, *ku;
logical *reset;
doublecomplex *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int zmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublecomplex* a, integer* nmax, doublecomplex* aa, integer* lda, integer* kl, integer* ku, logical* reset, doublecomplex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
@ -4072,7 +3835,7 @@ ftnlen diag_len;
/* Local variables */
static integer ibeg, iend, ioff;
extern /* Double Complex */ VOID zbeg_();
extern /* Double Complex */ VOID zbeg_(doublecomplex*, logical*);
static logical unit;
static integer i__, j;
static logical lower;

View File

@ -22,14 +22,11 @@ typedef double doublereal;
typedef struct { real r, i; } complex;
typedef struct { doublereal r, i; } doublecomplex;
#ifdef _MSC_VER
static inline _Fcomplex Cf(complex *z) {_Fcomplex zz={z->r , z->i}; return zz;}
static inline _Dcomplex Cd(doublecomplex *z) {_Dcomplex zz={z->r , z->i};return zz;}
static inline _Fcomplex * _pCf(complex *z) {return (_Fcomplex*)z;}
static inline _Dcomplex * _pCd(doublecomplex *z) {return (_Dcomplex*)z;}
#else
static inline _Complex float Cf(complex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex double Cd(doublecomplex *z) {return z->r + z->i*_Complex_I;}
static inline _Complex float * _pCf(complex *z) {return (_Complex float*)z;}
static inline _Complex double * _pCd(doublecomplex *z) {return (_Complex double*)z;}
#endif
#define pCf(z) (*_pCf(z))
@ -242,124 +239,7 @@ typedef struct Namelist Namelist;
/* procedure parameter types for -A and -C++ */
#define F2C_proc_par_types 1
#ifdef __cplusplus
typedef logical (*L_fp)(...);
#else
typedef logical (*L_fp)();
#endif
#if 0
static float spow_ui(float x, integer n) {
float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static double dpow_ui(double x, integer n) {
double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#ifdef _MSC_VER
static _Fcomplex cpow_ui(complex x, integer n) {
complex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x.r = 1/x.r, x.i=1/x.i;
for(u = n; ; ) {
if(u & 01) pow.r *= x.r, pow.i *= x.i;
if(u >>= 1) x.r *= x.r, x.i *= x.i;
else break;
}
}
_Fcomplex p={pow.r, pow.i};
return p;
}
#else
static _Complex float cpow_ui(_Complex float x, integer n) {
_Complex float pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
#ifdef _MSC_VER
static _Dcomplex zpow_ui(_Dcomplex x, integer n) {
_Dcomplex pow={1.0,0.0}; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x._Val[0] = 1/x._Val[0], x._Val[1] =1/x._Val[1];
for(u = n; ; ) {
if(u & 01) pow._Val[0] *= x._Val[0], pow._Val[1] *= x._Val[1];
if(u >>= 1) x._Val[0] *= x._Val[0], x._Val[1] *= x._Val[1];
else break;
}
}
_Dcomplex p = {pow._Val[0], pow._Val[1]};
return p;
}
#else
static _Complex double zpow_ui(_Complex double x, integer n) {
_Complex double pow=1.0; unsigned long int u;
if(n != 0) {
if(n < 0) n = -n, x = 1/x;
for(u = n; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
#endif
static integer pow_ii(integer x, integer n) {
integer pow; unsigned long int u;
if (n <= 0) {
if (n == 0 || x == 1) pow = 1;
else if (x != -1) pow = x == 0 ? 1/x : 0;
else n = -n;
}
if ((n > 0) || !(n == 0 || x == 1 || x != -1)) {
u = n;
for(pow = 1; ; ) {
if(u & 01) pow *= x;
if(u >>= 1) x *= x;
else break;
}
}
return pow;
}
static integer dmaxloc_(double *w, integer s, integer e, integer *n)
{
double m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
static integer smaxloc_(float *w, integer s, integer e, integer *n)
{
float m; integer i, mi;
for(m=w[s-1], mi=s, i=s+1; i<=e; i++)
if (w[i-1]>m) mi=i ,m=w[i-1];
return mi-s+1;
}
#endif
/* Common Block Declarations */
@ -388,7 +268,7 @@ static logical c_true = TRUE_;
static integer c__0 = 0;
static logical c_false = FALSE_;
/* Main program MAIN__() */ int main()
/* Main program MAIN__() */ int main(void)
{
/* Initialized data */
@ -400,26 +280,29 @@ static logical c_false = FALSE_;
doublereal d__1;
/* Builtin functions */
integer s_rsle(), do_lio(), e_rsle(), f_open(), s_wsfe(), do_fio(),
e_wsfe(), s_wsle(), e_wsle(), s_rsfe(), e_rsfe();
integer s_rsle(void), do_lio(void), e_rsle(void), f_open(void), s_wsfe(void), do_fio(void),
e_wsfe(void), s_wsle(void), e_wsle(void), s_rsfe(void), e_rsfe(void);
/* Local variables */
static integer nalf, idim[9];
static logical same;
static integer nbet, ntra;
static logical rewi;
extern /* Subroutine */ int zchk1_(), zchk2_(), zchk3_(), zchk4_(),
zchk5_();
extern /* Subroutine */ int zchk1_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int zchk2_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int zchk3_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
extern /* Subroutine */ int zchk4_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, integer*, ftnlen);
extern /* Subroutine */ int zchk5_(char*, doublereal*, doublereal*, integer*, integer*, logical*, logical*, logical*, integer*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublecomplex*, doublereal*, doublecomplex*, integer*, ftnlen);
static doublecomplex c__[4225] /* was [65][65] */;
static doublereal g[65];
static integer i__, j;
extern doublereal ddiff_();
extern doublereal ddiff_(doublereal*, doublereal*);
static integer n;
static logical fatal;
static doublecomplex w[130];
static logical trace;
static integer nidim;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static char snaps[32];
static integer isnum;
static logical ltest[9];
@ -431,10 +314,10 @@ static logical c_false = FALSE_;
static logical rorder;
static integer layout;
static logical ltestt, tsterr;
extern /* Subroutine */ int cz3chke_();
extern /* Subroutine */ int cz3chke_(char*, ftnlen);
static doublecomplex alf[7], bet[7];
static doublereal eps, err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
char tmpchar;
/* Test program for the COMPLEX*16 Level 3 Blas. */
@ -924,22 +807,7 @@ L230:
} /* MAIN__ */
/* Subroutine */ int zchk1_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *nmax;
doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct;
doublereal *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk1_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -956,21 +824,21 @@ ftnlen sname_len;
static integer i__, k, m, n;
static doublecomplex alpha;
static logical isame[13], trana, tranb;
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical reset;
static integer ia, ib;
extern /* Subroutine */ int zprcn1_();
extern /* Subroutine */ int zprcn1_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
static integer ma, mb, na, nb, nc, ik, im, in, ks, ms, ns;
extern /* Subroutine */ int czgemm_();
extern /* Subroutine */ void czgemm_(integer*, char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static char tranas[1], tranbs[1], transa[1], transb[1];
static doublereal errmax;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer ica, icb, laa, lbb, lda, lcc, ldb, ldc;
static doublecomplex als, bls;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZGEMM. */
@ -1313,20 +1181,7 @@ L130:
} /* zchk1_ */
/* Subroutine */ int zprcn1_(nout, nc, sname, iorder, transa, transb, m, n, k,
alpha, lda, ldb, beta, ldc, sname_len, transa_len, transb_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *transa, *transb;
integer *m, *n, *k;
doublecomplex *alpha;
integer *lda, *ldb;
doublecomplex *beta;
integer *ldc;
ftnlen sname_len;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ int zprcn1_(integer* nout, integer* nc, char* sname, integer* iorder, char* transa, char* transb, integer* m, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen transa_len, ftnlen transb_len)
{
/* Local variables */
static char crc[14], cta[14], ctb[14];
@ -1357,22 +1212,7 @@ return 0;
} /* zprcn1_ */
/* Subroutine */ int zchk2_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *nmax;
doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct;
doublereal *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk2_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1394,23 +1234,23 @@ ftnlen sname_len;
static doublecomplex alpha;
static logical isame[13];
static char sides[1];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical reset;
static char uplos[1];
static integer ia, ib;
extern /* Subroutine */ int zprcn2_();
extern /* Subroutine */ int zprcn2_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
static integer na, nc, im, in, ms, ns;
extern /* Subroutine */ int czhemm_();
extern /* Subroutine */ void czhemm_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static doublereal errmax;
extern logical lzeres_();
extern /* Subroutine */ int czsymm_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void czsymm_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lbb, lda, lcc, ldb, ldc, ics;
static doublecomplex als, bls;
static integer icu;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZHEMM and ZSYMM. */
@ -1737,20 +1577,7 @@ L120:
} /* zchk2_ */
/* Subroutine */ int zprcn2_(nout, nc, sname, iorder, side, uplo, m, n, alpha,
lda, ldb, beta, ldc, sname_len, side_len, uplo_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo;
integer *m, *n;
doublecomplex *alpha;
integer *lda, *ldb;
doublecomplex *beta;
integer *ldc;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
/* Subroutine */ int zprcn2_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, integer* m, integer* n, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len)
{
/* Local variables */
static char cs[14], cu[14], crc[14];
@ -1777,21 +1604,7 @@ return 0;
} /* zprcn2_ */
/* Subroutine */ int zchk3_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nmax, a, aa, as, b, bb, bs, ct, g, c__,
iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *nmax;
doublecomplex *a, *aa, *as, *b, *bb, *bs, *ct;
doublereal *g;
doublecomplex *c__;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk3_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* ct, doublereal* g, doublecomplex* c__, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -1817,23 +1630,24 @@ ftnlen sname_len;
static char diags[1];
static logical isame[13];
static char sides[1];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static logical reset;
static char uplos[1];
static integer ia, na;
extern /* Subroutine */ int zprcn3_();
extern /* Subroutine */ int zprcn3_(integer*, integer*, char*, integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, ftnlen, ftnlen, ftnlen, ftnlen, ftnlen);
static integer nc, im, in, ms, ns;
static char tranas[1], transa[1];
static doublereal errmax;
extern logical lzeres_();
extern /* Subroutine */ int cztrmm_(), cztrsm_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
extern /* Subroutine */ void cztrmm_(integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ void cztrsm_(integer*, char*, char*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen, ftnlen);
static integer laa, icd, lbb, lda, ldb, ics;
static doublecomplex als;
static integer ict, icu;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZTRMM and ZTRSM. */
@ -2227,21 +2041,7 @@ L160:
} /* zchk3_ */
/* Subroutine */ int zprcn3_(nout, nc, sname, iorder, side, uplo, transa,
diag, m, n, alpha, lda, ldb, sname_len, side_len, uplo_len,
transa_len, diag_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *side, *uplo, *transa, *diag;
integer *m, *n;
doublecomplex *alpha;
integer *lda, *ldb;
ftnlen sname_len;
ftnlen side_len;
ftnlen uplo_len;
ftnlen transa_len;
ftnlen diag_len;
/* Subroutine */ int zprcn3_(integer* nout, integer* nc, char* sname, integer* iorder, char* side, char* uplo, char* transa, char* diag, integer* m, integer* n, doublecomplex* alpha, integer* lda, integer* ldb, ftnlen sname_len, ftnlen side_len, ftnlen uplo_len, ftnlen transa_len, ftnlen diag_len)
{
/* Local variables */
@ -2281,22 +2081,7 @@ return 0;
} /* zprcn3_ */
/* Subroutine */ int zchk4_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, a, aa, as, b, bb, bs,
c__, cc, cs, ct, g, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *nmax;
doublecomplex *a, *aa, *as, *b, *bb, *bs, *c__, *cc, *cs, *ct;
doublereal *g;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk4_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* a, doublecomplex* aa, doublecomplex* as, doublecomplex* b, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2320,30 +2105,30 @@ ftnlen sname_len;
static doublecomplex alpha;
static doublereal rbeta;
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static doublereal rbets;
static logical reset;
static char trans[1];
static logical upper;
static char uplos[1];
static integer ia, ib, jc, ma, na;
extern /* Subroutine */ int zprcn4_();
extern /* Subroutine */ int zprcn4_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
static integer nc;
extern /* Subroutine */ int zprcn6_();
extern /* Subroutine */ int zprcn6_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublereal*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ik, in, jj, lj, ks, ns;
static doublereal ralpha;
extern /* Subroutine */ int czherk_();
extern /* Subroutine */ int czherk_(integer*, char*, char*, integer*, integer*, doublereal*, doublecomplex*, integer*, doublereal*, doublecomplex*, integer*, ftnlen, ftnlen);
static doublereal errmax;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static char transs[1], transt[1];
extern /* Subroutine */ int czsyrk_();
extern /* Subroutine */ int czsyrk_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lda, lcc, ldc;
static doublecomplex als;
static integer ict, icu;
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZHERK and ZSYRK. */
@ -2732,20 +2517,7 @@ L130:
} /* zchk4_ */
/* Subroutine */ int zprcn4_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublecomplex *alpha;
integer *lda;
doublecomplex *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ int zprcn4_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -2775,20 +2547,7 @@ return 0;
/* Subroutine */ int zprcn6_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublereal *alpha;
integer *lda;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ int zprcn6_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublereal* alpha, integer* lda, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Local variables */
@ -2818,23 +2577,7 @@ return 0;
} /* zprcn6_ */
/* Subroutine */ int zchk5_(sname, eps, thresh, nout, ntra, trace, rewi,
fatal, nidim, idim, nalf, alf, nbet, bet, nmax, ab, aa, as, bb, bs,
c__, cc, cs, ct, g, w, iorder, sname_len)
char *sname;
doublereal *eps, *thresh;
integer *nout, *ntra;
logical *trace, *rewi, *fatal;
integer *nidim, *idim, *nalf;
doublecomplex *alf;
integer *nbet;
doublecomplex *bet;
integer *nmax;
doublecomplex *ab, *aa, *as, *bb, *bs, *c__, *cc, *cs, *ct;
doublereal *g;
doublecomplex *w;
integer *iorder;
ftnlen sname_len;
/* Subroutine */ int zchk5_(char* sname, doublereal* eps, doublereal* thresh, integer* nout, integer* ntra, logical* trace, logical* rewi, logical* fatal, integer* nidim, integer* idim, integer* nalf, doublecomplex* alf, integer* nbet, doublecomplex* bet, integer* nmax, doublecomplex* ab, doublecomplex* aa, doublecomplex* as, doublecomplex* bb, doublecomplex* bs, doublecomplex* c__, doublecomplex* cc, doublecomplex* cs, doublecomplex* ct, doublereal* g, doublecomplex* w, integer* iorder, ftnlen sname_len)
{
/* Initialized data */
@ -2857,27 +2600,28 @@ ftnlen sname_len;
static doublecomplex alpha;
static doublereal rbeta;
static logical isame[13];
extern /* Subroutine */ int zmake_();
extern /* Subroutine */ int zmake_(char*, char*, char*, integer*, integer*, doublecomplex*, integer*, doublecomplex*, integer*, logical*, doublecomplex*, ftnlen, ftnlen, ftnlen);
static integer nargs;
extern /* Subroutine */ int zmmch_();
extern /* Subroutine */ int zmmch_(char*, char*, integer*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, doublereal*, doublecomplex*, integer*, doublereal*, doublereal*, logical*, integer*, logical*, ftnlen, ftnlen);
static doublereal rbets;
static logical reset;
static char trans[1];
static logical upper;
static char uplos[1];
static integer ia, ib, jc, ma, na, nc;
extern /* Subroutine */ int zprcn5_(), zprcn7_();
extern /* Subroutine */ int zprcn5_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublecomplex*, integer*, ftnlen, ftnlen, ftnlen);
extern /* Subroutine */ int zprcn7_(integer*, integer*, char*, integer*, char*, char*, integer*, integer*, doublecomplex*, integer*, integer*, doublereal*, integer*, ftnlen, ftnlen, ftnlen);
static integer ik, in, jj, lj, ks, ns;
static doublereal errmax;
extern logical lzeres_();
extern logical lzeres_(char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static char transs[1], transt[1];
extern /* Subroutine */ int czher2k_();
extern /* Subroutine */ int czher2k_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublereal*, doublecomplex*, integer*, ftnlen, ftnlen);
static integer laa, lbb, lda, lcc, ldb, ldc;
static doublecomplex als;
static integer ict, icu;
extern /* Subroutine */ int czsyr2k_();
extern /* Subroutine */ int czsyr2k_(integer*, char*, char*, integer*, integer*, doublecomplex*, doublecomplex*, integer*, doublecomplex*, integer*, doublecomplex*, doublecomplex*, integer*, ftnlen, ftnlen);
static doublereal err;
extern logical lze_();
extern logical lze_(doublecomplex*, doublecomplex*, integer*);
/* Tests ZHER2K and ZSYR2K. */
@ -3349,20 +3093,7 @@ L160:
} /* zchk5_ */
/* Subroutine */ int zprcn5_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublecomplex *alpha;
integer *lda, *ldb;
doublecomplex *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ int zprcn5_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublecomplex* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Local variables */
static char ca[14], cu[14], crc[14];
@ -3392,20 +3123,7 @@ return 0;
/* Subroutine */ int zprcn7_(nout, nc, sname, iorder, uplo, transa, n, k,
alpha, lda, ldb, beta, ldc, sname_len, uplo_len, transa_len)
integer *nout, *nc;
char *sname;
integer *iorder;
char *uplo, *transa;
integer *n, *k;
doublecomplex *alpha;
integer *lda, *ldb;
doublereal *beta;
integer *ldc;
ftnlen sname_len;
ftnlen uplo_len;
ftnlen transa_len;
/* Subroutine */ int zprcn7_(integer* nout, integer* nc, char* sname, integer* iorder, char* uplo, char* transa, integer* n, integer* k, doublecomplex* alpha, integer* lda, integer* ldb, doublereal* beta, integer* ldc, ftnlen sname_len, ftnlen uplo_len, ftnlen transa_len)
{
/* Local variables */
@ -3435,19 +3153,7 @@ return 0;
} /* zprcn7_ */
/* Subroutine */ int zmake_(type__, uplo, diag, m, n, a, nmax, aa, lda, reset,
transl, type_len, uplo_len, diag_len)
char *type__, *uplo, *diag;
integer *m, *n;
doublecomplex *a;
integer *nmax;
doublecomplex *aa;
integer *lda;
logical *reset;
doublecomplex *transl;
ftnlen type_len;
ftnlen uplo_len;
ftnlen diag_len;
/* Subroutine */ int zmake_(char* type__, char* uplo, char* diag, integer* m, integer* n, doublecomplex* a, integer* nmax, doublecomplex* aa, integer* lda, logical* reset, doublecomplex* transl, ftnlen type_len, ftnlen uplo_len, ftnlen diag_len)
{
/* System generated locals */
integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
@ -3456,7 +3162,7 @@ ftnlen diag_len;
/* Local variables */
static integer ibeg, iend;
extern /* Double Complex */ VOID zbeg_();
extern /* Double Complex */ VOID zbeg_(doublecomplex*, logical*);
static logical unit;
static integer i__, j;
static logical lower, upper;
@ -3629,27 +3335,7 @@ ftnlen diag_len;
} /* zmake_ */
/* Subroutine */ int zmmch_(transa, transb, m, n, kk, alpha, a, lda, b, ldb,
beta, c__, ldc, ct, g, cc, ldcc, eps, err, fatal, nout, mv,
transa_len, transb_len)
char *transa, *transb;
integer *m, *n, *kk;
doublecomplex *alpha, *a;
integer *lda;
doublecomplex *b;
integer *ldb;
doublecomplex *beta, *c__;
integer *ldc;
doublecomplex *ct;
doublereal *g;
doublecomplex *cc;
integer *ldcc;
doublereal *eps, *err;
logical *fatal;
integer *nout;
logical *mv;
ftnlen transa_len;
ftnlen transb_len;
/* Subroutine */ int zmmch_(char* transa, char* transb, integer* m, integer* n, integer* kk, doublecomplex* alpha, doublecomplex* a, integer* lda, doublecomplex* b, integer* ldb, doublecomplex* beta, doublecomplex* c__, integer* ldc, doublecomplex* ct, doublereal* g, doublecomplex* cc, integer* ldcc, doublereal* eps, doublereal* err, logical* fatal, integer* nout, logical* mv, ftnlen transa_len, ftnlen transb_len)
{
/* System generated locals */
@ -3658,7 +3344,7 @@ ftnlen transb_len;
doublereal d__1, d__2, d__3, d__4, d__5, d__6;
doublecomplex z__1, z__2, z__3, z__4;
double sqrt();
double sqrt(double);
/* Local variables */
static doublereal erri;
static integer i__, j, k;
@ -4031,9 +3717,7 @@ L250:
} /* zmmch_ */
logical lze_(ri, rj, lr)
doublecomplex *ri, *rj;
integer *lr;
logical lze_(doublecomplex* ri, doublecomplex* rj, integer* lr)
{
/* System generated locals */
integer i__1, i__2, i__3;
@ -4082,13 +3766,7 @@ L30:
} /* lze_ */
logical lzeres_(type__, uplo, m, n, aa, as, lda, type_len, uplo_len)
char *type__, *uplo;
integer *m, *n;
doublecomplex *aa, *as;
integer *lda;
ftnlen type_len;
ftnlen uplo_len;
logical lzeres_(char* type__, char* uplo, integer* m, integer* n, doublecomplex *aa, doublecomplex* as, integer* lda, ftnlen type_len, ftnlen uplo_len)
{
/* System generated locals */
integer aa_dim1, aa_offset, as_dim1, as_offset, i__1, i__2, i__3, i__4;
@ -4184,9 +3862,7 @@ L80:
} /* lzeres_ */
/* Double Complex */ VOID zbeg_( ret_val, reset)
doublecomplex * ret_val;
logical *reset;
/* Double Complex */ VOID zbeg_(doublecomplex* ret_val, logical* reset)
{
/* System generated locals */
doublereal d__1, d__2;
@ -4249,8 +3925,7 @@ L10:
} /* zbeg_ */
doublereal ddiff_(x, y)
doublereal *x, *y;
doublereal ddiff_(doublereal* x, doublereal* y)
{
/* System generated locals */
doublereal ret_val;

3951
ctest/c_zblat3c_3m.c Normal file

File diff suppressed because it is too large Load Diff

270
docs/distributing.md Normal file
View File

@ -0,0 +1,270 @@
# Guidance for redistributing OpenBLAS
*We note that this document contains recommendations only - packagers and other
redistributors are in charge of how OpenBLAS is built and distributed in their
systems, and may have good reasons to deviate from the guidance given on this
page. These recommendations are aimed at general packaging systems, with a user
base that typically is large, open source (or freely available at least), and
doesn't behave uniformly or that the packager is directly connected with.*
OpenBLAS has a large number of build-time options which can be used to change
how it behaves at runtime, how artifacts or symbols are named, etc. Variation
in build configuration can be necessary to acheive a given end goal within a
distribution or as an end user. However, such variation can also make it more
difficult to build on top of OpenBLAS and ship code or other packages in a way
that works across many different distros. Here we provide guidance about the
most important build options, what effects they may have when changed, and
which ones to default to.
The Make and CMake build systems provide equivalent options and yield more or
less the same artifacts, but not exactly (the CMake builds are still
experimental). You can choose either one and the options will function in the
same way, however the CMake outputs may require some renaming. To review
available build options, see `Makefile.rule` or `CMakeLists.txt` in the root of
the repository.
Build options typically fall into two categories: (a) options that affect the
user interface, such as library and symbol names or APIs that are made
available, and (b) options that affect performance and runtime behavior, such
as threading behavior or CPU architecture-specific code paths. The user
interface options are more important to keep aligned between distributions,
while for the performance-related options there are typically more reasons to
make choices that deviate from the defaults.
Here are recommendations for user interface related packaging choices where it
is not likely to be a good idea to deviate (typically these are the default
settings):
1. Include CBLAS. The CBLAS interface is widely used and it doesn't affect
binary size much, so don't turn it off.
2. Include LAPACK and LAPACKE. The LAPACK interface is also widely used, and
while it does make up a significant part of the binary size of the installed
library, that does not outweigh the regression in usability when deviating
from the default here.[^1]
3. Always distribute the pkg-config (`.pc`) and CMake `.cmake`) dependency
detection files. These files are used by build systems when users want to
link against OpenBLAS, and there is no benefit of leaving them out.
4. Provide the LP64 interface by default, and if in addition to that you choose
to provide an ILP64 interface build as well, use a symbol suffix to avoid
symbol name clashes (see the next section).
[^1] All major distributions do include LAPACK as of mid 2023 as far as we
know. Older versions of Arch Linux did not, and that was known to cause
problems.
## ILP64 interface builds
The LP64 (32-bit integer) interface is the default build, and has
well-established C and Fortran APIs as determined by the reference (Netlib)
BLAS and LAPACK libraries. The ILP64 (64-bit integer) interface however does
not have a standard API: symbol names and shared/static library names can be
produced in multiple ways, and this tends to make it difficult to use.
As of today there is an agreed-upon way of choosing names for OpenBLAS between
a number of key users/redistributors, which is the closest thing to a standard
that there is now. However, there is an ongoing standardization effort in the
reference BLAS and LAPACK libraries, which differs from the current OpenBLAS
agreed-upon convention. In this section we'll aim to explain both.
Those two methods are fairly similar, and have a key thing in common: *using a
symbol suffix*. This is good practice; it is recommended that if you distribute
an ILP64 build, to have it use a symbol suffix containing `64` in the name.
This avoids potential symbol clashes when different packages which depend on
OpenBLAS load both an LP64 and an ILP64 library into memory at the same time.
### The current OpenBLAS agreed-upon ILP64 convention
This convention comprises the shared library name and the symbol suffix in the
shared library. The symbol suffix to use is `64_`, implying that the library
name will be `libopenblas64_.so` and the symbols in that library end in `64_`.
The central issue where this was discussed is
[openblas#646](https://github.com/xianyi/OpenBLAS/issues/646), and adopters
include Fedora, Julia, NumPy and SciPy - SuiteSparse already used it as well.
To build shared and static libraries with the currently recommended ILP64
conventions with Make:
```bash
$ make INTERFACE64=1 SYMBOLSUFFIX=64_
```
This will produce libraries named `libopenblas64_.so|a`, a pkg-config file
named `openblas64.pc`, and CMake and header files.
Installing locally and inspecting the output will show a few more details:
```bash
$ make install PREFIX=$PWD/../openblas/make64 INTERFACE64=1 SYMBOLSUFFIX=64_
$ tree . # output slightly edited down
.
├── include
│   ├── cblas.h
│   ├── f77blas.h
│   ├── lapacke_config.h
│   ├── lapacke.h
│   ├── lapacke_mangling.h
│   ├── lapacke_utils.h
│   ├── lapack.h
│   └── openblas_config.h
└── lib
├── cmake
│   └── openblas
│   ├── OpenBLASConfig.cmake
│   └── OpenBLASConfigVersion.cmake
├── libopenblas64_.a
├── libopenblas64_.so
└── pkgconfig
└── openblas64.pc
```
A key point are the symbol names. These will equal the LP64 symbol names, then
(for Fortran only) the compiler mangling, and then the `64_` symbol suffix.
Hence to obtain the final symbol names, we need to take into account which
Fortran compiler we are using. For the most common cases (e.g., gfortran, Intel
Fortran, or Flang), that means appending a single underscore. In that case, the
result is:
| base API name | binary symbol name | call from Fortran code | call from C code |
|---------------|--------------------|------------------------|-----------------------|
| `dgemm` | `dgemm_64_` | `dgemm_64(...)` | `dgemm_64_(...)` |
| `cblas_dgemm` | `cblas_dgemm64_` | n/a | `cblas_dgemm64_(...)` |
It is quite useful to have these symbol names be as uniform as possible across
different packaging systems.
The equivalent build options with CMake are:
```bash
$ mkdir build && cd build
$ cmake .. -DINTERFACE64=1 -DSYMBOLSUFFIX=64_ -DBUILD_SHARED_LIBS=ON -DBUILD_STATIC_LIBS=ON
$ cmake --build . -j
```
Note that the result is not 100% identical to the Make result. For example, the
library name ends in `_64` rather than `64_` - it is recommended to rename them
to match the Make library names (also update the `libsuffix` entry in
`openblas64.pc` to match that rename).
```bash
$ cmake --install . --prefix $PWD/../../openblas/cmake64
$ tree .
.
├── include
│   └── openblas64
│   ├── cblas.h
│   ├── f77blas.h
│   ├── lapacke_config.h
│   ├── lapacke_example_aux.h
│   ├── lapacke.h
│   ├── lapacke_mangling.h
│   ├── lapacke_utils.h
│   ├── lapack.h
│   ├── openblas64
│   │   └── lapacke_mangling.h
│   └── openblas_config.h
└── lib
├── cmake
│   └── OpenBLAS64
│   ├── OpenBLAS64Config.cmake
│   ├── OpenBLAS64ConfigVersion.cmake
│   ├── OpenBLAS64Targets.cmake
│   └── OpenBLAS64Targets-noconfig.cmake
├── libopenblas_64.a
├── libopenblas_64.so -> libopenblas_64.so.0
└── pkgconfig
└── openblas64.pc
```
### The upcoming standardized ILP64 convention
While the `64_` convention above got some adoption, it's slightly hacky and is
implemented through the use of `objcopy`. An effort is ongoing for a more
broadly adopted convention in the reference BLAS and LAPACK libraries, using
(a) the `_64` suffix, and (b) applying that suffix _before_ rather than after
Fortran compiler mangling. The central issue for this is
[lapack#666](https://github.com/Reference-LAPACK/lapack/issues/666).
For the most common cases of compiler mangling (a single `_` appended), the end
result will be:
| base API name | binary symbol name | call from Fortran code | call from C code |
|---------------|--------------------|------------------------|-----------------------|
| `dgemm` | `dgemm_64_` | `dgemm_64(...)` | `dgemm_64_(...)` |
| `cblas_dgemm` | `cblas_dgemm_64` | n/a | `cblas_dgemm_64(...)` |
For other compiler mangling schemes, replace the trailing `_` by the scheme in use.
The shared library name for this `_64` convention should be `libopenblas_64.so`.
Note: it is not yet possible to produce an OpenBLAS build which employs this
convention! Once reference BLAS and LAPACK with support for `_64` have been
released, a future OpenBLAS release will support it. For now, please use the
older `64_` scheme and avoid using the name `libopenblas_64.so`; it should be
considered reserved for future use of the `_64` standard as prescribed by
reference BLAS/LAPACK.
## Performance and runtime behavior related build options
For these options there are multiple reasonable or common choices.
### Threading related options
OpenBLAS can be built as a multi-threaded or single-threaded library, with the
default being multi-threaded. It's expected that the default `libopenblas`
library is multi-threaded; if you'd like to also distribute single-threaded
builds, consider naming them `libopenblas_sequential`.
OpenBLAS can be built with pthreads or OpenMP as the threading model, with the
default being pthreads. Both options are commonly used, and the choice here
should not influence the shared library name. The choice will be captured by
the `.pc` file. E.g.,:
```bash
$ pkg-config --libs openblas
-fopenmp -lopenblas
$ cat openblas.pc
...
openblas_config= ... USE_OPENMP=0 MAX_THREADS=24
```
The maximum number of threads users will be able to use is determined at build
time by the `NUM_THREADS` build option. It defaults to 24, and there's a wide
range of values that are reasonable to use (up to 256). 64 is a typical choice
here; there is a memory footprint penalty that is linear in `NUM_THREADS`.
Please see `Makefile.rule` for more details.
### CPU architecture related options
OpenBLAS contains a lot of CPU architecture-specific optimizations, hence when
distributing to a user base with a variety of hardware, it is recommended to
enable CPU architecture runtime detection. This will dynamically select
optimized kernels for individual APIs. To do this, use the `DYNAMIC_ARCH=1`
build option. This is usually done on all common CPU families, except when
there are known issues.
In case the CPU architecture is known (e.g. you're building binaries for macOS
M1 users), it is possible to specify the target architecture directly with the
`TARGET=` build option.
`DYNAMIC_ARCH` and `TARGET` are covered in more detail in the main `README.md`
in this repository.
## Real-world examples
OpenBLAS is likely to be distributed in one of these distribution models:
1. As a standalone package, or multiple packages, in a packaging ecosystem like
a Linux distro, Homebrew, conda-forge or MSYS2.
2. Vendored as part of a larger package, e.g. in Julia, NumPy, SciPy, or R.
3. Locally, e.g. making available as a build on a single HPC cluster.
The guidance on this page is most important for models (1) and (2). These links
to build recipes for a representative selection of packaging systems may be
helpful as a reference:
- [Fedora](https://src.fedoraproject.org/rpms/openblas/blob/rawhide/f/openblas.spec)
- [Debian](https://salsa.debian.org/science-team/openblas/-/blob/master/debian/rules)
- [Homebrew](https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/openblas.rb)
- [MSYS2](https://github.com/msys2/MINGW-packages/blob/master/mingw-w64-openblas/PKGBUILD)
- [conda-forge](https://github.com/conda-forge/openblas-feedstock/blob/main/recipe/build.sh)
- [NumPy/SciPy](https://github.com/MacPython/openblas-libs/blob/main/tools/build_openblas.sh)
- [Nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/development/libraries/science/math/openblas/default.nix)

View File

@ -40,7 +40,7 @@
#include <stdlib.h>
#include "common.h"
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) {
blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];

View File

@ -60,7 +60,7 @@ static const int divide_rule[][2] =
{ 1, 61}, { 2, 31}, { 7, 9}, { 8, 8},
};
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG nthreads) {
blas_queue_t queue[MAX_CPU_NUMBER];

View File

@ -40,7 +40,7 @@
#include <stdlib.h>
#include "common.h"
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) {
blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];

View File

@ -42,7 +42,7 @@
int CNAME(int mode,
blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n,
int (*function)(), void *sa, void *sb, BLASLONG divM, BLASLONG divN) {
int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*,FLOAT *, FLOAT *, BLASLONG ), void *sa, void *sb, BLASLONG divM, BLASLONG divN) {
blas_queue_t queue[MAX_CPU_NUMBER];

View File

@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@ -44,10 +45,6 @@
#define DIVIDE_RATE 2
#endif
#ifndef SWITCH_RATIO
#define SWITCH_RATIO 2
#endif
//The array of job_t may overflow the stack.
//Instead, use malloc to alloc job_t.
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
@ -1015,6 +1012,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
BLASLONG divN, divT;
int mode;
#if defined(DYNAMIC_ARCH)
int switch_ratio = gotoblas->switch_ratio;
#else
int switch_ratio = SWITCH_RATIO;
#endif
if (range_m) {
BLASLONG m_from = *(((BLASLONG *)range_m) + 0);
BLASLONG m_to = *(((BLASLONG *)range_m) + 1);
@ -1030,7 +1033,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
}
*/
if ((args -> m < nthreads * SWITCH_RATIO) || (args -> n < nthreads * SWITCH_RATIO)) {
if ((args -> m < nthreads * switch_ratio) || (args -> n < nthreads * switch_ratio)) {
GEMM3M_LOCAL(args, range_m, range_n, sa, sb, 0);
return 0;
}
@ -1038,7 +1041,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
divT = nthreads;
divN = 1;
while ((GEMM3M_P * divT > m * SWITCH_RATIO) && (divT > 1)) {
while ((GEMM3M_P * divT > m * switch_ratio) && (divT > 1)) {
do {
divT --;
divN = 1;

View File

@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@ -44,10 +45,6 @@
#define DIVIDE_RATE 2
#endif
#ifndef SWITCH_RATIO
#define SWITCH_RATIO 2
#endif
//The array of job_t may overflow the stack.
//Instead, use malloc to alloc job_t.
#if MAX_CPU_NUMBER > BLAS3_MEM_ALLOC_THRESHOLD
@ -528,7 +525,13 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
int mode, mask;
double dnum, di, dinum;
if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) {
#if defined(DYNAMIC_ARCH)
int switch_ratio = gotoblas->switch_ratio;
#else
int switch_ratio = SWITCH_RATIO;
#endif
if ((nthreads == 1) || (args->n < nthreads * switch_ratio)) {
SYRK_LOCAL(args, range_m, range_n, sa, sb, 0);
return 0;
}

View File

@ -1,5 +1,6 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* Copyright 2023 The OpenBLAS Project. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
@ -44,10 +45,6 @@
#define DIVIDE_RATE 2
#endif
#ifndef SWITCH_RATIO
#define SWITCH_RATIO 2
#endif
#ifndef GEMM_PREFERED_SIZE
#define GEMM_PREFERED_SIZE 1
#endif
@ -595,6 +592,11 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
BLASLONG width, i, j, k, js;
BLASLONG m, n, n_from, n_to;
int mode;
#if defined(DYNAMIC_ARCH)
int switch_ratio = gotoblas->switch_ratio;
#else
int switch_ratio = SWITCH_RATIO;
#endif
/* Get execution mode */
#ifndef COMPLEX
@ -732,8 +734,8 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
num_parts = 0;
while (n > 0){
width = blas_quickdivide(n + nthreads - num_parts - 1, nthreads - num_parts);
if (width < SWITCH_RATIO) {
width = SWITCH_RATIO;
if (width < switch_ratio) {
width = switch_ratio;
}
width = round_up(n, width, GEMM_PREFERED_SIZE);
@ -792,6 +794,11 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
BLASLONG m = args -> m;
BLASLONG n = args -> n;
BLASLONG nthreads_m, nthreads_n;
#if defined(DYNAMIC_ARCH)
int switch_ratio = gotoblas->switch_ratio;
#else
int switch_ratio = SWITCH_RATIO;
#endif
/* Get dimensions from index ranges if available */
if (range_m) {
@ -801,21 +808,21 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, IFLOAT *sa, IF
n = range_n[1] - range_n[0];
}
/* Partitions in m should have at least SWITCH_RATIO rows */
if (m < 2 * SWITCH_RATIO) {
/* Partitions in m should have at least switch_ratio rows */
if (m < 2 * switch_ratio) {
nthreads_m = 1;
} else {
nthreads_m = args -> nthreads;
while (m < nthreads_m * SWITCH_RATIO) {
while (m < nthreads_m * switch_ratio) {
nthreads_m = nthreads_m / 2;
}
}
/* Partitions in n should have at most SWITCH_RATIO * nthreads_m columns */
if (n < SWITCH_RATIO * nthreads_m) {
/* Partitions in n should have at most switch_ratio * nthreads_m columns */
if (n < switch_ratio * nthreads_m) {
nthreads_n = 1;
} else {
nthreads_n = (n + SWITCH_RATIO * nthreads_m - 1) / (SWITCH_RATIO * nthreads_m);
nthreads_n = (n + switch_ratio * nthreads_m - 1) / (switch_ratio * nthreads_m);
if (nthreads_m * nthreads_n > args -> nthreads) {
nthreads_n = blas_quickdivide(args -> nthreads, nthreads_m);
}

View File

@ -41,7 +41,7 @@
#include <math.h>
#include "common.h"
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(), void *sa, void *sb, BLASLONG nthreads) {
int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int (*function)(blas_arg_t*, BLASLONG*, BLASLONG*, FLOAT *, FLOAT *, BLASLONG), void *sa, void *sb, BLASLONG nthreads) {
blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];

View File

@ -43,7 +43,7 @@
int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads){
void *c, BLASLONG ldc, int (*function)(void), int nthreads){
blas_queue_t queue[MAX_CPU_NUMBER];
blas_arg_t args [MAX_CPU_NUMBER];
@ -141,7 +141,7 @@ int blas_level1_thread(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha
int blas_level1_thread_with_return_value(int mode, BLASLONG m, BLASLONG n, BLASLONG k, void *alpha,
void *a, BLASLONG lda,
void *b, BLASLONG ldb,
void *c, BLASLONG ldc, int (*function)(), int nthreads){
void *c, BLASLONG ldc, int (*function)(void), int nthreads){
blas_queue_t queue[MAX_CPU_NUMBER];
blas_arg_t args [MAX_CPU_NUMBER];

View File

@ -93,7 +93,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif
#endif
extern unsigned int openblas_thread_timeout();
extern unsigned int openblas_thread_timeout(void);
#ifdef SMP_SERVER
@ -113,6 +113,8 @@ extern unsigned int openblas_thread_timeout();
/* We need this global for checking if initialization is finished. */
int blas_server_avail __attribute__((aligned(ATTRIBUTE_SIZE))) = 0;
int blas_omp_threads_local = 1;
/* Local Variables */
#if defined(USE_PTHREAD_LOCK)
static pthread_mutex_t server_lock = PTHREAD_MUTEX_INITIALIZER;
@ -973,7 +975,7 @@ void goto_set_num_threads(int num_threads) {
increased_threads = 1;
for(i = blas_num_threads - 1; i < num_threads - 1; i++){
for(i = (blas_num_threads > 0) ? blas_num_threads - 1 : 0; i < num_threads - 1; i++){
atomic_store_queue(&thread_status[i].queue, (blas_queue_t *)0);
thread_status[i].status = THREAD_STATUS_WAKEUP;

View File

@ -68,8 +68,10 @@
#endif
int blas_server_avail = 0;
int blas_omp_number_max = 0;
int blas_omp_threads_local = 1;
extern int openblas_omp_adaptive_env();
extern int openblas_omp_adaptive_env(void);
static void * blas_thread_buffer[MAX_PARALLEL_NUMBER][MAX_CPU_NUMBER];
#ifdef HAVE_C11
@ -78,7 +80,7 @@ static atomic_bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
static _Bool blas_buffer_inuse[MAX_PARALLEL_NUMBER];
#endif
static void adjust_thread_buffers() {
static void adjust_thread_buffers(void) {
int i=0, j=0;
@ -100,8 +102,6 @@ static void adjust_thread_buffers() {
void goto_set_num_threads(int num_threads) {
blas_num_threads_set = 1;
if (num_threads < 0) blas_num_threads_set = 0;
if (num_threads < 1) num_threads = blas_num_threads;
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
@ -126,6 +126,17 @@ void openblas_set_num_threads(int num_threads) {
int blas_thread_init(void){
#if defined(__FreeBSD__) && defined(__clang__)
extern int openblas_omp_num_threads_env(void);
if(blas_omp_number_max <= 0)
blas_omp_number_max= openblas_omp_num_threads_env();
if (blas_omp_number_max <= 0)
blas_omp_number_max=MAX_CPU_NUMBER;
#else
blas_omp_number_max = omp_get_max_threads();
#endif
blas_get_cpu_number();
adjust_thread_buffers();

View File

@ -48,34 +48,38 @@
#endif
#endif
#ifdef SMP_DEBUG
# define MT_TRACE(...) fprintf(stderr, __VA_ARGS__)
#else
# define MT_TRACE(...)
#endif
/* This is a thread implementation for Win32 lazy implementation */
/* Thread server common information */
typedef struct{
CRITICAL_SECTION lock;
HANDLE filled;
HANDLE killed;
blas_queue_t *queue; /* Parameter Pointer */
int shutdown; /* server shutdown flag */
} blas_pool_t;
static blas_queue_t *work_queue = NULL;
static HANDLE kickoff_event = NULL;
static CRITICAL_SECTION queue_lock;
/* We need this global for checking if initialization is finished. */
int blas_server_avail = 0;
int blas_omp_threads_local = 1;
/* Local Variables */
static BLASULONG server_lock = 0;
static blas_pool_t pool;
static HANDLE blas_threads [MAX_CPU_NUMBER];
static DWORD blas_threads_id[MAX_CPU_NUMBER];
static volatile int thread_target; // target num of live threads, volatile for cross-thread reads
//
// Legacy code path
//
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb) {
static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
if (!(mode & BLAS_COMPLEX)){
if (!(mode & BLAS_COMPLEX)) {
#ifdef EXPRECISION
if ((mode & BLAS_PREC) == BLAS_XDOUBLE){
/* REAL / Extended Double */
@ -90,7 +94,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> c, args -> ldc, sb);
} else
#endif
if ((mode & BLAS_PREC) == BLAS_DOUBLE){
if ((mode & BLAS_PREC) == BLAS_DOUBLE) {
/* REAL / Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, double *, BLASLONG,
@ -101,7 +105,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> a, args -> lda,
args -> b, args -> ldb,
args -> c, args -> ldc, sb);
} else if ((mode & BLAS_PREC) == BLAS_SINGLE){
} else if ((mode & BLAS_PREC) == BLAS_SINGLE) {
/* REAL / Single */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, float *, BLASLONG,
@ -113,7 +117,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> b, args -> ldb,
args -> c, args -> ldc, sb);
#ifdef BUILD_BFLOAT16
} else if ((mode & BLAS_PREC) == BLAS_BFLOAT16){
} else if ((mode & BLAS_PREC) == BLAS_BFLOAT16) {
/* REAL / BFLOAT16 */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, bfloat16,
bfloat16 *, BLASLONG, bfloat16 *, BLASLONG,
@ -124,7 +128,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> a, args -> lda,
args -> b, args -> ldb,
args -> c, args -> ldc, sb);
} else if ((mode & BLAS_PREC) == BLAS_STOBF16){
} else if ((mode & BLAS_PREC) == BLAS_STOBF16) {
/* REAL / BLAS_STOBF16 */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, float,
float *, BLASLONG, bfloat16 *, BLASLONG,
@ -135,7 +139,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> a, args -> lda,
args -> b, args -> ldb,
args -> c, args -> ldc, sb);
} else if ((mode & BLAS_PREC) == BLAS_DTOBF16){
} else if ((mode & BLAS_PREC) == BLAS_DTOBF16) {
/* REAL / BLAS_DTOBF16 */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double,
double *, BLASLONG, bfloat16 *, BLASLONG,
@ -152,7 +156,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
}
} else {
#ifdef EXPRECISION
if ((mode & BLAS_PREC) == BLAS_XDOUBLE){
if ((mode & BLAS_PREC) == BLAS_XDOUBLE) {
/* COMPLEX / Extended Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
xdouble *, BLASLONG, xdouble *, BLASLONG,
@ -166,7 +170,7 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
args -> c, args -> ldc, sb);
} else
#endif
if ((mode & BLAS_PREC) == BLAS_DOUBLE){
if ((mode & BLAS_PREC) == BLAS_DOUBLE) {
/* COMPLEX / Double */
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, double, double,
double *, BLASLONG, double *, BLASLONG,
@ -196,88 +200,78 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){
}
}
/* This is a main routine of threads. Each thread waits until job is */
/* queued. */
static DWORD WINAPI blas_thread_server(void *arg){
//
// This is a main routine of threads. Each thread waits until job is queued.
//
static DWORD WINAPI blas_thread_server(void *arg) {
/* Thread identifier */
#ifdef SMP_DEBUG
BLASLONG cpu = (BLASLONG)arg;
#endif
void *buffer, *sa, *sb;
blas_queue_t *queue;
DWORD action;
HANDLE handles[] = {pool.filled, pool.killed};
/* Each server needs each buffer */
buffer = blas_memory_alloc(2);
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Thread is started!\n", cpu);
#endif
MT_TRACE("Server[%2ld] Thread is started!\n", cpu);
while (1){
while (1) {
/* Waiting for Queue */
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Waiting for Queue.\n", cpu);
#endif
MT_TRACE("Server[%2ld] Waiting for Queue.\n", cpu);
do {
action = WaitForMultipleObjects(2, handles, FALSE, INFINITE);
} while ((action != WAIT_OBJECT_0) && (action != WAIT_OBJECT_0 + 1));
// event raised when work is added to the queue
WaitForSingleObject(kickoff_event, INFINITE);
if (action == WAIT_OBJECT_0 + 1) break;
if (cpu > thread_target - 2) {
//MT_TRACE("thread [%d] exiting.\n", cpu);
break; // excess thread, so worker thread exits
}
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Got it.\n", cpu);
#endif
MT_TRACE("Server[%2ld] Got it.\n", cpu);
EnterCriticalSection(&pool.lock);
EnterCriticalSection(&queue_lock);
queue = pool.queue;
if (queue) pool.queue = queue->next;
queue = work_queue;
if (queue)
work_queue = work_queue->next;
LeaveCriticalSection(&pool.lock);
LeaveCriticalSection(&queue_lock);
if (queue) {
if (queue) {
int (*routine)(blas_arg_t *, void *, void *, void *, void *, BLASLONG) = queue -> routine;
if (pool.queue) SetEvent(pool.filled);
sa = queue -> sa;
sb = queue -> sb;
#ifdef CONSISTENT_FPCSR
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
#endif
#ifdef CONSISTENT_FPCSR
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode));
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode));
#endif
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
MT_TRACE("Server[%2ld] Started. Mode = 0x%03x M = %3ld N=%3ld K=%3ld\n",
cpu, queue->mode, queue-> args ->m, queue->args->n, queue->args->k);
#endif
// fprintf(stderr, "queue start[%ld]!!!\n", cpu);
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING1;
#endif
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING1;
#endif
if (sa == NULL) sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
if (sa == NULL)
sa = (void *)((BLASLONG)buffer + GEMM_OFFSET_A);
if (sb == NULL) {
if (!(queue -> mode & BLAS_COMPLEX)){
if (!(queue -> mode & BLAS_COMPLEX)) {
#ifdef EXPRECISION
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE){
if ((queue -> mode & BLAS_PREC) == BLAS_XDOUBLE) {
sb = (void *)(((BLASLONG)sa + ((XGEMM_P * XGEMM_Q * sizeof(xdouble)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
} else
#endif
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE){
if ((queue -> mode & BLAS_PREC) == BLAS_DOUBLE) {
#ifdef BUILD_DOUBLE
sb = (void *)(((BLASLONG)sa + ((DGEMM_P * DGEMM_Q * sizeof(double)
+ GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
@ -311,70 +305,59 @@ static DWORD WINAPI blas_thread_server(void *arg){
/* Other types in future */
}
}
queue->sb=sb;
queue->sb=sb;
}
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING2;
#endif
#ifdef MONITOR
main_status[cpu] = MAIN_RUNNING2;
#endif
if (!(queue -> mode & BLAS_LEGACY)) {
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
(routine)(queue -> args, queue -> range_m, queue -> range_n, sa, sb, queue -> position);
} else {
legacy_exec(routine, queue -> mode, queue -> args, sb);
legacy_exec(routine, queue -> mode, queue -> args, sb);
}
}else{
continue; //if queue == NULL
}
} else {
continue; //if queue == NULL
}
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Finished!\n", cpu);
#endif
EnterCriticalSection(&queue->lock);
queue -> status = BLAS_STATUS_FINISHED;
LeaveCriticalSection(&queue->lock);
SetEvent(queue->finish);
MT_TRACE("Server[%2ld] Finished!\n", cpu);
queue->finished = 1;
}
/* Shutdown procedure */
#ifdef SMP_DEBUG
fprintf(STDERR, "Server[%2ld] Shutdown!\n", cpu);
#endif
MT_TRACE("Server[%2ld] Shutdown!\n", cpu);
blas_memory_free(buffer);
return 0;
}
}
/* Initializing routine */
int blas_thread_init(void){
//
// Initializing routine
//
int blas_thread_init(void) {
BLASLONG i;
if (blas_server_avail || (blas_cpu_number <= 1)) return 0;
LOCK_COMMAND(&server_lock);
#ifdef SMP_DEBUG
fprintf(STDERR, "Initializing Thread(Num. threads = %d)\n",
blas_cpu_number);
#endif
MT_TRACE("Initializing Thread(Num. threads = %d)\n", blas_cpu_number);
if (!blas_server_avail){
if (!blas_server_avail) {
// create the kickoff Event
kickoff_event = CreateEvent(NULL, TRUE, FALSE, NULL);
InitializeCriticalSection(&pool.lock);
pool.filled = CreateEvent(NULL, FALSE, FALSE, NULL);
pool.killed = CreateEvent(NULL, TRUE, FALSE, NULL);
thread_target = blas_cpu_number;
pool.shutdown = 0;
pool.queue = NULL;
InitializeCriticalSection(&queue_lock);
for(i = 0; i < blas_cpu_number - 1; i++) {
//MT_TRACE("thread_init: creating thread [%d]\n", i);
for(i = 0; i < blas_cpu_number - 1; i++){
blas_threads[i] = CreateThread(NULL, 0,
blas_thread_server, (void *)i,
0, &blas_threads_id[i]);
@ -388,15 +371,12 @@ int blas_thread_init(void){
return 0;
}
/*
User can call one of two routines.
exec_blas_async ... immediately returns after jobs are queued.
exec_blas ... returns after jobs are finished.
*/
int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
//
// User can call one of two routines.
// exec_blas_async ... immediately returns after jobs are queued.
// exec_blas ... returns after jobs are finished.
//
int exec_blas_async(BLASLONG pos, blas_queue_t *queue) {
#if defined(SMP_SERVER)
// Handle lazy re-init of the thread-pool after a POSIX fork
@ -409,8 +389,6 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
current = queue;
while (current) {
InitializeCriticalSection(&current -> lock);
current -> finish = CreateEvent(NULL, FALSE, FALSE, NULL);
current -> position = pos;
#ifdef CONSISTENT_FPCSR
@ -418,56 +396,71 @@ int exec_blas_async(BLASLONG pos, blas_queue_t *queue){
__asm__ __volatile__ ("stmxcsr %0" : "=m" (current -> sse_mode));
#endif
current->finished = 0;
current = current -> next;
pos ++;
}
EnterCriticalSection(&pool.lock);
EnterCriticalSection(&queue_lock);
if (pool.queue) {
current = pool.queue;
while (current -> next) current = current -> next;
current -> next = queue;
} else {
pool.queue = queue;
if (!work_queue)
{
work_queue = queue;
}
else
{
blas_queue_t *next_item = work_queue;
// find the end of the work queue
while (next_item)
next_item = next_item->next;
// add new work to the end
next_item = queue;
}
LeaveCriticalSection(&pool.lock);
LeaveCriticalSection(&queue_lock);
SetEvent(pool.filled);
SetEvent(kickoff_event);
return 0;
}
int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue){
//
// Join. Wait for all queued tasks to complete
//
int exec_blas_async_wait(BLASLONG num, blas_queue_t *queue) {
#ifdef SMP_DEBUG
fprintf(STDERR, "Synchronization Waiting.\n");
#endif
MT_TRACE("Synchronization Waiting.\n");
while (num){
#ifdef SMP_DEBUG
fprintf(STDERR, "Waiting Queue ..\n");
#endif
while (num) {
MT_TRACE("Waiting Queue ..\n");
WaitForSingleObject(queue->finish, INFINITE);
while (!queue->finished)
YIELDING;
CloseHandle(queue->finish);
DeleteCriticalSection(&queue -> lock);
queue = queue->next;
num--;
}
queue = queue -> next;
num --;
}
MT_TRACE("Completely Done.\n\n");
#ifdef SMP_DEBUG
fprintf(STDERR, "Completely Done.\n\n");
#endif
// if work was added to the queue after this batch we can't sleep the worker threads
// by resetting the event
EnterCriticalSection(&queue_lock);
return 0;
if (work_queue == NULL)
ResetEvent(kickoff_event);
LeaveCriticalSection(&queue_lock);
return 0;
}
/* Execute Threads */
int exec_blas(BLASLONG num, blas_queue_t *queue){
//
// Execute Threads
//
int exec_blas(BLASLONG num, blas_queue_t *queue) {
#if defined(SMP_SERVER) && defined(OS_CYGWIN_NT)
// Handle lazy re-init of the thread-pool after a POSIX fork
@ -480,29 +473,33 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){
if ((num <= 0) || (queue == NULL)) return 0;
if ((num > 1) && queue -> next) exec_blas_async(1, queue -> next);
if ((num > 1) && queue -> next)
exec_blas_async(1, queue -> next);
routine = queue -> routine;
if (queue -> mode & BLAS_LEGACY) {
legacy_exec(routine, queue -> mode, queue -> args, queue -> sb);
} else
} else {
if (queue -> mode & BLAS_PTHREAD) {
void (*pthreadcompat)(void *) = queue -> routine;
(pthreadcompat)(queue -> args);
} else
(routine)(queue -> args, queue -> range_m, queue -> range_n,
queue -> sa, queue -> sb, 0);
queue -> sa, queue -> sb, 0);
}
if ((num > 1) && queue -> next) exec_blas_async_wait(num - 1, queue -> next);
if ((num > 1) && queue -> next)
exec_blas_async_wait(num - 1, queue -> next);
return 0;
}
/* Shutdown procedure, but user don't have to call this routine. The */
/* kernel automatically kill threads. */
int BLASFUNC(blas_thread_shutdown)(void){
//
// Shutdown procedure, but user don't have to call this routine. The
// kernel automatically kill threads.
//
int BLASFUNC(blas_thread_shutdown)(void) {
int i;
@ -510,11 +507,9 @@ int BLASFUNC(blas_thread_shutdown)(void){
LOCK_COMMAND(&server_lock);
if (blas_server_avail){
if (blas_server_avail) {
SetEvent(pool.killed);
for(i = 0; i < blas_num_threads - 1; i++){
for (i = 0; i < blas_num_threads - 1; i++) {
// Could also just use WaitForMultipleObjects
DWORD wait_thread_value = WaitForSingleObject(blas_threads[i], 50);
@ -528,9 +523,6 @@ int BLASFUNC(blas_thread_shutdown)(void){
CloseHandle(blas_threads[i]);
}
CloseHandle(pool.filled);
CloseHandle(pool.killed);
blas_server_avail = 0;
}
@ -539,6 +531,9 @@ int BLASFUNC(blas_thread_shutdown)(void){
return 0;
}
//
// Legacy function to set numbef of threads
//
void goto_set_num_threads(int num_threads)
{
long i;
@ -552,23 +547,48 @@ void goto_set_num_threads(int num_threads)
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER;
if (blas_server_avail && num_threads < blas_num_threads) {
LOCK_COMMAND(&server_lock);
thread_target = num_threads;
SetEvent(kickoff_event);
for (i = num_threads - 1; i < blas_num_threads - 1; i++) {
//MT_TRACE("set_num_threads: waiting on thread [%d] to quit.\n", i);
WaitForSingleObject(blas_threads[i], INFINITE);
//MT_TRACE("set_num_threads: thread [%d] has quit.\n", i);
CloseHandle(blas_threads[i]);
}
blas_num_threads = num_threads;
ResetEvent(kickoff_event);
UNLOCK_COMMAND(&server_lock);
}
if (num_threads > blas_num_threads) {
LOCK_COMMAND(&server_lock);
//increased_threads = 1;
if (!blas_server_avail){
thread_target = num_threads;
InitializeCriticalSection(&pool.lock);
pool.filled = CreateEvent(NULL, FALSE, FALSE, NULL);
pool.killed = CreateEvent(NULL, TRUE, FALSE, NULL);
//increased_threads = 1;
if (!blas_server_avail) {
// create the kickoff Event
kickoff_event = CreateEvent(NULL, TRUE, FALSE, NULL);
InitializeCriticalSection(&queue_lock);
pool.shutdown = 0;
pool.queue = NULL;
blas_server_avail = 1;
}
for(i = blas_num_threads - 1; i < num_threads - 1; i++){
for (i = (blas_num_threads > 0) ? blas_num_threads - 1 : 0; i < num_threads - 1; i++) {
//MT_TRACE("set_num_threads: creating thread [%d]\n", i);
blas_threads[i] = CreateThread(NULL, 0,
blas_thread_server, (void *)i,
@ -583,6 +603,9 @@ void goto_set_num_threads(int num_threads)
blas_cpu_number = num_threads;
}
//
// Openblas function to set thread count
//
void openblas_set_num_threads(int num)
{
goto_set_num_threads(num);

Some files were not shown because too many files have changed in this diff Show More