Compare commits
No commits in common. "develop" and "revert-2566-azurewin" have entirely different histories.
develop
...
revert-256
174
.cirrus.yml
174
.cirrus.yml
|
@ -1,174 +0,0 @@
|
|||
macos_instance:
|
||||
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest
|
||||
|
||||
#task:
|
||||
# name: AppleM1/LLVM
|
||||
# compile_script:
|
||||
# - brew install llvm
|
||||
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang
|
||||
|
||||
#task:
|
||||
# name: AppleM1/LLVM/ILP64
|
||||
# compile_script:
|
||||
# - brew install llvm
|
||||
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang INTERFACE64=1
|
||||
|
||||
#task:
|
||||
# name: AppleM1/LLVM/CMAKE
|
||||
# compile_script:
|
||||
# - brew install llvm
|
||||
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
# - mkdir build
|
||||
# - cd build
|
||||
# - cmake -DTARGET=VORTEX -DCMAKE_C_COMPILER=clang -DBUILD_SHARED_LIBS=ON ..
|
||||
# - make -j 4
|
||||
|
||||
#task:
|
||||
# name: AppleM1/GCC/MAKE/OPENMP
|
||||
# compile_script:
|
||||
# - brew install gcc@11
|
||||
# - export PATH=/opt/homebrew/bin:$PATH
|
||||
# - export LDFLAGS="-L/opt/homebrew/lib"
|
||||
# - export CPPFLAGS="-I/opt/homebrew/include"
|
||||
# - make CC=gcc-11 FC=gfortran-11 USE_OPENMP=1
|
||||
|
||||
macos_instance:
|
||||
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest
|
||||
task:
|
||||
name: AppleM1/LLVM x86_64 xbuild
|
||||
compile_script:
|
||||
- #brew install llvm
|
||||
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
- export ARCHS="i386 x86_64"
|
||||
- export ARCHS_STANDARD="i386 x86_64"
|
||||
- export ARCHS_STANDARD_32_64_BIT="i386 x86_64"
|
||||
- export ARCHS_STANDARD_64_BIT=x86_64
|
||||
- export ARCHS_STANDARD_INCLUDING_64_BIT="i386 x86_64"
|
||||
- export ARCHS_UNIVERSAL_IPHONE_OS="i386 x86_64"
|
||||
- export VALID_ARCHS="i386 x86_64"
|
||||
- xcrun --sdk macosx --show-sdk-path
|
||||
- xcodebuild -version
|
||||
- export CC=/Applications/Xcode_15.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_15.4.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX14.5.sdk -arch x86_64"
|
||||
- make TARGET=CORE2 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
|
||||
always:
|
||||
config_artifacts:
|
||||
path: "*conf*"
|
||||
type: text/plain
|
||||
# lib_artifacts:
|
||||
# path: "libopenblas*"
|
||||
# type: application/octet-stream
|
||||
|
||||
macos_instance:
|
||||
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest
|
||||
task:
|
||||
name: AppleM1/LLVM armv8-ios xbuild
|
||||
compile_script:
|
||||
- #brew install llvm
|
||||
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
|
||||
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
- export CC=/Applications/Xcode_15.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_15.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS17.5.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||
- xcrun --sdk iphoneos --show-sdk-path
|
||||
- ls -l /Applications
|
||||
- make TARGET=ARMV8 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 CROSS=1
|
||||
always:
|
||||
config_artifacts:
|
||||
path: "*conf*"
|
||||
type: text/plain
|
||||
|
||||
macos_instance:
|
||||
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest
|
||||
task:
|
||||
name: AppleM1/LLVM armv7-androidndk xbuild
|
||||
compile_script:
|
||||
- brew install --cask android-ndk
|
||||
- export ANDROID_NDK_HOME="/opt/homebrew/share/android-ndk"
|
||||
- export CC=/opt/homebrew/share/android-ndk/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi23-clang
|
||||
- make TARGET=ARMV7 ARM_SOFTFP_ABI=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
|
||||
always:
|
||||
config_artifacts:
|
||||
path: "*conf*"
|
||||
type: text/plain
|
||||
|
||||
task:
|
||||
name: NeoverseN1
|
||||
arm_container:
|
||||
image: node:latest
|
||||
compile_script:
|
||||
- make
|
||||
|
||||
task:
|
||||
name: NeoverseN1-ILP64
|
||||
arm_container:
|
||||
image: node:latest
|
||||
compile_script:
|
||||
- make INTERFACE64=1
|
||||
|
||||
task:
|
||||
name: NeoverseN1-OMP
|
||||
arm_container:
|
||||
image: node:latest
|
||||
cpu: 8
|
||||
compile_script:
|
||||
- make USE_OPENMP=1
|
||||
|
||||
FreeBSD_task:
|
||||
name: FreeBSD-gcc12
|
||||
freebsd_instance:
|
||||
image_family: freebsd-13-3
|
||||
install_script:
|
||||
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
|
||||
compile_script:
|
||||
- ls -l /usr/local/lib
|
||||
- gmake CC=gcc
|
||||
|
||||
|
||||
FreeBSD_task:
|
||||
name: freebsd-gcc12-ilp64
|
||||
freebsd_instance:
|
||||
image_family: freebsd-13-3
|
||||
install_script:
|
||||
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
|
||||
compile_script:
|
||||
- ls -l /usr/local/lib
|
||||
- gmake CC=gcc INTERFACE64=1
|
||||
|
||||
FreeBSD_task:
|
||||
name: FreeBSD-clang-openmp
|
||||
freebsd_instance:
|
||||
image_family: freebsd-13-3
|
||||
install_script:
|
||||
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
|
||||
- ln -s /usr/local/lib/gcc13/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
|
||||
compile_script:
|
||||
- gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1
|
||||
|
||||
#task:
|
||||
# name: Windows/LLVM16 --- too slow ---
|
||||
# windows_container:
|
||||
# image: cirrusci/windowsservercore:cmake-2021.12.07
|
||||
# install_script:
|
||||
# - choco list --localonly
|
||||
# - choco install -y llvm
|
||||
# - # choco install -y cmake --installargs '"ADD_CMAKE_TO_PATH=System"'
|
||||
# - choco install -y ninja
|
||||
# - refreshenv
|
||||
# - cd "c:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Auxiliary/Build"
|
||||
# - vcvarsall x64
|
||||
# - cd "C:\Users\ContainerAdministrator\AppData\Local\Temp\cirrus-ci-build"
|
||||
# - cmake -S . -B build -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release
|
||||
# - cd build
|
||||
# - cmake --build .
|
||||
# - ctest
|
16
.cirun.yml
16
.cirun.yml
|
@ -1,16 +0,0 @@
|
|||
# Self-hosted GitHub Actions runners on AWS via Cirun.io
|
||||
# Reference: https://docs.cirun.io/reference/yaml
|
||||
runners:
|
||||
- name: "aws-runner-graviton"
|
||||
# Cloud Provider: AWS
|
||||
cloud: "aws"
|
||||
region: "us-east-1"
|
||||
# Cheapest VM on AWS
|
||||
instance_type: "c7g.large"
|
||||
# Ubuntu-22.04, ami image
|
||||
machine_image: "ami-0a0c8eebcdd6dcbd0"
|
||||
preemptible: false
|
||||
# Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
|
||||
# So that this runner is created for running the workflow
|
||||
labels:
|
||||
- "cirun-aws-runner-graviton"
|
24
.drone.yml
24
.drone.yml
|
@ -190,27 +190,3 @@ steps:
|
|||
- make -C ctest $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
- make -C cpp_thread_test dgemm_tester
|
||||
---
|
||||
kind: pipeline
|
||||
name: arm64_gcc10
|
||||
|
||||
platform:
|
||||
os: linux
|
||||
arch: arm64
|
||||
|
||||
steps:
|
||||
- name: Build and Test
|
||||
image: ubuntu:20.04
|
||||
environment:
|
||||
CC: gcc-10
|
||||
FC: gfortran-10
|
||||
COMMON_FLAGS: 'TARGET=ARMV8 DYNAMIC_ARCH=1'
|
||||
commands:
|
||||
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
|
||||
- apt-get update -y
|
||||
- apt-get install -y make $CC gfortran-10 perl python g++
|
||||
- $CC --version
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS
|
||||
- make -C utest $COMMON_FLAGS
|
||||
- make -C test $COMMON_FLAGS
|
||||
|
||||
|
|
|
@ -1,149 +0,0 @@
|
|||
name: apple m
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: macos-14
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
build: [cmake, make]
|
||||
fortran: [gfortran]
|
||||
openmp: [0, 1]
|
||||
ilp64: [0, 1]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Print system information
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
cat /proc/cpuinfo
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
sysctl -a | grep machdep.cpu
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
sudo apt-get install -y gfortran cmake ccache libtinfo5
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
|
||||
brew reinstall gcc
|
||||
brew install coreutils cmake ccache
|
||||
brew install llvm
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
# GNU make and cmake call the compilers differently. It looks like
|
||||
# that causes the cache to mismatch. Keep the ccache for both build
|
||||
# tools separate to avoid polluting each other.
|
||||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
if [ "${{ matrix.build }}" = "make" ]; then
|
||||
# Add ccache to path
|
||||
if [ "$RUNNER_OS" = "Linux" ]; then
|
||||
echo "/usr/lib/ccache" >> $GITHUB_PATH
|
||||
elif [ "$RUNNER_OS" = "macOS" ]; then
|
||||
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
|
||||
echo "/opt/homebrew/opt/llvm/bin" >>$GITHUB_PATH
|
||||
echo "" >>$GITHUB_PATH
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
|
||||
export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
|
||||
export CC="/opt/homebrew/opt/llvm/bin/clang"
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=${{matrix.openmp}} INTERFACE64=${{matrix.ilp64}} FC="ccache ${{ matrix.fortran }}"
|
||||
;;
|
||||
"cmake")
|
||||
export LDFLAGS="$LDFLAGS -Wl,-ld_classic"
|
||||
mkdir build && cd build
|
||||
cmake -DDYNAMIC_ARCH=1 \
|
||||
-DUSE_OPENMP=${{matrix.openmp}} \
|
||||
-DINTERFACE64=${{matrix.ilp64}} \
|
||||
-DNOFORTRAN=0 \
|
||||
-DBUILD_WITHOUT_LAPACK=0 \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||
..
|
||||
cmake --build .
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Show ccache status
|
||||
continue-on-error: true
|
||||
run: ccache -s
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
|
||||
echo "::group::Tests in 'test' directory"
|
||||
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'ctest' directory"
|
||||
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'utest' directory"
|
||||
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
;;
|
||||
"cmake")
|
||||
cd build && ctest
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
|
@ -1,139 +0,0 @@
|
|||
name: arm64 graviton cirun
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- develop
|
||||
- release-**
|
||||
pull_request:
|
||||
branches:
|
||||
- develop
|
||||
- release-**
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}"
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
fortran: [gfortran]
|
||||
build: [cmake, make]
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Print system information
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
cat /proc/cpuinfo
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
sudo apt update
|
||||
sudo apt-get install -y gfortran cmake ccache libtinfo5
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
# GNU make and cmake call the compilers differently. It looks like
|
||||
# that causes the cache to mismatch. Keep the ccache for both build
|
||||
# tools separate to avoid polluting each other.
|
||||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
if [ "${{ matrix.build }}" = "make" ]; then
|
||||
# Add ccache to path
|
||||
if [ "$RUNNER_OS" = "Linux" ]; then
|
||||
echo "/usr/lib/ccache" >> $GITHUB_PATH
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
|
||||
;;
|
||||
"cmake")
|
||||
mkdir build && cd build
|
||||
cmake -DDYNAMIC_ARCH=1 \
|
||||
-DNOFORTRAN=0 \
|
||||
-DBUILD_WITHOUT_LAPACK=0 \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||
..
|
||||
cmake --build .
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Show ccache status
|
||||
continue-on-error: true
|
||||
run: ccache -s
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
|
||||
echo "::group::Tests in 'test' directory"
|
||||
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'ctest' directory"
|
||||
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'utest' directory"
|
||||
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
;;
|
||||
"cmake")
|
||||
cd build && ctest
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
|
@ -1,127 +0,0 @@
|
|||
name: c910v qemu test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1698113812618
|
||||
toolchain_file_name: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0-20231018.tar.gz
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: RISCV64_GENERIC
|
||||
triple: riscv64-linux-gnu
|
||||
apt_triple: riscv64-linux-gnu
|
||||
opts: NO_SHARED=1 TARGET=RISCV64_GENERIC
|
||||
- target: C910V
|
||||
triple: riscv64-unknown-linux-gnu
|
||||
apt_triple: riscv64-linux-gnu
|
||||
opts: NO_SHARED=1 TARGET=C910V
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: install build deps
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
|
||||
gcc-${{ matrix.apt_triple }} gfortran-${{ matrix.apt_triple }} libgomp1-riscv64-cross
|
||||
|
||||
- name: checkout qemu
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: T-head-Semi/qemu
|
||||
path: qemu
|
||||
ref: 1e692ebb43d396c52352406323fc782c1ac99a42
|
||||
|
||||
- name: build qemu
|
||||
run: |
|
||||
# Force the use of the c910v qemu-user
|
||||
wget https://github.com/revyos/qemu/commit/5164bca5a4bcde4534dc1a9aa3a7f619719874cf.patch
|
||||
cd qemu
|
||||
patch -p1 < ../5164bca5a4bcde4534dc1a9aa3a7f619719874cf.patch
|
||||
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system
|
||||
make -j$(nproc)
|
||||
make install
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: build OpenBLAS
|
||||
run: |
|
||||
wget ${xuetie_toolchain}/${toolchain_file_name}
|
||||
tar -xvf ${toolchain_file_name} -C /opt
|
||||
export PATH="/opt/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0/bin:$PATH"
|
||||
|
||||
make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
|
||||
|
||||
- name: test
|
||||
run: |
|
||||
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
|
||||
qemu-riscv64 ./utest/openblas_utest
|
||||
qemu-riscv64 ./utest/openblas_utest_ext
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat2 < ./ctest/sin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat2 < ./ctest/din2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat2 < ./ctest/cin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat2 < ./ctest/zin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat3 < ./ctest/sin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat3 < ./ctest/din3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat3 < ./ctest/cin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat3 < ./ctest/zin3
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat1
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat
|
|
@ -1,157 +0,0 @@
|
|||
name: Run codspeed benchmarks
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
benchmarks:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest]
|
||||
fortran: [gfortran]
|
||||
build: [make]
|
||||
pyver: ["3.12"]
|
||||
runs-on: ${{ matrix.os }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/setup-python@v3
|
||||
with:
|
||||
python-version: ${{ matrix.pyver }}
|
||||
|
||||
- name: Print system information
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
cat /proc/cpuinfo
|
||||
fi
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gfortran cmake ccache libtinfo5
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
# GNU make and cmake call the compilers differently. It looks like
|
||||
# that causes the cache to mismatch. Keep the ccache for both build
|
||||
# tools separate to avoid polluting each other.
|
||||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}
|
||||
|
||||
- name: Write out the .pc
|
||||
run: |
|
||||
cd benchmark/pybench
|
||||
cat > openblas.pc << EOF
|
||||
libdir=${{ github.workspace }}
|
||||
includedir= ${{ github.workspace }}
|
||||
openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64
|
||||
version=0.0.99
|
||||
extralib=-lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
|
||||
Name: openblas
|
||||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||
Version: ${version}
|
||||
URL: https://github.com/xianyi/OpenBLAS
|
||||
Libs: ${{ github.workspace }}/libopenblas.so -Wl,-rpath,${{ github.workspace }}
|
||||
Libs.private: -lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
|
||||
Cflags: -I${{ github.workspace}}
|
||||
EOF
|
||||
cat openblas.pc
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
if [ "${{ matrix.build }}" = "make" ]; then
|
||||
# Add ccache to path
|
||||
if [ "$RUNNER_OS" = "Linux" ]; then
|
||||
echo "/usr/lib/ccache" >> $GITHUB_PATH
|
||||
elif [ "$RUNNER_OS" = "macOS" ]; then
|
||||
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
|
||||
;;
|
||||
"cmake")
|
||||
mkdir build && cd build
|
||||
cmake -DDYNAMIC_ARCH=1 \
|
||||
-DNOFORTRAN=0 \
|
||||
-DBUILD_WITHOUT_LAPACK=0 \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||
..
|
||||
cmake --build .
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Show ccache status
|
||||
continue-on-error: true
|
||||
run: ccache -s
|
||||
|
||||
- name: Install benchmark dependencies
|
||||
run: pip install meson ninja numpy pytest pytest-codspeed --user
|
||||
|
||||
- name: Build the wrapper
|
||||
run: |
|
||||
cd benchmark/pybench
|
||||
export PKG_CONFIG_PATH=$PWD
|
||||
meson setup build --prefix=$PWD/build-install
|
||||
meson install -C build
|
||||
#
|
||||
# sanity check
|
||||
cd build/openblas_wrap
|
||||
python -c'import _flapack; print(dir(_flapack))'
|
||||
|
||||
- name: Run benchmarks under pytest-benchmark
|
||||
run: |
|
||||
cd benchmark/pybench
|
||||
pip install pytest-benchmark
|
||||
export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/
|
||||
OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py -k 'gesdd'
|
||||
|
||||
- name: Run benchmarks
|
||||
uses: CodSpeedHQ/action@v2
|
||||
with:
|
||||
token: ${{ secrets.CODSPEED_TOKEN }}
|
||||
run: |
|
||||
cd benchmark/pybench
|
||||
export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/
|
||||
OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py --codspeed
|
||||
|
|
@ -1,40 +0,0 @@
|
|||
name: Publish docs via GitHub Pages
|
||||
|
||||
on:
|
||||
push:
|
||||
branches:
|
||||
- develop
|
||||
pull_request:
|
||||
branches:
|
||||
- develop
|
||||
|
||||
jobs:
|
||||
build:
|
||||
name: Deploy docs
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
fetch-depth: 0
|
||||
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: "3.10"
|
||||
|
||||
- name: Install MkDocs and doc theme packages
|
||||
run: pip install mkdocs mkdocs-material mkdocs-git-revision-date-localized-plugin
|
||||
|
||||
- name: Build docs site
|
||||
run: mkdocs build
|
||||
|
||||
# mkdocs gh-deploy command only builds to the top-level, hence deploying
|
||||
# with this action instead.
|
||||
# Deploys to http://www.openmathlib.org/OpenBLAS/docs/
|
||||
- name: Deploy docs
|
||||
uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
|
||||
if: ${{ github.ref == 'refs/heads/develop' }}
|
||||
with:
|
||||
github_token: ${{ secrets.GITHUB_TOKEN }}
|
||||
publish_dir: ./site
|
||||
destination_dir: docs/
|
|
@ -1,371 +0,0 @@
|
|||
name: continuous build
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
os: [ubuntu-latest, macos-latest]
|
||||
fortran: [gfortran, flang]
|
||||
build: [cmake, make]
|
||||
exclude:
|
||||
- os: macos-latest
|
||||
fortran: flang
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Print system information
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
cat /proc/cpuinfo
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
sysctl -a | grep machdep.cpu
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
if [ "$RUNNER_OS" == "Linux" ]; then
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gfortran cmake ccache libtinfo5
|
||||
elif [ "$RUNNER_OS" == "macOS" ]; then
|
||||
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
|
||||
brew reinstall gcc
|
||||
brew install coreutils cmake ccache
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
# GNU make and cmake call the compilers differently. It looks like
|
||||
# that causes the cache to mismatch. Keep the ccache for both build
|
||||
# tools separate to avoid polluting each other.
|
||||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
|
||||
ccache-${{ runner.os }}-${{ matrix.build }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
if [ "${{ matrix.build }}" = "make" ]; then
|
||||
# Add ccache to path
|
||||
if [ "$RUNNER_OS" = "Linux" ]; then
|
||||
echo "/usr/lib/ccache" >> $GITHUB_PATH
|
||||
elif [ "$RUNNER_OS" = "macOS" ]; then
|
||||
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
|
||||
else
|
||||
echo "::error::$RUNNER_OS not supported"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
if [ "${{ matrix.fortran }}" = "flang" ]; then
|
||||
# download and install classic flang
|
||||
cd /usr/
|
||||
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz
|
||||
sudo tar xf flang-20190329-x86-70.tgz
|
||||
sudo rm flang-20190329-x86-70.tgz
|
||||
cd -
|
||||
fi
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
|
||||
;;
|
||||
"cmake")
|
||||
mkdir build && cd build
|
||||
cmake -DDYNAMIC_ARCH=1 \
|
||||
-DNOFORTRAN=0 \
|
||||
-DBUILD_WITHOUT_LAPACK=0 \
|
||||
-DCMAKE_VERBOSE_MAKEFILE=ON \
|
||||
-DCMAKE_BUILD_TYPE=Release \
|
||||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||
..
|
||||
cmake --build .
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
- name: Show ccache status
|
||||
continue-on-error: true
|
||||
run: ccache -s
|
||||
|
||||
- name: Run tests
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
case "${{ matrix.build }}" in
|
||||
"make")
|
||||
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
|
||||
echo "::group::Tests in 'test' directory"
|
||||
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'ctest' directory"
|
||||
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
echo "::group::Tests in 'utest' directory"
|
||||
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
|
||||
echo "::endgroup::"
|
||||
;;
|
||||
"cmake")
|
||||
cd build && ctest
|
||||
;;
|
||||
*)
|
||||
echo "::error::Configuration not supported"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
msys2:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: windows-latest
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
msystem: [UCRT64, MINGW32, CLANG64, CLANG32]
|
||||
idx: [int32, int64]
|
||||
build-type: [Release]
|
||||
include:
|
||||
- msystem: UCRT64
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-ucrt-x86_64
|
||||
fc-pkg: fc
|
||||
- msystem: MINGW32
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-i686
|
||||
fc-pkg: fc
|
||||
- msystem: CLANG64
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-clang-x86_64
|
||||
fc-pkg: fc
|
||||
# Compiling with Flang 16 seems to cause test errors on machines
|
||||
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
|
||||
no-avx512-flags: -DNO_AVX512=1
|
||||
- msystem: CLANG32
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-clang-i686
|
||||
fc-pkg: cc
|
||||
c-lapack-flags: -DC_LAPACK=ON
|
||||
- msystem: UCRT64
|
||||
idx: int64
|
||||
idx64-flags: -DBINARY=64 -DINTERFACE64=1
|
||||
target-prefix: mingw-w64-ucrt-x86_64
|
||||
fc-pkg: fc
|
||||
- msystem: CLANG64
|
||||
idx: int64
|
||||
idx64-flags: -DBINARY=64 -DINTERFACE64=1
|
||||
target-prefix: mingw-w64-clang-x86_64
|
||||
fc-pkg: fc
|
||||
# Compiling with Flang 16 seems to cause test errors on machines
|
||||
# with AVX512 instructions. Revisit after MSYS2 distributes Flang 17.
|
||||
no-avx512-flags: -DNO_AVX512=1
|
||||
- msystem: UCRT64
|
||||
idx: int32
|
||||
target-prefix: mingw-w64-ucrt-x86_64
|
||||
fc-pkg: fc
|
||||
build-type: None
|
||||
exclude:
|
||||
- msystem: MINGW32
|
||||
idx: int64
|
||||
- msystem: CLANG32
|
||||
idx: int64
|
||||
|
||||
defaults:
|
||||
run:
|
||||
# Use MSYS2 bash as default shell
|
||||
shell: msys2 {0}
|
||||
|
||||
env:
|
||||
CHERE_INVOKING: 1
|
||||
|
||||
steps:
|
||||
- name: Get CPU name
|
||||
shell: pwsh
|
||||
run : |
|
||||
Get-CIMInstance -Class Win32_Processor | Select-Object -Property Name
|
||||
|
||||
- name: Install build dependencies
|
||||
uses: msys2/setup-msys2@v2
|
||||
with:
|
||||
msystem: ${{ matrix.msystem }}
|
||||
update: true
|
||||
release: false # Use pre-installed version
|
||||
install: >-
|
||||
base-devel
|
||||
${{ matrix.target-prefix }}-cc
|
||||
${{ matrix.target-prefix }}-${{ matrix.fc-pkg }}
|
||||
${{ matrix.target-prefix }}-cmake
|
||||
${{ matrix.target-prefix }}-ninja
|
||||
${{ matrix.target-prefix }}-ccache
|
||||
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Prepare ccache
|
||||
# Get cache location of ccache
|
||||
# Create key that is used in action/cache/restore and action/cache/save steps
|
||||
id: ccache-prepare
|
||||
run: |
|
||||
echo "ccachedir=$(cygpath -m $(ccache -k cache_dir))" >> $GITHUB_OUTPUT
|
||||
# We include the commit sha in the cache key, as new cache entries are
|
||||
# only created if there is no existing entry for the key yet.
|
||||
echo "key=ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}-${{ github.sha }}" >> $GITHUB_OUTPUT
|
||||
|
||||
- name: Restore ccache
|
||||
uses: actions/cache/restore@v3
|
||||
with:
|
||||
path: ${{ steps.ccache-prepare.outputs.ccachedir }}
|
||||
key: ${{ steps.ccache-prepare.outputs.key }}
|
||||
# Restore a matching ccache cache entry. Prefer same branch.
|
||||
restore-keys: |
|
||||
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}
|
||||
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}
|
||||
|
||||
- name: Configure ccache
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota.
|
||||
run: |
|
||||
which ccache
|
||||
test -d ${{ steps.ccache-prepare.outputs.ccachedir }} || mkdir -p ${{ steps.ccache-prepare.outputs.ccachedir }}
|
||||
echo "max_size = 250M" > ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
|
||||
echo "compression = true" >> ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
|
||||
ccache -p
|
||||
ccache -s
|
||||
echo $HOME
|
||||
cygpath -w $HOME
|
||||
|
||||
- name: Configure OpenBLAS
|
||||
run: |
|
||||
mkdir build && cd build
|
||||
cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
|
||||
-DBUILD_SHARED_LIBS=ON \
|
||||
-DBUILD_STATIC_LIBS=ON \
|
||||
-DDYNAMIC_ARCH=ON \
|
||||
-DUSE_THREAD=ON \
|
||||
-DNUM_THREADS=64 \
|
||||
-DTARGET=CORE2 \
|
||||
${{ matrix.idx64-flags }} \
|
||||
${{ matrix.c-lapack-flags }} \
|
||||
${{ matrix.no-avx512-flags }} \
|
||||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
|
||||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
|
||||
..
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: cd build && cmake --build .
|
||||
|
||||
- name: Show ccache status
|
||||
continue-on-error: true
|
||||
run: ccache -s
|
||||
|
||||
- name: Save ccache
|
||||
# Save the cache after we are done (successfully) building
|
||||
uses: actions/cache/save@v3
|
||||
with:
|
||||
path: ${{ steps.ccache-prepare.outputs.ccachedir }}
|
||||
key: ${{ steps.ccache-prepare.outputs.key }}
|
||||
|
||||
- name: Run tests
|
||||
id: run-ctest
|
||||
timeout-minutes: 60
|
||||
run: cd build && ctest
|
||||
|
||||
- name: Re-run tests
|
||||
if: always() && (steps.run-ctest.outcome == 'failure')
|
||||
timeout-minutes: 60
|
||||
run: |
|
||||
cd build
|
||||
echo "::group::Re-run ctest"
|
||||
ctest --rerun-failed --output-on-failure || true
|
||||
echo "::endgroup::"
|
||||
echo "::group::Log from these tests"
|
||||
[ ! -f Testing/Temporary/LastTest.log ] || cat Testing/Temporary/LastTest.log
|
||||
echo "::endgroup::"
|
||||
|
||||
|
||||
cross_build:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: mips64el
|
||||
triple: mips64el-linux-gnuabi64
|
||||
opts: DYNAMIC_ARCH=1 TARGET=GENERIC
|
||||
- target: riscv64
|
||||
triple: riscv64-linux-gnu
|
||||
opts: TARGET=RISCV64_GENERIC
|
||||
- target: mipsel
|
||||
triple: mipsel-linux-gnu
|
||||
opts: TARGET=MIPS1004K
|
||||
- target: alpha
|
||||
triple: alpha-linux-gnu
|
||||
opts: TARGET=EV4
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Install Dependencies
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }}
|
|
@ -1,119 +0,0 @@
|
|||
name: loongarch64 qemu test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-24.04
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: LOONGSONGENERIC
|
||||
triple: loongarch64-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
|
||||
- target: LOONGSON3R5
|
||||
triple: loongarch64-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
|
||||
- target: LOONGSON2K1000
|
||||
triple: loongarch64-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
|
||||
- target: LA64_GENERIC
|
||||
triple: loongarch64-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
|
||||
- target: LA464
|
||||
triple: loongarch64-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
|
||||
- target: LA264
|
||||
triple: loongarch64-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
|
||||
- target: DYNAMIC_ARCH
|
||||
triple: loongarch64-linux-gnu
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Install APT deps
|
||||
run: |
|
||||
sudo apt-get update && \
|
||||
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache qemu-user-static \
|
||||
gcc-14-loongarch64-linux-gnu g++-14-loongarch64-linux-gnu gfortran-14-loongarch64-linux-gnu
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Disable utest dsdot:dsdot_n_1
|
||||
run: |
|
||||
echo -n > utest/test_dsdot.c
|
||||
echo "Due to the current version of qemu causing utest cases to fail,"
|
||||
echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: |
|
||||
make CC='ccache ${{ matrix.triple }}-gcc-14 -static' FC='ccache ${{ matrix.triple }}-gfortran-14 -static' \
|
||||
RANLIB='ccache ${{ matrix.triple }}-gcc-ranlib-14' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
|
||||
|
||||
- name: Test
|
||||
run: |
|
||||
qemu-loongarch64-static ./utest/openblas_utest
|
||||
qemu-loongarch64-static ./utest/openblas_utest_ext
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
|
|
@ -1,141 +0,0 @@
|
|||
name: loongarch64 clang qemu test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: LOONGSONGENERIC
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
|
||||
- target: LOONGSON3R5
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
|
||||
- target: LOONGSON2K1000
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
|
||||
- target: LA64_GENERIC
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
|
||||
- target: LA464
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
|
||||
- target: LA264
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
|
||||
- target: DYNAMIC_ARCH
|
||||
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Install libffi6
|
||||
run: |
|
||||
wget http://ftp.ca.debian.org/debian/pool/main/libf/libffi/libffi6_3.2.1-9_amd64.deb
|
||||
sudo dpkg -i libffi6_3.2.1-9_amd64.deb
|
||||
|
||||
- name: Install APT deps
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache
|
||||
|
||||
- name: Download and install loongarch64-toolchain
|
||||
run: |
|
||||
wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz
|
||||
wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz
|
||||
tar -xf clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz -C /opt
|
||||
tar -xf loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz -C /opt
|
||||
|
||||
- name: Checkout qemu
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: qemu/qemu
|
||||
path: qemu
|
||||
ref: master
|
||||
|
||||
- name: Install qemu
|
||||
run: |
|
||||
cd qemu
|
||||
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static
|
||||
make -j$(nproc)
|
||||
make install
|
||||
|
||||
- name: Set env
|
||||
run: |
|
||||
echo "PATH=$GITHUB_WORKSPACE:/opt/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10/bin:/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/bin:$PATH" >> $GITHUB_ENV
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: Disable utest dsdot:dsdot_n_1
|
||||
run: |
|
||||
echo -n > utest/test_dsdot.c
|
||||
echo "Due to the qemu versions 7.2 causing utest cases to fail,"
|
||||
echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."
|
||||
|
||||
- name: Build OpenBLAS
|
||||
run: make CC='ccache clang --target=loongarch64-linux-gnu --sysroot=/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/loongarch64-linux-gnu/sysroot/ -static' FC='ccache loongarch64-linux-gnu-gfortran -static' HOSTCC='ccache clang' CROSS_SUFFIX=llvm- NO_SHARED=1 ${{ matrix.opts }} -j$(nproc)
|
||||
|
||||
- name: Test
|
||||
run: |
|
||||
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
|
||||
qemu-loongarch64 ./utest/openblas_utest
|
||||
qemu-loongarch64 ./utest/openblas_utest_ext
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat
|
||||
|
|
@ -1,123 +0,0 @@
|
|||
name: mips64 qemu test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: MIPS64_GENERIC
|
||||
triple: mips64el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=MIPS64_GENERIC
|
||||
- target: SICORTEX
|
||||
triple: mips64el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=SICORTEX
|
||||
- target: I6400
|
||||
triple: mipsisa64r6el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=I6400
|
||||
- target: P6600
|
||||
triple: mipsisa64r6el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=P6600
|
||||
- target: I6500
|
||||
triple: mipsisa64r6el-linux-gnuabi64
|
||||
opts: NO_SHARED=1 TARGET=I6500
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: install build deps
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
|
||||
gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross
|
||||
|
||||
- name: checkout qemu
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: qemu/qemu
|
||||
path: qemu
|
||||
ref: 79dfa177ae348bb5ab5f97c0915359b13d6186e2
|
||||
|
||||
- name: build qemu
|
||||
run: |
|
||||
cd qemu
|
||||
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system
|
||||
make -j$(nproc)
|
||||
make install
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: build OpenBLAS
|
||||
run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)
|
||||
|
||||
- name: test
|
||||
run: |
|
||||
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
|
||||
qemu-mips64el ./utest/openblas_utest
|
||||
qemu-mips64el ./utest/openblas_utest_ext
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat2 < ./ctest/sin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat2 < ./ctest/din2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat2 < ./ctest/cin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat2 < ./ctest/zin2
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat3 < ./ctest/sin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat3 < ./ctest/din3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat3 < ./ctest/cin3
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat3 < ./ctest/zin3
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat1
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat1
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT2.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
|
||||
rm -f ./test/?BLAT3.SUMM
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
|
||||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
|
|
@ -17,28 +17,16 @@ on:
|
|||
# it only makes sense to test if this file has been changed
|
||||
|
||||
name: Nightly-Homebrew-Build
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
build-OpenBLAS-with-Homebrew:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: macos-latest
|
||||
env:
|
||||
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer
|
||||
HOMEBREW_DEVELOPER: "ON"
|
||||
HOMEBREW_DISPLAY_INSTALL_TIMES: "ON"
|
||||
HOMEBREW_NO_ANALYTICS: "ON"
|
||||
HOMEBREW_NO_AUTO_UPDATE: "ON"
|
||||
HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON"
|
||||
HOMEBREW_NO_INSTALL_CLEANUP: "ON"
|
||||
HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: "ON"
|
||||
HOMEBREW_NO_INSTALL_FROM_API: "ON"
|
||||
|
||||
steps:
|
||||
- name: Random delay for cron job
|
||||
|
@ -69,7 +57,7 @@ jobs:
|
|||
mv *.bottle.tar.gz bottles
|
||||
|
||||
- name: Upload bottle
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v1
|
||||
with:
|
||||
name: openblas--HEAD.catalina.bottle.tar.gz
|
||||
path: bottles
|
||||
|
|
|
@ -1,256 +0,0 @@
|
|||
name: riscv64 zvl256b qemu test
|
||||
|
||||
on: [push, pull_request]
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
|
||||
cancel-in-progress: true
|
||||
|
||||
permissions:
|
||||
contents: read # to fetch code (actions/checkout)
|
||||
|
||||
jobs:
|
||||
TEST:
|
||||
if: "github.repository == 'OpenMathLib/OpenBLAS'"
|
||||
runs-on: ubuntu-latest
|
||||
env:
|
||||
triple: riscv64-unknown-linux-gnu
|
||||
riscv_gnu_toolchain: https://github.com/riscv-collab/riscv-gnu-toolchain
|
||||
riscv_gnu_toolchain_version: 13.2.0
|
||||
riscv_gnu_toolchain_nightly_download_path: /releases/download/2024.02.02/riscv64-glibc-ubuntu-22.04-llvm-nightly-2024.02.02-nightly.tar.gz
|
||||
strategy:
|
||||
fail-fast: false
|
||||
matrix:
|
||||
include:
|
||||
- target: RISCV64_ZVL128B
|
||||
opts: TARGET=RISCV64_ZVL128B BINARY=64 ARCH=riscv64
|
||||
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=128,elen=64
|
||||
- target: RISCV64_ZVL256B
|
||||
opts: TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64
|
||||
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64
|
||||
- target: DYNAMIC_ARCH=1
|
||||
opts: TARGET=RISCV64_GENERIC BINARY=64 ARCH=riscv64 DYNAMIC_ARCH=1
|
||||
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: install build deps
|
||||
run: |
|
||||
sudo apt-get update
|
||||
sudo apt-get install autoconf automake autotools-dev ninja-build make \
|
||||
libgomp1-riscv64-cross ccache
|
||||
wget ${riscv_gnu_toolchain}/${riscv_gnu_toolchain_nightly_download_path}
|
||||
tar -xvf $(basename ${riscv_gnu_toolchain_nightly_download_path}) -C /opt
|
||||
|
||||
- name: Compilation cache
|
||||
uses: actions/cache@v3
|
||||
with:
|
||||
path: ~/.ccache
|
||||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
|
||||
restore-keys: |
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
|
||||
ccache-${{ runner.os }}-${{ matrix.target }}
|
||||
|
||||
- name: Configure ccache
|
||||
run: |
|
||||
test -d ~/.ccache || mkdir -p ~/.ccache
|
||||
echo "max_size = 300M" > ~/.ccache/ccache.conf
|
||||
echo "compression = true" >> ~/.ccache/ccache.conf
|
||||
ccache -s
|
||||
|
||||
- name: build OpenBLAS libs
|
||||
run: |
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
|
||||
CC='ccache clang --rtlib=compiler-rt -target ${triple} --sysroot /opt/riscv/sysroot --gcc-toolchain=/opt/riscv/lib/gcc/riscv64-unknown-linux-gnu/${riscv_gnu_toolchain_version}/' \
|
||||
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
|
||||
RANLIB='ccache ${triple}-ranlib' \
|
||||
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
|
||||
HOSTCC=gcc HOSTFC=gfortran -j$(nproc)
|
||||
|
||||
- name: build OpenBLAS tests
|
||||
run: |
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
|
||||
CC='${triple}-gcc' \
|
||||
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
|
||||
RANLIB='ccache ${triple}-ranlib' \
|
||||
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
|
||||
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) tests
|
||||
|
||||
- name: build lapack-netlib tests
|
||||
working-directory: ./lapack-netlib/TESTING
|
||||
run: |
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
|
||||
CC='${triple}-gcc' \
|
||||
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
|
||||
RANLIB='ccache ${triple}-ranlib' \
|
||||
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
|
||||
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) \
|
||||
LIN/xlintsts LIN/xlintstc LIN/xlintstd LIN/xlintstz LIN/xlintstrfs \
|
||||
LIN/xlintstrfc LIN/xlintstrfd LIN/xlintstrfz LIN/xlintstds \
|
||||
LIN/xlintstzc EIG/xeigtsts EIG/xeigtstc EIG/xeigtstd EIG/xeigtstz \
|
||||
|
||||
- name: OpenBLAS tests
|
||||
shell: bash
|
||||
run: |
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
export QEMU_CPU=${{ matrix.qemu_cpu }}
|
||||
rm -rf ./test_out
|
||||
mkdir -p ./test_out
|
||||
run_test() { local DIR=$1; local CMD=$2; local DATA=$3; local OUTPUT="./test_out/$DIR.$CMD"; \
|
||||
echo "`pwd`/$DIR/$CMD $DIR/$DATA" >> $OUTPUT; \
|
||||
if [[ -z $DATA ]]; then qemu-riscv64 ./$DIR/$CMD |& tee $OUTPUT ; \
|
||||
else qemu-riscv64 ./$DIR/$CMD < ./$DIR/$DATA |& tee $OUTPUT ; fi ; \
|
||||
RV=$? ; if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi \
|
||||
}
|
||||
run_test test cblat1 &
|
||||
run_test test cblat2 cblat2.dat &
|
||||
run_test test cblat3 cblat3.dat &
|
||||
run_test test dblat1 &
|
||||
run_test test dblat2 dblat2.dat &
|
||||
run_test test dblat3 dblat3.dat &
|
||||
run_test test sblat1 &
|
||||
run_test test sblat2 sblat2.dat &
|
||||
run_test test sblat3 sblat3.dat &
|
||||
run_test test zblat1 &
|
||||
run_test test zblat2 zblat2.dat &
|
||||
run_test test zblat3 zblat3.dat &
|
||||
run_test ctest xccblat1 &
|
||||
run_test ctest xccblat2 cin2 &
|
||||
run_test ctest xccblat3 cin3 &
|
||||
run_test ctest xdcblat1 &
|
||||
run_test ctest xdcblat2 din2 &
|
||||
run_test ctest xdcblat3 din3 &
|
||||
run_test ctest xscblat1 &
|
||||
run_test ctest xscblat2 sin2 &
|
||||
run_test ctest xscblat3 sin3 &
|
||||
run_test ctest xzcblat1 &
|
||||
run_test ctest xzcblat2 zin2 &
|
||||
run_test ctest xzcblat3 zin3 &
|
||||
wait
|
||||
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
|
||||
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi
|
||||
|
||||
- name: netlib tests
|
||||
shell: bash
|
||||
run: |
|
||||
: # these take a very long time
|
||||
echo "Skipping netlib tests in CI"
|
||||
exit 0
|
||||
: # comment out exit above to enable the tests
|
||||
: # probably we want to identify a subset to run in CI
|
||||
export PATH="/opt/riscv/bin:$PATH"
|
||||
export QEMU_CPU=${{ matrix.qemu_cpu }}
|
||||
rm -rf ./test_out
|
||||
mkdir -p ./test_out
|
||||
run_test() { local OUTPUT="./test_out/$1"; local DATA="./lapack-netlib/TESTING/$2"; local CMD="./lapack-netlib/TESTING/$3"; \
|
||||
echo "$4" >> $OUTPUT; \
|
||||
echo "$CMD" >> $OUTPUT; \
|
||||
qemu-riscv64 $CMD < $DATA |& tee $OUTPUT; \
|
||||
RV=$? ; if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi; \
|
||||
if grep -q fail $OUTPUT ; then echo "*** FAIL: log contains 'fail'" >> $OUTPUT ; fi ; \
|
||||
if grep -q rror $OUTPUT | grep -v -q "passed" | grep -v "largest error" ; then echo "*** FAIL: log contains 'error'" >> $OUTPUT ; fi \
|
||||
}
|
||||
run_test stest.out stest.in LIN/xlintsts "Testing REAL LAPACK linear equation routines" &
|
||||
run_test ctest.out ctest.in LIN/xlintstc "Testing COMPLEX LAPACK linear equation routines" &
|
||||
run_test dtest.out dtest.in LIN/xlintstd "Testing DOUBLE PRECISION LAPACK linear equation routines" &
|
||||
run_test ztest.out ztest.in LIN/xlintstz "Testing COMPLEX16 LAPACK linear equation routines" &
|
||||
run_test dstest.out dstest.in LIN/xlintstds "Testing SINGLE-DOUBLE PRECISION LAPACK prototype linear equation routines" &
|
||||
run_test zctest.out zctest.in LIN/xlintstzc "Testing COMPLEX-COMPLEX16 LAPACK prototype linear equation routines" &
|
||||
run_test stest_rfp.out stest_rfp.in LIN/xlintstrfs "Testing REAL LAPACK RFP prototype linear equation routines" &
|
||||
run_test dtest_rfp.out dtest_rfp.in LIN/xlintstrfd "Testing DOUBLE PRECISION LAPACK RFP prototype linear equation routines" &
|
||||
run_test ctest_rfp.out ctest_rfp.in LIN/xlintstrfc "Testing COMPLEX LAPACK RFP prototype linear equation routines" &
|
||||
run_test ztest_rfp.out ztest_rfp.in LIN/xlintstrfz "Testing COMPLEX16 LAPACK RFP prototype linear equation routines" &
|
||||
run_test snep.out nep.in EIG/xeigtsts "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test ssep.out sep.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test sse2.out se2.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test ssvd.out svd.in EIG/xeigtsts "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test sec.out sec.in EIG/xeigtsts "SEC - Testing REAL Eigen Condition Routines" &
|
||||
run_test sed.out sed.in EIG/xeigtsts "SEV - Testing REAL Nonsymmetric Eigenvalue Driver" &
|
||||
run_test sgg.out sgg.in EIG/xeigtsts "SGG - Testing REAL Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test sgd.out sgd.in EIG/xeigtsts "SGD - Testing REAL Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test ssb.out ssb.in EIG/xeigtsts "SSB - Testing REAL Symmetric Eigenvalue Problem routines" &
|
||||
run_test ssg.out ssg.in EIG/xeigtsts "SSG - Testing REAL Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test sbal.out sbal.in EIG/xeigtsts "SGEBAL - Testing the balancing of a REAL general matrix" &
|
||||
run_test sbak.out sbak.in EIG/xeigtsts "SGEBAK - Testing the back transformation of a REAL balanced matrix" &
|
||||
run_test sgbal.out sgbal.in EIG/xeigtsts "SGGBAL - Testing the balancing of a pair of REAL general matrices" &
|
||||
run_test sgbak.out sgbak.in EIG/xeigtsts "SGGBAK - Testing the back transformation of a pair of REAL balanced matrices" &
|
||||
run_test sbb.out sbb.in EIG/xeigtsts "SBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test sglm.out glm.in EIG/xeigtsts "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test sgqr.out gqr.in EIG/xeigtsts "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test sgsv.out gsv.in EIG/xeigtsts "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test scsd.out csd.in EIG/xeigtsts "CSD - Testing CS Decomposition routines" &
|
||||
run_test slse.out lse.in EIG/xeigtsts "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
run_test cnep.out nep.in EIG/xeigtstc "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test csep.out sep.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test cse2.out se2.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test csvd.out svd.in EIG/xeigtstc "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test cec.out cec.in EIG/xeigtstc "CEC - Testing COMPLEX Eigen Condition Routines" &
|
||||
run_test ced.out ced.in EIG/xeigtstc "CES - Testing COMPLEX Nonsymmetric Schur Form Driver" &
|
||||
run_test cgg.out cgg.in EIG/xeigtstc "CGG - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test cgd.out cgd.in EIG/xeigtstc "CGD - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test csb.out csb.in EIG/xeigtstc "CHB - Testing Hermitian Eigenvalue Problem routines" &
|
||||
run_test csg.out csg.in EIG/xeigtstc "CSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test cbal.out cbal.in EIG/xeigtstc "CGEBAL - Testing the balancing of a COMPLEX general matrix" &
|
||||
run_test cbak.out cbak.in EIG/xeigtstc "CGEBAK - Testing the back transformation of a COMPLEX balanced matrix" &
|
||||
run_test cgbal.out cgbal.in EIG/xeigtstc "CGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
|
||||
run_test cgbak.out cgbak.in EIG/xeigtstc "CGGBAK - Testing the back transformation of a pair of COMPLEX balanced matrices" &
|
||||
run_test cbb.out cbb.in EIG/xeigtstc "CBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test cglm.out glm.in EIG/xeigtstc "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test cgqr.out gqr.in EIG/xeigtstc "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test cgsv.out gsv.in EIG/xeigtstc "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test ccsd.out csd.in EIG/xeigtstc "CSD - Testing CS Decomposition routines" &
|
||||
run_test clse.out lse.in EIG/xeigtstc "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
run_test dnep.out nep.in EIG/xeigtstd "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test dsep.out sep.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test dse2.out se2.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test dsvd.out svd.in EIG/xeigtstd "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test dec.out dec.in EIG/xeigtstd "DEC - Testing DOUBLE PRECISION Eigen Condition Routines" &
|
||||
run_test ded.out ded.in EIG/xeigtstd "DEV - Testing DOUBLE PRECISION Nonsymmetric Eigenvalue Driver" &
|
||||
run_test dgg.out dgg.in EIG/xeigtstd "DGG - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test dgd.out dgd.in EIG/xeigtstd "DGD - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test dsb.out dsb.in EIG/xeigtstd "DSB - Testing DOUBLE PRECISION Symmetric Eigenvalue Problem routines" &
|
||||
run_test dsg.out dsg.in EIG/xeigtstd "DSG - Testing DOUBLE PRECISION Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test dbal.out dbal.in EIG/xeigtstd "DGEBAL - Testing the balancing of a DOUBLE PRECISION general matrix" &
|
||||
run_test dbak.out dbak.in EIG/xeigtstd "DGEBAK - Testing the back transformation of a DOUBLE PRECISION balanced matrix" &
|
||||
run_test dgbal.out dgbal.in EIG/xeigtstd "DGGBAL - Testing the balancing of a pair of DOUBLE PRECISION general matrices" &
|
||||
run_test dgbak.out dgbak.in EIG/xeigtstd "DGGBAK - Testing the back transformation of a pair of DOUBLE PRECISION balanced matrices" &
|
||||
run_test dbb.out dbb.in EIG/xeigtstd "DBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test dglm.out glm.in EIG/xeigtstd "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test dgqr.out gqr.in EIG/xeigtstd "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test dgsv.out gsv.in EIG/xeigtstd "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test dcsd.out csd.in EIG/xeigtstd "CSD - Testing CS Decomposition routines" &
|
||||
run_test dlse.out lse.in EIG/xeigtstd "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
run_test znep.out nep.in EIG/xeigtstz "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
|
||||
run_test zsep.out sep.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test zse2.out se2.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
|
||||
run_test zsvd.out svd.in EIG/xeigtstz "SVD - Testing Singular Value Decomposition routines" &
|
||||
run_test zec.out zec.in EIG/xeigtstz "ZEC - Testing COMPLEX16 Eigen Condition Routines" &
|
||||
run_test zed.out zed.in EIG/xeigtstz "ZES - Testing COMPLEX16 Nonsymmetric Schur Form Driver" &
|
||||
run_test zgg.out zgg.in EIG/xeigtstz "ZGG - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test zgd.out zgd.in EIG/xeigtstz "ZGD - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem driver routines" &
|
||||
run_test zsb.out zsb.in EIG/xeigtstz "ZHB - Testing Hermitian Eigenvalue Problem routines" &
|
||||
run_test zsg.out zsg.in EIG/xeigtstz "ZSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
|
||||
run_test zbal.out zbal.in EIG/xeigtstz "ZGEBAL - Testing the balancing of a COMPLEX16 general matrix" &
|
||||
run_test zbak.out zbak.in EIG/xeigtstz "ZGEBAK - Testing the back transformation of a COMPLEX16 balanced matrix" &
|
||||
run_test zgbal.out zgbal.in EIG/xeigtstz "ZGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
|
||||
run_test zgbak.out zgbak.in EIG/xeigtstz "ZGGBAK - Testing the back transformation of a pair of COMPLEX16 balanced matrices" &
|
||||
run_test zbb.out zbb.in EIG/xeigtstz "ZBB - Testing banded Singular Value Decomposition routines" &
|
||||
run_test zglm.out glm.in EIG/xeigtstz "GLM - Testing Generalized Linear Regression Model routines" &
|
||||
run_test zgqr.out gqr.in EIG/xeigtstz "GQR - Testing Generalized QR and RQ factorization routines" &
|
||||
run_test zgsv.out gsv.in EIG/xeigtstz "GSV - Testing Generalized Singular Value Decomposition routines" &
|
||||
run_test zcsd.out csd.in EIG/xeigtstz "CSD - Testing CS Decomposition routines" &
|
||||
run_test zlse.out lse.in EIG/xeigtstz "LSE - Testing Constrained Linear Least Squares routines" &
|
||||
wait
|
||||
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
|
||||
python ./lapack-netlib/lapack_testing.py -d ./test_out -e > netlib_summary
|
||||
TOTALS="$(grep 'ALL PRECISIONS' netlib_summary)"
|
||||
NUMERICAL_ERRORS=-1
|
||||
OTHER_ERRORS=-1
|
||||
. <(awk '/ALL PRECISIONS/{printf "NUMERICAL_ERRORS=%s\nOTHER_ERRORS=%s\n", $5, $7}' netlib_summary
|
||||
if (( NUMERICAL_ERRORS != 0 )) || (( OTHER_ERRORS != 0 )) ; then cat netlib_summary ; FAILURES=1 ; fi
|
||||
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi
|
|
@ -14,7 +14,6 @@ lapack-3.4.2
|
|||
lapack-3.4.2.tgz
|
||||
lapack-netlib/make.inc
|
||||
lapack-netlib/lapacke/include/lapacke_mangling.h
|
||||
lapack-netlib/SRC/la_constants.mod
|
||||
lapack-netlib/TESTING/testing_results.txt
|
||||
lapack-netlib/INSTALL/test*
|
||||
lapack-netlib/TESTING/xeigtstc
|
||||
|
@ -47,66 +46,46 @@ config_last.h
|
|||
getarch
|
||||
getarch_2nd
|
||||
utest/openblas_utest
|
||||
utest/openblas_utest_ext
|
||||
ctest/xccblat1
|
||||
ctest/xccblat2
|
||||
ctest/xccblat3
|
||||
ctest/xccblat3_3m
|
||||
ctest/xdcblat1
|
||||
ctest/xdcblat2
|
||||
ctest/xdcblat3
|
||||
ctest/xdcblat3_3m
|
||||
ctest/xscblat1
|
||||
ctest/xscblat2
|
||||
ctest/xscblat3
|
||||
ctest/xscblat3_3m
|
||||
ctest/xzcblat1
|
||||
ctest/xzcblat2
|
||||
ctest/xzcblat3
|
||||
ctest/xzcblat3_3m
|
||||
exports/linktest.c
|
||||
exports/linux.def
|
||||
kernel/setparam_*.c
|
||||
kernel/kernel_*.h
|
||||
test/CBLAT2.SUMM
|
||||
test/CBLAT3.SUMM
|
||||
test/CBLAT3_3M.SUMM
|
||||
test/DBLAT2.SUMM
|
||||
test/DBLAT3.SUMM
|
||||
test/DBLAT3_3M.SUMM
|
||||
test/SBLAT2.SUMM
|
||||
test/SBLAT3.SUMM
|
||||
test/SBLAT3_3M.SUMM
|
||||
test/ZBLAT2.SUMM
|
||||
test/ZBLAT3.SUMM
|
||||
test/ZBLAT3_3M.SUMM
|
||||
test/SHBLAT3.SUMM
|
||||
test/SBBLAT3.SUMM
|
||||
test/cblat1
|
||||
test/cblat2
|
||||
test/cblat3
|
||||
test/cblat3_3m
|
||||
test/dblat1
|
||||
test/dblat2
|
||||
test/dblat3
|
||||
test/dblat3_3m
|
||||
test/sblat1
|
||||
test/sblat2
|
||||
test/sblat3
|
||||
test/sblat3_3m
|
||||
test/test_shgemm
|
||||
test/test_sbgemm
|
||||
test/zblat1
|
||||
test/zblat2
|
||||
test/zblat3
|
||||
test/zblat3_3m
|
||||
build
|
||||
build.*
|
||||
*.swp
|
||||
benchmark/*.goto
|
||||
benchmark/smallscaling
|
||||
.vscode
|
||||
CMakeCache.txt
|
||||
CMakeFiles/*
|
||||
.vscode
|
||||
**/__pycache__
|
||||
|
|
367
.travis.yml
367
.travis.yml
|
@ -1,39 +1,34 @@
|
|||
# XXX: Precise is already deprecated, new default is Trusty.
|
||||
# https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming
|
||||
dist: focal
|
||||
dist: precise
|
||||
sudo: true
|
||||
language: c
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- &test-ubuntu
|
||||
# os: linux
|
||||
os: linux
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gfortran
|
||||
# before_script: &common-before
|
||||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
|
||||
# script:
|
||||
# - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# - make -C test $COMMON_FLAGS $BTYPE
|
||||
# - make -C ctest $COMMON_FLAGS $BTYPE
|
||||
# - make -C utest $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64
|
||||
# - BTYPE="BINARY=64"
|
||||
#
|
||||
# - <<: *test-ubuntu
|
||||
os: linux
|
||||
arch: ppc64le
|
||||
before_script: &common-before
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
|
||||
script:
|
||||
- travis_wait 50 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- set -e
|
||||
- make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
os: linux-ppc64le
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=PPC64LE_LINUX
|
||||
|
@ -44,132 +39,85 @@ matrix:
|
|||
arch: s390x
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
|
||||
- sudo apt-get install --only-upgrade binutils
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=IBMZ_LINUX
|
||||
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
os: linux
|
||||
dist: focal
|
||||
arch: s390x
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
env:
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 INTERFACE64=1"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
compiler: clang
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
|
||||
- sudo apt-get install --only-upgrade binutils
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=IBMZ_LINUX
|
||||
- BTYPE="BINARY=64 USE_OPENMP=0 CC=clang"
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 CC=clang"
|
||||
|
||||
# - <<: *test-ubuntu
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64
|
||||
# - BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
#
|
||||
# - <<: *test-ubuntu
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64
|
||||
# - BTYPE="BINARY=64 INTERFACE64=1"
|
||||
#
|
||||
# - <<: *test-ubuntu
|
||||
# compiler: clang
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64
|
||||
# - BTYPE="BINARY=64 CC=clang"
|
||||
#
|
||||
# - <<: *test-ubuntu
|
||||
# compiler: clang
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64
|
||||
# - BTYPE="BINARY=64 INTERFACE64=1 CC=clang"
|
||||
#
|
||||
# - <<: *test-ubuntu
|
||||
# addons:
|
||||
# apt:
|
||||
# packages:
|
||||
# - gcc-multilib
|
||||
# - gfortran-multilib
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX32
|
||||
# - BTYPE="BINARY=32"
|
||||
#
|
||||
- os: linux
|
||||
arch: ppc64le
|
||||
dist: bionic
|
||||
compiler: gcc
|
||||
before_script:
|
||||
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
|
||||
- sudo apt-get update
|
||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||
script:
|
||||
- travis_wait 50 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
- <<: *test-ubuntu
|
||||
compiler: clang
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=PPC64LE_LINUX_P9
|
||||
- TARGET_BOX=LINUX64
|
||||
- BTYPE="BINARY=64 INTERFACE64=1 CC=clang"
|
||||
|
||||
- <<: *test-ubuntu
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gcc-multilib
|
||||
- gfortran-multilib
|
||||
env:
|
||||
- TARGET_BOX=LINUX32
|
||||
- BTYPE="BINARY=32"
|
||||
|
||||
- os: linux
|
||||
arch: ppc64le
|
||||
dist: bionic
|
||||
compiler: gcc
|
||||
before_script:
|
||||
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
|
||||
- sudo apt-get update
|
||||
- sudo apt-get install gcc-9 gfortran-9 -y
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- binutils-mingw-w64-x86-64
|
||||
- gcc-mingw-w64-x86-64
|
||||
- gfortran-mingw-w64-x86-64
|
||||
before_script: *common-before
|
||||
script:
|
||||
- travis_wait 50 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
|
||||
- make -C test $COMMON_FLAGS $BTYPE
|
||||
- make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- make -C utest $COMMON_FLAGS $BTYPE
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
# for matrix annotation only
|
||||
- TARGET_BOX=PPC64LE_LINUX_P9
|
||||
- TARGET_BOX=WIN64
|
||||
- BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
|
||||
# - os: linux
|
||||
# compiler: gcc
|
||||
# addons:
|
||||
# apt:
|
||||
# packages:
|
||||
# - binutils-mingw-w64-x86-64
|
||||
# - gcc-mingw-w64-x86-64
|
||||
# - gfortran-mingw-w64-x86-64
|
||||
# before_script: *common-before
|
||||
# script:
|
||||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - TARGET_BOX=WIN64
|
||||
# - BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
|
||||
#
|
||||
# Build & test on Alpine Linux inside chroot, i.e. on system with musl libc.
|
||||
# These jobs need sudo, so Travis runs them on VM-based infrastructure
|
||||
# which is slower than container-based infrastructure used for jobs
|
||||
# that don't require sudo.
|
||||
# - &test-alpine
|
||||
# os: linux
|
||||
# dist: trusty
|
||||
# sudo: true
|
||||
# language: minimal
|
||||
# before_install:
|
||||
# - "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \
|
||||
# && echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1"
|
||||
# - alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
|
||||
# install:
|
||||
# - sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
|
||||
# before_script: *common-before
|
||||
# script:
|
||||
# # XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
|
||||
# - alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
|
||||
# - alpine make -C test $COMMON_FLAGS $BTYPE
|
||||
# - alpine make -C ctest $COMMON_FLAGS $BTYPE
|
||||
# - alpine make -C utest $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64_MUSL
|
||||
# - BTYPE="BINARY=64"
|
||||
- &test-alpine
|
||||
os: linux
|
||||
dist: trusty
|
||||
sudo: true
|
||||
language: minimal
|
||||
before_install:
|
||||
- "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \
|
||||
&& echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1"
|
||||
- alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
|
||||
install:
|
||||
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
|
||||
before_script: *common-before
|
||||
script:
|
||||
- set -e
|
||||
# XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
|
||||
- alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
|
||||
- alpine make -C test $COMMON_FLAGS $BTYPE
|
||||
- alpine make -C ctest $COMMON_FLAGS $BTYPE
|
||||
- alpine make -C utest $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- TARGET_BOX=LINUX64_MUSL
|
||||
- BTYPE="BINARY=64"
|
||||
|
||||
# XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS,
|
||||
# but only on Travis CI, cannot reproduce it elsewhere.
|
||||
|
@ -179,132 +127,75 @@ matrix:
|
|||
# - TARGET_BOX=LINUX64_MUSL
|
||||
# - BTYPE="BINARY=64 USE_OPENMP=1"
|
||||
|
||||
# - <<: *test-alpine
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64_MUSL
|
||||
# - BTYPE="BINARY=64 INTERFACE64=1"
|
||||
#
|
||||
# # Build with the same flags that Alpine uses in its OpenBLAS package.
|
||||
# - <<: *test-alpine
|
||||
# env:
|
||||
# - TARGET_BOX=LINUX64_MUSL
|
||||
# - BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2"
|
||||
- <<: *test-alpine
|
||||
env:
|
||||
- TARGET_BOX=LINUX64_MUSL
|
||||
- BTYPE="BINARY=64 INTERFACE64=1"
|
||||
|
||||
# - &test-cmake
|
||||
# os: linux
|
||||
# compiler: clang
|
||||
# addons:
|
||||
# apt:
|
||||
# packages:
|
||||
# - gfortran
|
||||
# - cmake
|
||||
# dist: trusty
|
||||
# sudo: true
|
||||
# before_script:
|
||||
# - COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
|
||||
# script:
|
||||
# - mkdir build
|
||||
# - CONFIG=Release
|
||||
# - cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
|
||||
# - cmake --build build --config $CONFIG -- -j2
|
||||
# env:
|
||||
# - CMAKE=1
|
||||
# - <<: *test-cmake
|
||||
# env:
|
||||
# - CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1"
|
||||
# - <<: *test-cmake
|
||||
# compiler: gcc
|
||||
# env:
|
||||
# - CMAKE=1
|
||||
# Build with the same flags that Alpine uses in its OpenBLAS package.
|
||||
- <<: *test-alpine
|
||||
env:
|
||||
- TARGET_BOX=LINUX64_MUSL
|
||||
- BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2"
|
||||
|
||||
# - &test-macos
|
||||
# os: osx
|
||||
# osx_image: xcode11.5
|
||||
# before_script:
|
||||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
# script:
|
||||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-9"
|
||||
#
|
||||
# - <<: *test-macos
|
||||
# osx_image: xcode12
|
||||
# before_script:
|
||||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
# - brew update
|
||||
# script:
|
||||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10"
|
||||
#
|
||||
# - <<: *test-macos
|
||||
# osx_image: xcode12
|
||||
# before_script:
|
||||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
# - brew update
|
||||
# script:
|
||||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
# env:
|
||||
# - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
|
||||
|
||||
# - <<: *test-macos
|
||||
# osx_image: xcode10
|
||||
# env:
|
||||
# - BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"
|
||||
|
||||
# - <<: *test-macos
|
||||
# osx_image: xcode11.5
|
||||
# before_script:
|
||||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
# - brew update
|
||||
# env:
|
||||
# - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||
# - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||
# - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
|
||||
# - <<: *test-macos
|
||||
# osx_image: xcode11.5
|
||||
# env:
|
||||
## - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
## - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
|
||||
# - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
# - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch armv7 -miphoneos-version-min=5.1"
|
||||
# - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
|
||||
|
||||
- &test-neoversen1
|
||||
- &test-cmake
|
||||
os: linux
|
||||
arch: arm64
|
||||
dist: focal
|
||||
group: edge
|
||||
virt: lxd
|
||||
compiler: gcc
|
||||
compiler: clang
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gfortran
|
||||
- cmake
|
||||
dist: trusty
|
||||
sudo: true
|
||||
before_script:
|
||||
- COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
|
||||
script:
|
||||
- travis_wait 45 make && make lapack-test
|
||||
- set -e
|
||||
- mkdir build
|
||||
- CONFIG=Release
|
||||
- cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
|
||||
- cmake --build build --config $CONFIG -- -j2
|
||||
env:
|
||||
- TARGET_BOX=NEOVERSE_N1
|
||||
|
||||
- &test-neon1-gcc8
|
||||
os: linux
|
||||
arch: arm64
|
||||
dist: focal
|
||||
group: edge
|
||||
virt: lxd
|
||||
- CMAKE=1
|
||||
- <<: *test-cmake
|
||||
env:
|
||||
- CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1"
|
||||
- <<: *test-cmake
|
||||
compiler: gcc
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- gcc-8
|
||||
- gfortran-8
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 CC=gcc-8 FC=gfortran-8 DYNAMIC_ARCH=1
|
||||
env:
|
||||
- TARGET_BOX=NEOVERSE_N1-GCC8
|
||||
- CMAKE=1
|
||||
|
||||
- &test-macos
|
||||
os: osx
|
||||
osx_image: xcode10.1
|
||||
before_script:
|
||||
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
|
||||
- brew update
|
||||
- brew install gcc@8 # for gfortran
|
||||
script:
|
||||
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
|
||||
env:
|
||||
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8"
|
||||
|
||||
- <<: *test-macos
|
||||
osx_image: xcode10.0
|
||||
env:
|
||||
- BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"
|
||||
|
||||
- <<: *test-macos
|
||||
osx_image: xcode10.1
|
||||
env:
|
||||
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
|
||||
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
|
||||
|
||||
- <<: *test-macos
|
||||
osx_image: xcode10.1
|
||||
env:
|
||||
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
|
||||
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
|
||||
- BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"
|
||||
# whitelist
|
||||
branches:
|
||||
only:
|
||||
|
|
|
@ -1,9 +1,5 @@
|
|||
Thank you for the support.
|
||||
|
||||
### [2019.12/2021.9] [Chan-Zuckerberg Foundation EOSS Initiative](https://chanzuckerberg.com/eoss/)
|
||||
|
||||
Between December 2019 and September 2021, the development and maintenance of OpenBLAS was funded in part by the Chan-Zuckerberg Foundation in the context of two grants awarded to the NumPy Foundation and managed by NumFocus (Cycles 1 and 3 of the Essential Open Source Software for Science (EOSS) Initiative of the Chan-Zuckerberg Foundation).
|
||||
|
||||
### [2013.8] [Testbed for OpenBLAS project](https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project)
|
||||
|
||||
https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project/pledges
|
||||
|
|
336
CMakeLists.txt
336
CMakeLists.txt
|
@ -2,14 +2,11 @@
|
|||
## Author: Hank Anderson <hank@statease.com>
|
||||
##
|
||||
|
||||
cmake_minimum_required(VERSION 3.16.0)
|
||||
|
||||
cmake_minimum_required(VERSION 2.8.5)
|
||||
project(OpenBLAS C ASM)
|
||||
|
||||
set(OpenBLAS_MAJOR_VERSION 0)
|
||||
set(OpenBLAS_MINOR_VERSION 3)
|
||||
set(OpenBLAS_PATCH_VERSION 28.dev)
|
||||
|
||||
set(OpenBLAS_PATCH_VERSION 9.dev)
|
||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")
|
||||
|
||||
# Adhere to GNU filesystem layout conventions
|
||||
|
@ -17,63 +14,27 @@ include(GNUInstallDirs)
|
|||
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
|
||||
#######
|
||||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF)
|
||||
|
||||
option(BUILD_LAPACK_DEPRECATED "When building LAPACK, include also some older, deprecated routines" ON)
|
||||
|
||||
set(LAPACK_STRLEN "" CACHE STRING "When building LAPACK, use this type (e.g. \"int\") for character lengths (defaults to size_t)")
|
||||
|
||||
option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON)
|
||||
|
||||
option(BUILD_BENCHMARKS "Build the collection of BLAS/LAPACK benchmarks" OFF)
|
||||
|
||||
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF)
|
||||
|
||||
if(MSVC)
|
||||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
|
||||
endif()
|
||||
option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF)
|
||||
|
||||
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64, ppc or RISCV64-RVV1.0 only)" OFF)
|
||||
|
||||
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64 or ppc only)" OFF)
|
||||
option(DYNAMIC_OLDER "Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)
|
||||
|
||||
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)
|
||||
|
||||
option(USE_LOCKING "Use locks even in single-threaded builds to make them callable from multiple threads" OFF)
|
||||
|
||||
option(USE_PERL "Use the older PERL scripts for build preparation instead of universal shell scripts" OFF)
|
||||
|
||||
option(NO_WARMUP "Do not run a benchmark on each startup just to find the best location for the memory buffer" ON)
|
||||
|
||||
option(FIXED_LIBNAME "Use a non-versioned name for the library and no symbolic linking to variant names" OFF)
|
||||
|
||||
set(LIBNAMEPREFIX "" CACHE STRING "Add a prefix to the openblas part of the library name" )
|
||||
set(LIBNAMESUFFIX "" CACHE STRING "Add a suffix after the openblas part of the library name" )
|
||||
|
||||
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
|
||||
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
|
||||
else()
|
||||
set(NO_AFFINITY 1)
|
||||
endif()
|
||||
|
||||
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
|
||||
|
||||
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
|
||||
option(BUILD_STATIC_LIBS "Build static library" OFF)
|
||||
if(NOT BUILD_STATIC_LIBS AND NOT BUILD_SHARED_LIBS)
|
||||
set(BUILD_STATIC_LIBS ON CACHE BOOL "Build static library" FORCE)
|
||||
endif()
|
||||
if((BUILD_STATIC_LIBS AND BUILD_SHARED_LIBS) AND MSVC)
|
||||
message(WARNING "Could not enable both BUILD_STATIC_LIBS and BUILD_SHARED_LIBS with MSVC, Disable BUILD_SHARED_LIBS")
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static library" FORCE)
|
||||
endif()
|
||||
|
||||
# Add a prefix or suffix to all exported symbol names in the shared library.
|
||||
# Avoids conflicts with other BLAS libraries, especially when using
|
||||
# 64 bit integer interfaces in OpenBLAS.
|
||||
|
||||
set(SYMBOLPREFIX "" CACHE STRING "Add a prefix to all exported symbol names in the shared library to avoid conflicts with other BLAS libraries" )
|
||||
|
||||
set(SYMBOLSUFFIX "" CACHE STRING "Add a suffix to all exported symbol names in the shared library, e.g. _64 for INTERFACE64 builds" )
|
||||
|
||||
#######
|
||||
if(BUILD_WITHOUT_LAPACK)
|
||||
set(NO_LAPACK 1)
|
||||
|
@ -102,14 +63,10 @@ endif()
|
|||
|
||||
message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")
|
||||
|
||||
if (USE_OPENMP)
|
||||
find_package(OpenMP REQUIRED)
|
||||
endif ()
|
||||
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")
|
||||
|
||||
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX}${SUFFIX64_UNDERSCORE})
|
||||
set(OpenBLAS_LIBNAME openblas${SUFFIX64_UNDERSCORE})
|
||||
|
||||
set(BLASDIRS interface driver/level2 driver/level3 driver/others)
|
||||
|
||||
|
@ -129,13 +86,9 @@ if (NOT NO_LAPACK)
|
|||
list(APPEND SUBDIRS lapack)
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED BUILD_BFLOAT16)
|
||||
set (BUILD_BFLOAT16 false)
|
||||
endif ()
|
||||
# set which float types we want to build for
|
||||
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
|
||||
# if none are defined, build for all
|
||||
# set(BUILD_BFLOAT16 true)
|
||||
set(BUILD_SINGLE true)
|
||||
set(BUILD_DOUBLE true)
|
||||
set(BUILD_COMPLEX true)
|
||||
|
@ -167,11 +120,6 @@ if (BUILD_COMPLEX16)
|
|||
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
|
||||
endif ()
|
||||
|
||||
if (BUILD_BFLOAT16)
|
||||
message(STATUS "Building Half Precision")
|
||||
# list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
|
||||
endif ()
|
||||
|
||||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
|
||||
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
|
||||
endif ()
|
||||
|
@ -183,7 +131,6 @@ if(MSVC)
|
|||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib/Debug)
|
||||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib/Release)
|
||||
endif ()
|
||||
|
||||
# get obj vars into the format that add_library expects: $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
|
||||
set(TARGET_OBJS "")
|
||||
foreach (SUBDIR ${SUBDIRS})
|
||||
|
@ -196,7 +143,7 @@ endforeach ()
|
|||
|
||||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
|
||||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
|
||||
if (NOT NO_LAPACK)
|
||||
if (NOT NOFORTRAN AND NOT NO_LAPACK)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
|
||||
if (NOT NO_LAPACKE)
|
||||
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
|
||||
|
@ -221,73 +168,12 @@ if (${DYNAMIC_ARCH})
|
|||
endif ()
|
||||
|
||||
# add objects to the openblas lib
|
||||
if(NOT NO_LAPACK)
|
||||
add_library(LAPACK_OVERRIDES OBJECT ${LA_SOURCES})
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACK_OVERRIDES>")
|
||||
endif()
|
||||
if(NOT NO_LAPACKE)
|
||||
add_library(LAPACKE OBJECT ${LAPACKE_SOURCES})
|
||||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACKE>")
|
||||
endif()
|
||||
#if(BUILD_RELAPACK)
|
||||
# add_library(RELAPACK OBJECT ${RELA_SOURCES})
|
||||
# list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:RELAPACK>")
|
||||
#endif()
|
||||
set(OpenBLAS_LIBS "")
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
target_include_directories(${OpenBLAS_LIBNAME}_static INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
|
||||
list(APPEND OpenBLAS_LIBS ${OpenBLAS_LIBNAME}_static)
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_library(${OpenBLAS_LIBNAME}_shared SHARED ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
target_include_directories(${OpenBLAS_LIBNAME}_shared INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
|
||||
list(APPEND OpenBLAS_LIBS ${OpenBLAS_LIBNAME}_shared)
|
||||
endif()
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_library(${OpenBLAS_LIBNAME} ALIAS ${OpenBLAS_LIBNAME}_static)
|
||||
else()
|
||||
add_library(${OpenBLAS_LIBNAME} ALIAS ${OpenBLAS_LIBNAME}_shared)
|
||||
endif()
|
||||
|
||||
set_target_properties(${OpenBLAS_LIBS} PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})
|
||||
add_library(${OpenBLAS_LIBNAME} ${LA_SOURCES} ${LAPACKE_SOURCES} ${RELA_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
|
||||
target_include_directories(${OpenBLAS_LIBNAME} INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
|
||||
|
||||
# Android needs to explicitly link against libm
|
||||
if (${CMAKE_SYSTEM_NAME} MATCHES "AIX|Android|Linux|FreeBSD|OpenBSD|NetBSD|DragonFly|Darwin")
|
||||
if(BUILD_STATIC_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_static m)
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_shared m)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (USE_OPENMP)
|
||||
if(BUILD_STATIC_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_static OpenMP::OpenMP_C)
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_shared OpenMP::OpenMP_C)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# It seems that this hack has not been required since macOS 11 Big Sur
|
||||
if (APPLE AND BUILD_SHARED_LIBS AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
|
||||
set (CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
|
||||
if (NOT NOFORTRAN)
|
||||
set (CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1)
|
||||
set (CMAKE_Fortran_CREATE_SHARED_LIBRARY
|
||||
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
|
||||
"sh -c '${CMAKE_AR} -rs libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
|
||||
"sh -c 'echo \"\" | ${CMAKE_Fortran_COMPILER} -o dummy.o -c -x f95-cpp-input - '"
|
||||
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load dummy.o -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'"
|
||||
"sh -c 'ls -l ${CMAKE_BINARY_DIR}/lib'")
|
||||
else ()
|
||||
set (CMAKE_C_CREATE_SHARED_LIBRARY
|
||||
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/openblas_shared.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru libopenblas.a && exit 0' "
|
||||
"sh -c '${CMAKE_AR} -rs libopenblas.a ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' "
|
||||
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,libopenblas.a -Wl,-noall_load -o ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib'")
|
||||
endif ()
|
||||
if(ANDROID)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} m)
|
||||
endif()
|
||||
|
||||
# Handle MSVC exports
|
||||
|
@ -296,21 +182,21 @@ if(MSVC AND BUILD_SHARED_LIBS)
|
|||
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
|
||||
else()
|
||||
# Creates verbose .def file (51KB vs 18KB)
|
||||
set_target_properties(${OpenBLAS_LIBNAME}_shared PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true)
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Set output for libopenblas
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES EXPORT_NAME "OpenBLAS")
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES EXPORT_NAME "OpenBLAS")
|
||||
|
||||
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
|
||||
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )
|
||||
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
|
||||
endforeach()
|
||||
|
||||
enable_testing()
|
||||
|
@ -319,60 +205,40 @@ if (USE_THREAD)
|
|||
# Add threading library to linker
|
||||
find_package(Threads)
|
||||
if (THREADS_HAVE_PTHREAD_ARG)
|
||||
set_target_properties(${OpenBLAS_LIBS} PROPERTIES
|
||||
COMPILE_OPTIONS "-pthread"
|
||||
INTERFACE_COMPILE_OPTIONS "-pthread"
|
||||
)
|
||||
endif()
|
||||
if(BUILD_STATIC_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_static ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_shared ${CMAKE_THREAD_LIBS_INIT})
|
||||
set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY COMPILE_OPTIONS "-pthread")
|
||||
set_property(TARGET ${OpenBLAS_LIBNAME} PROPERTY INTERFACE_COMPILE_OPTIONS "-pthread")
|
||||
endif()
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} ${CMAKE_THREAD_LIBS_INIT})
|
||||
endif()
|
||||
|
||||
#if (MSVC OR NOT NOFORTRAN)
|
||||
if (NOT NO_CBLAS)
|
||||
if (NOT ONLY_CBLAS)
|
||||
# Broken without fortran on unix
|
||||
add_subdirectory(utest)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (NOT NOFORTRAN)
|
||||
if (NOT ONLY_CBLAS)
|
||||
if (NOT MSVC AND NOT NOFORTRAN)
|
||||
# Build test and ctest
|
||||
add_subdirectory(test)
|
||||
endif()
|
||||
if (BUILD_TESTING AND NOT BUILD_WITHOUT_LAPACK)
|
||||
add_subdirectory(lapack-netlib/TESTING)
|
||||
endif()
|
||||
endif()
|
||||
if(NOT NO_CBLAS)
|
||||
if (NOT ONLY_CBLAS)
|
||||
add_subdirectory(ctest)
|
||||
endif()
|
||||
endif()
|
||||
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
|
||||
add_subdirectory(cpp_thread_test)
|
||||
endif()
|
||||
|
||||
if (NOT FIXED_LIBNAME)
|
||||
set_target_properties(${OpenBLAS_LIBS} PROPERTIES
|
||||
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
|
||||
VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
|
||||
SOVERSION ${OpenBLAS_MAJOR_VERSION}
|
||||
)
|
||||
endif()
|
||||
|
||||
if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
|
||||
if (NOT MSVC)
|
||||
target_link_libraries(${OpenBLAS_LIBNAME}_shared "-Wl,-allow-multiple-definition")
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} "-Wl,-allow-multiple-definition")
|
||||
else()
|
||||
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE")
|
||||
target_link_libraries(${OpenBLAS_LIBNAME} "/FORCE:MULTIPLE")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
|
||||
if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFIX} STREQUAL "")
|
||||
if (NOT DEFINED ARCH)
|
||||
set(ARCH_IN "x86_64")
|
||||
else()
|
||||
|
@ -429,128 +295,22 @@ if (BUILD_SHARED_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
|
|||
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
|
||||
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
|
||||
endif()
|
||||
|
||||
if (${BUILD_LAPACK_DEPRECATED})
|
||||
set (BLD 1)
|
||||
else ()
|
||||
set (BLD 0)
|
||||
endif()
|
||||
if (${BUILD_BFLOAT16})
|
||||
set (BBF16 1)
|
||||
else ()
|
||||
set (BBF16 0)
|
||||
endif()
|
||||
if (${BUILD_SINGLE})
|
||||
set (BS 1)
|
||||
else ()
|
||||
set (BS 0)
|
||||
endif()
|
||||
if (${BUILD_DOUBLE})
|
||||
set (BD 1)
|
||||
else ()
|
||||
set (BD 0)
|
||||
endif()
|
||||
if (${BUILD_COMPLEX})
|
||||
set (BC 1)
|
||||
else ()
|
||||
set (BC 0)
|
||||
endif()
|
||||
if (${BUILD_COMPLEX16})
|
||||
set (BZ 1)
|
||||
else ()
|
||||
set (BZ 0)
|
||||
endif()
|
||||
if (NOT USE_PERL)
|
||||
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
|
||||
COMMAND ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
|
||||
add_custom_command(TARGET ${OpenBLAS_LIBNAME} POST_BUILD
|
||||
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BUILD_LAPACK_DEPRECATED}" > ${PROJECT_BINARY_DIR}/objcopy.def
|
||||
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
|
||||
COMMENT "renaming symbols"
|
||||
)
|
||||
else()
|
||||
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
|
||||
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
|
||||
COMMAND objcopy -v --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
|
||||
COMMENT "renaming symbols"
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_BENCHMARKS)
|
||||
#find_package(OpenMP REQUIRED)
|
||||
file(GLOB SOURCES "benchmark/*.c")
|
||||
if (NOT USE_OPENMP)
|
||||
file(GLOB REMFILE "benchmark/smallscaling.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
endif()
|
||||
if (BUILD_WITHOUT_LAPACK)
|
||||
file(GLOB REMFILE "benchmark/cholesky.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/geev.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/gesv.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/getri.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/potrf.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/spmv.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/symv.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
file(GLOB REMFILE "benchmark/linpack.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
endif()
|
||||
if (NOT USE_GEMM3M)
|
||||
file(GLOB REMFILE "benchmark/gemm3m.c")
|
||||
list(REMOVE_ITEM SOURCES ${REMFILE})
|
||||
endif()
|
||||
foreach(source ${SOURCES})
|
||||
get_filename_component(name ${source} NAME_WE)
|
||||
if ((NOT ${name} STREQUAL "zdot-intel") AND (NOT ${name} STREQUAL "cula_wrapper"))
|
||||
set(defines DEFAULT COMPLEX DOUBLE "COMPLEX\;DOUBLE")
|
||||
foreach(define ${defines})
|
||||
set(target_name "benchmark_${name}")
|
||||
if (NOT "${define}" STREQUAL "DEFAULT")
|
||||
string(JOIN "_" define_str ${define})
|
||||
set(target_name "${target_name}_${define_str}")
|
||||
endif()
|
||||
if ((NOT ${target_name} STREQUAL "benchmark_imax_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_imax_COMPLEX_DOUBLE") AND
|
||||
(NOT ${target_name} STREQUAL "benchmark_imin_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_imin_COMPLEX_DOUBLE") AND
|
||||
(NOT ${target_name} STREQUAL "benchmark_max_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_max_COMPLEX_DOUBLE") AND
|
||||
(NOT ${target_name} STREQUAL "benchmark_min_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_min_COMPLEX_DOUBLE"))
|
||||
add_executable(${target_name} ${source})
|
||||
target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||
target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} )
|
||||
# target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} OpenMP::OpenMP_C)
|
||||
if (NOT "${define}" STREQUAL "DEFAULT")
|
||||
target_compile_definitions(${target_name} PRIVATE ${define})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
|
||||
# Install project
|
||||
|
||||
# Install libraries
|
||||
if(BUILD_SHARED_LIBS AND BUILD_STATIC_LIBS)
|
||||
install(TARGETS ${OpenBLAS_LIBNAME}_shared
|
||||
install(TARGETS ${OpenBLAS_LIBNAME}
|
||||
EXPORT "OpenBLAS${SUFFIX64}Targets"
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
|
||||
install(TARGETS ${OpenBLAS_LIBNAME}_static
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
|
||||
else()
|
||||
install(TARGETS ${OpenBLAS_LIBS}
|
||||
EXPORT "OpenBLAS${SUFFIX64}Targets"
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
|
||||
endif()
|
||||
|
||||
# Install headers
|
||||
set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
|
||||
|
@ -587,49 +347,36 @@ endif()
|
|||
|
||||
if(NOT NO_CBLAS)
|
||||
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
|
||||
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
|
||||
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
|
||||
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
if (NOT ${SYMBOLPREFIX} STREQUAL "")
|
||||
string(REPLACE " cblas" " ${SYMBOLPREFIX}cblas" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REPLACE " openblas" " ${SYMBOLPREFIX}openblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
string (REPLACE " ${SYMBOLPREFIX}openblas_complex" " openblas_complex" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REPLACE " goto" " ${SYMBOLPREFIX}goto" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
endif()
|
||||
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
|
||||
string(REGEX REPLACE "(cblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REGEX REPLACE "(openblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
string(REGEX REPLACE "(openblas_complex[^ ]*)${SYMBOLSUFFIX}" "\\1" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
|
||||
string(REGEX REPLACE "(goto[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
|
||||
endif()
|
||||
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
|
||||
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
endif()
|
||||
|
||||
if(NOT NO_LAPACKE)
|
||||
message (STATUS "Copying LAPACKE header files to ${CMAKE_INSTALL_INCLUDEDIR}")
|
||||
if(BUILD_STATIC_LIBS)
|
||||
add_dependencies( ${OpenBLAS_LIBNAME}_static genlapacke)
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
add_dependencies( ${OpenBLAS_LIBNAME}_shared genlapacke)
|
||||
endif()
|
||||
add_dependencies( ${OpenBLAS_LIBNAME} genlapacke)
|
||||
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
|
||||
install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
ADD_CUSTOM_TARGET(genlapacke
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
|
||||
)
|
||||
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
|
||||
endif()
|
||||
|
||||
# Install pkg-config files
|
||||
include(FindPkgConfig QUIET)
|
||||
if(PKG_CONFIG_FOUND)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
|
||||
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
|
||||
endif()
|
||||
|
||||
|
||||
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".
|
||||
set(PN OpenBLAS)
|
||||
set(CMAKECONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PN}${SUFFIX64}")
|
||||
set(CMAKECONFIG_INSTALL_DIR "share/cmake/${PN}${SUFFIX64}")
|
||||
configure_package_config_file(cmake/${PN}Config.cmake.in
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake"
|
||||
INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
|
@ -644,3 +391,4 @@ install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
|
|||
install(EXPORT "${PN}${SUFFIX64}Targets"
|
||||
NAMESPACE "${PN}${SUFFIX64}::"
|
||||
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
|
||||
|
||||
|
|
|
@ -23,9 +23,6 @@
|
|||
* Optimization on AMD Piledriver
|
||||
* Optimization on Intel Haswell
|
||||
|
||||
* Chris Sidebottom <chris.sidebottom@arm.com>
|
||||
* Optimizations and other improvements targeting AArch64
|
||||
|
||||
## Previous Developers
|
||||
|
||||
* Zaheer Chothia <zaheer.chothia@gmail.com>
|
||||
|
@ -183,49 +180,3 @@ In chronological order:
|
|||
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
|
||||
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
|
||||
* [2020-01-07] optimize AVX2 SGEMM and STRMM
|
||||
|
||||
* Rajalakshmi Srinivasaraghavan <https://github.com/RajalakshmiSR>
|
||||
* [2020-04-15] Half-precision GEMM for bfloat16
|
||||
|
||||
* Marius Hillenbrand <https://github.com/mhillenibm>
|
||||
* [2020-05-12] Revise dynamic architecture detection for IBM z
|
||||
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
|
||||
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support
|
||||
|
||||
* Danfeng Zhang <https://github.com/craft-zhang>
|
||||
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
|
||||
|
||||
* PingTouGe Semiconductor Co., Ltd.
|
||||
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910
|
||||
|
||||
* Jake Arkinstall <https://github.com/jake-arkinstall>
|
||||
* [2021-02-10] Remove in-source configure_file to enable builds in read-only contexts (issue #3100, PR #3101)
|
||||
|
||||
* River Dillon <oss@outerpassage.net>
|
||||
* [2021-07-10] fix compilation with musl libc
|
||||
|
||||
* Bine Brank <https://github.com/binebrank>
|
||||
* [2021-10-27] Add vector-length-agnostic DGEMM kernels for Arm SVE
|
||||
* [2021-11-20] Vector-length-agnostic Arm SVE copy routines for DGEMM, DTRMM, DSYMM
|
||||
* [2021-11-12] SVE kernels for SGEMM, STRMM and corresponding SVE copy functions
|
||||
* [2022-01-06] SVE kernels for CGEMM, ZGEMM, CTRMM, ZTRMM and corresponding SVE copy functions
|
||||
* [2022-01-18] SVE kernels and copy functions for TRSM
|
||||
|
||||
* Ilya Kurdyukov <https://github.com/ilyakurdyukov>
|
||||
* [2021-02-21] Add basic support for the Elbrus E2000 architecture
|
||||
|
||||
* PLCT Lab, Institute of Software Chinese Academy of Sciences
|
||||
* [2022-03] Support RISC-V Vector Intrinsic 1.0 version.
|
||||
|
||||
* Pablo Romero <https://github.com/pablorcum>
|
||||
* [2022-08] Fix building from sources for QNX
|
||||
|
||||
* Mark Seminatore <https://github.com/mseminatore>
|
||||
* [2023-11-09] Improve Windows threading performance scaling
|
||||
* [2024-02-09] Introduce MT_TRACE facility and improve code consistency
|
||||
|
||||
* Dirreke <https://github.com/Dirreke>
|
||||
* [2024-01-16] Add basic support for the CSKY architecture
|
||||
|
||||
* Christopher Daley <https://github.com/cdaley>
|
||||
* [2024-01-24] Optimize GEMV forwarding on ARM64 systems
|
||||
|
|
1096
Changelog.txt
1096
Changelog.txt
File diff suppressed because it is too large
Load Diff
|
@ -80,7 +80,7 @@
|
|||
SUN
|
||||
Fujitsu
|
||||
|
||||
4. Supported precision
|
||||
4. Suported precision
|
||||
|
||||
Now x86/x86_64 version support 80bit FP precision in addition to
|
||||
normal double precision and single precision. Currently only
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
operation is finished.
|
||||
|
||||
|
||||
2. Similar problem may happen under virtual machine. If supervisor
|
||||
2. Simlar problem may happen under virtual machine. If supervisor
|
||||
allocates different cores for each scheduling, BLAS performance
|
||||
will be bad. This is because BLAS also utilizes all cache,
|
||||
unexpected re-schedule for different core may result of heavy
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
pipeline {
|
||||
agent {
|
||||
docker {
|
||||
image 'osuosl/ubuntu-s390x'
|
||||
}
|
||||
}
|
||||
stages {
|
||||
stage('Build') {
|
||||
steps {
|
||||
sh 'make clean && make'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,16 +0,0 @@
|
|||
pipeline {
|
||||
agent {
|
||||
docker {
|
||||
image 'osuosl/ubuntu-ppc64le:18.04'
|
||||
}
|
||||
}
|
||||
stages {
|
||||
stage('Build') {
|
||||
steps {
|
||||
sh 'sudo apt update'
|
||||
sh 'sudo apt install gfortran -y'
|
||||
sh 'make clean && make'
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
135
Makefile
135
Makefile
|
@ -1,9 +1,5 @@
|
|||
TOPDIR = .
|
||||
include ./Makefile.system
|
||||
LNCMD = ln -fs
|
||||
ifeq ($(FIXED_LIBNAME), 1)
|
||||
LNCMD = true
|
||||
endif
|
||||
|
||||
BLASDIRS = interface driver/level2 driver/level3 driver/others
|
||||
|
||||
|
@ -29,32 +25,21 @@ ifeq ($(NO_FORTRAN), 1)
|
|||
define NOFORTRAN
|
||||
1
|
||||
endef
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
define C_LAPACK
|
||||
define NO_LAPACK
|
||||
1
|
||||
endef
|
||||
endif
|
||||
export NOFORTRAN
|
||||
export NO_LAPACK
|
||||
export C_LAPACK
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER),CRAY)
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS))
|
||||
else
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS))
|
||||
endif
|
||||
|
||||
ifdef LAPACK_STRLEN
|
||||
LAPACK_FFLAGS += -DLAPACK_STRLEN=$(LAPACK_STRLEN)
|
||||
endif
|
||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS))
|
||||
|
||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test
|
||||
|
||||
.PHONY : all libs netlib $(RELA) test ctest shared install
|
||||
.NOTPARALLEL : shared
|
||||
.NOTPARALLEL : all libs $(RELA) prof lapack-test install blas-test
|
||||
|
||||
all :: tests
|
||||
all :: libs netlib $(RELA) tests shared
|
||||
@echo
|
||||
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
|
||||
@echo
|
||||
|
@ -74,9 +59,6 @@ endif
|
|||
@$(CC) --version > /dev/null 2>&1;\
|
||||
if [ $$? -eq 0 ]; then \
|
||||
cverinfo=`$(CC) --version | sed -n '1p'`; \
|
||||
if [ -z "$${cverinfo}" ]; then \
|
||||
cverinfo=`$(CC) --version | sed -n '2p'`; \
|
||||
fi; \
|
||||
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
|
||||
else \
|
||||
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
|
||||
|
@ -85,9 +67,6 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
|||
@$(FC) --version > /dev/null 2>&1;\
|
||||
if [ $$? -eq 0 ]; then \
|
||||
fverinfo=`$(FC) --version | sed -n '1p'`; \
|
||||
if [ -z "$${fverinfo}" ]; then \
|
||||
fverinfo=`$(FC) --version | sed -n '2p'`; \
|
||||
fi; \
|
||||
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
|
||||
else \
|
||||
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
|
||||
|
@ -122,10 +101,6 @@ ifeq ($(OSNAME), Darwin)
|
|||
@echo "\"make PREFIX=/your_installation_path/ install\"."
|
||||
@echo
|
||||
@echo "(or set PREFIX in Makefile.rule and run make install."
|
||||
@echo
|
||||
@echo "Note that any flags passed to make during build should also be passed to make install"
|
||||
@echo "to circumvent any install errors."
|
||||
@echo
|
||||
@echo "If you want to move the .dylib to a new location later, make sure you change"
|
||||
@echo "the internal name of the dylib with:"
|
||||
@echo
|
||||
|
@ -134,25 +109,22 @@ endif
|
|||
@echo
|
||||
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
|
||||
@echo
|
||||
@echo "Note that any flags passed to make during build should also be passed to make install"
|
||||
@echo "to circumvent any install errors."
|
||||
@echo
|
||||
|
||||
shared : libs netlib $(RELA)
|
||||
shared :
|
||||
ifneq ($(NO_SHARED), 1)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
|
||||
@$(MAKE) -C exports so
|
||||
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
|
||||
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
|
||||
@$(MAKE) -C exports so
|
||||
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
|
||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@$(MAKE) -C exports dyn
|
||||
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib
|
||||
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@$(MAKE) -C exports dll
|
||||
|
@ -160,42 +132,36 @@ endif
|
|||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
@$(MAKE) -C exports dll
|
||||
endif
|
||||
ifeq ($(OSNAME), AIX)
|
||||
@$(MAKE) -C exports so
|
||||
endif
|
||||
endif
|
||||
|
||||
tests : shared
|
||||
tests :
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
touch $(LIBNAME)
|
||||
ifndef NO_FBLAS
|
||||
$(MAKE) -C test all
|
||||
endif
|
||||
endif
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
$(MAKE) -C utest all
|
||||
endif
|
||||
ifneq ($(NO_CBLAS), 1)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
ifndef NO_CBLAS
|
||||
$(MAKE) -C ctest all
|
||||
endif
|
||||
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
|
||||
$(MAKE) -C cpp_thread_test all
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
libs :
|
||||
ifeq ($(CORE), UNKNOWN)
|
||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), 1)
|
||||
$(info OpenBLAS: Detecting fortran compiler failed. Can only compile BLAS and f2c-converted LAPACK.)
|
||||
$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.)
|
||||
endif
|
||||
ifeq ($(NO_STATIC), 1)
|
||||
ifeq ($(NO_SHARED), 1)
|
||||
$(error OpenBLAS: neither static nor shared are enabled.)
|
||||
endif
|
||||
endif
|
||||
@-ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
|
@ -221,32 +187,15 @@ ifeq ($(DYNAMIC_OLDER), 1)
|
|||
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
|
||||
endif
|
||||
endif
|
||||
@echo TARGET=$(CORE) >> Makefile.conf_last
|
||||
ifdef USE_THREAD
|
||||
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
|
||||
endif
|
||||
ifdef SMP
|
||||
ifdef NUM_THREADS
|
||||
@echo NUM_THREADS=$(NUM_THREADS) >> Makefile.conf_last
|
||||
else
|
||||
@echo NUM_THREADS=$(NUM_CORES) >> Makefile.conf_last
|
||||
endif
|
||||
endif
|
||||
ifeq ($(USE_OPENMP),1)
|
||||
@echo USE_OPENMP=1 >> Makefile.conf_last
|
||||
endif
|
||||
ifeq ($(INTERFACE64),1)
|
||||
@echo INTERFACE64=1 >> Makefile.conf_last
|
||||
endif
|
||||
@echo THELIBNAME=$(LIBNAME) >> Makefile.conf_last
|
||||
@echo THELIBSONAME=$(LIBSONAME) >> Makefile.conf_last
|
||||
@-$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
@touch lib.grd
|
||||
|
||||
prof : prof_blas prof_lapack
|
||||
|
||||
prof_blas :
|
||||
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d prof || exit 1 ; \
|
||||
|
@ -257,7 +206,7 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
|||
endif
|
||||
|
||||
blas :
|
||||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(BLASDIRS) ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d libs || exit 1 ; \
|
||||
|
@ -265,7 +214,7 @@ blas :
|
|||
done
|
||||
|
||||
hpl :
|
||||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
for d in $(BLASDIRS) ../laswp exports ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
|
@ -279,21 +228,26 @@ ifeq ($(DYNAMIC_ARCH), 1)
|
|||
endif
|
||||
|
||||
hpl_p :
|
||||
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
|
||||
for d in $(SUBDIRS) ../laswp exports ; \
|
||||
do if test -d $$d; then \
|
||||
$(MAKE) -C $$d $(@F) || exit 1 ; \
|
||||
fi; \
|
||||
done
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
netlib :
|
||||
|
||||
else
|
||||
netlib : lapack_prebuild
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
|
||||
endif
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
ifndef NO_LAPACKE
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(NO_LAPACK), 1)
|
||||
re_lapack :
|
||||
|
@ -307,26 +261,13 @@ prof_lapack : lapack_prebuild
|
|||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof
|
||||
|
||||
lapack_prebuild :
|
||||
ifeq ($(NO_LAPACK), $(filter 0,$(NO_LAPACK)))
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
-@echo "override FFLAGS = $(LAPACK_FFLAGS) -fno-tree-vectorize" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "override FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGGFORTRAN1)
|
||||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGIBM1)
|
||||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
else
|
||||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
endif
|
||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
|
@ -359,18 +300,6 @@ else
|
|||
endif
|
||||
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
|
||||
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE), 1)
|
||||
-@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_DOUBLE), 1)
|
||||
-@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_COMPLEX), 1)
|
||||
-@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
ifeq ($(BUILD_COMPLEX16), 1)
|
||||
-@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
endif
|
||||
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
|
||||
|
@ -408,15 +337,14 @@ ifneq ($(CROSS), 1)
|
|||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
|
||||
endif
|
||||
|
||||
lapack-runtest: lapack-test
|
||||
lapack-runtest:
|
||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
|
||||
./testsecond; ./testdsecnd; ./testieee; ./testversion )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING )
|
||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r )
|
||||
|
||||
|
||||
blas-test:
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out)
|
||||
|
||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
|
||||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out)
|
||||
|
||||
|
@ -436,12 +364,11 @@ clean ::
|
|||
@$(MAKE) -C kernel clean
|
||||
#endif
|
||||
@$(MAKE) -C reference clean
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0
|
||||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@rm -rf getarch.dSYM getarch_2nd.dSYM
|
||||
endif
|
||||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
|
||||
@rm -f cblas.tmp cblas.tmp2
|
||||
@touch $(NETLIB_LAPACK_DIR)/make.inc
|
||||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
|
||||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling.h
|
||||
|
|
|
@ -1,24 +1,42 @@
|
|||
CPP = $(CC) -E
|
||||
RANLIB = ranlib
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
LIBNAME = $(LIBPREFIX)_ev4.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev4_p.a
|
||||
endif
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
LIBNAME = $(LIBPREFIX)_ev5.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev5_p.a
|
||||
endif
|
||||
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
LIBNAME = $(LIBPREFIX)_ev6.a
|
||||
LIBNAME_P = $(LIBPREFIX)_ev6_p.a
|
||||
endif
|
||||
|
||||
ifneq ($(COMPILER), NATIVE)
|
||||
# GCC User
|
||||
ifeq ($(CORE), EV4)
|
||||
CCOMMON_OPT += -mcpu=ev4
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
OPTION += -DEV4 -mcpu=ev4
|
||||
endif
|
||||
ifeq ($(CORE), EV5)
|
||||
CCOMMON_OPT += -mcpu=ev5
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
OPTION += -DEV5 -mcpu=ev5
|
||||
endif
|
||||
ifeq ($(CORE), EV6)
|
||||
CCOMMON_OPT += -mcpu=ev6
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
OPTION += -DEV6 -mcpu=ev6
|
||||
endif
|
||||
else
|
||||
# Compaq Compiler User
|
||||
ifeq ($(CORE), EV4)
|
||||
CCOMMON_OPT += -tune ev4 -arch ev4
|
||||
ifeq ($(LIBSUBARCH), EV4)
|
||||
OPTION += -DEV4 -tune ev4 -arch ev4
|
||||
endif
|
||||
ifeq ($(CORE), EV5)
|
||||
CCOMMON_OPT += -tune ev5 -arch ev5
|
||||
ifeq ($(LIBSUBARCH), EV5)
|
||||
OPTION += -DEV5 -tune ev5 -arch ev5
|
||||
endif
|
||||
ifeq ($(CORE), EV6)
|
||||
CCOMMON_OPT += -tune ev6 -arch ev6
|
||||
ifeq ($(LIBSUBARCH), EV6)
|
||||
OPTION += -DEV6 -tune ev6 -arch ev6
|
||||
endif
|
||||
endif
|
||||
|
||||
|
|
|
@ -12,8 +12,3 @@ ifeq ($(CORE), ARMV6)
|
|||
CCOMMON_OPT += -mfpu=vfp
|
||||
FCOMMON_OPT += -mfpu=vfp
|
||||
endif
|
||||
|
||||
ifdef HAVE_NEON
|
||||
CCOMMON_OPT += -mfpu=neon
|
||||
FCOMMON_OPT += -mfpu=neon
|
||||
endif
|
||||
|
|
295
Makefile.arm64
295
Makefile.arm64
|
@ -1,354 +1,65 @@
|
|||
ifneq ($(C_COMPILER), PGI)
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
ISCLANG=1
|
||||
endif
|
||||
ifeq ($(C_COMPILER), FUJITSU)
|
||||
ISCLANG=1
|
||||
endif
|
||||
ifneq (1, $(filter 1,$(GCCVERSIONGT4) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv8-a
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a
|
||||
endif
|
||||
|
||||
|
||||
else
|
||||
|
||||
|
||||
ifeq ($(CORE), ARMV8)
|
||||
CCOMMON_OPT += -march=armv8-a
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ARMV8SVE)
|
||||
CCOMMON_OPT += -march=armv8-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a+sve
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA53)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA57)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA72)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA73)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), CORTEXA76)
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), FT2000)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-N1 is only available
|
||||
# in GCC>=9
|
||||
ifeq ($(CORE), NEOVERSEN1)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
|
||||
ifeq ($(GCCVERSIONGTEQ7), 1)
|
||||
ifeq ($(GCCVERSIONGTEQ9), 1)
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-V1 is only available
|
||||
# in GCC>=10.4
|
||||
ifeq ($(CORE), NEOVERSEV1)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifeq (1, $(ISCLANG))
|
||||
CCOMMON_OPT += -mtune=cortex-x1
|
||||
else
|
||||
CCOMMON_OPT += -mtune=neoverse-v1
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(CROSS), 1)
|
||||
CCOMMON_OPT += -mtune=native
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a
|
||||
ifneq ($(CROSS), 1)
|
||||
FCOMMON_OPT += -mtune=native
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Use a72 tunings because Neoverse-N2 is only available
|
||||
# in GCC>=10.4
|
||||
ifeq ($(CORE), NEOVERSEN2)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifneq ($(OSNAME), Darwin)
|
||||
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=cortex-a72
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.5-a+sve+bf16
|
||||
ifneq ($(CROSS), 1)
|
||||
CCOMMON_OPT += -mtune=native
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.5-a
|
||||
ifneq ($(CROSS), 1)
|
||||
FCOMMON_OPT += -mtune=native
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8-a+sve+bf16 -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Detect ARM Neoverse V2.
|
||||
ifeq ($(CORE), NEOVERSEV2)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv9-a -mtune=neoverse-v2
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv9-a -mtune=neoverse-v2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
# Use a53 tunings because a55 is only available in GCC>=8.1
|
||||
ifeq ($(CORE), CORTEXA55)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ8) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
|
||||
endif
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), FALKOR)
|
||||
CCOMMON_OPT += -march=armv8-a -mtune=falkor
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=falkor
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX2T99)
|
||||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), THUNDERX3T110)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv8.3-a
|
||||
ifeq (0, $(ISCLANG))
|
||||
CCOMMON_OPT += -mtune=thunderx3t110
|
||||
else
|
||||
CCOMMON_OPT += -mtune=thunderx2t99
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), VORTEX)
|
||||
CCOMMON_OPT += -march=armv8.3-a
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.3-a
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
|
||||
ifeq ($(GCCVERSIONGTEQ9), 1)
|
||||
ifeq ($(CORE), TSV110)
|
||||
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
|
||||
ifeq ($(CORE), EMAG8180)
|
||||
CCOMMON_OPT += -march=armv8-a
|
||||
ifeq ($(ISCLANG), 0)
|
||||
CCOMMON_OPT += -mtune=emag
|
||||
endif
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8-a -mtune=emag
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), A64FX)
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ3) $(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve -mtune=neoverse-n1
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-n1
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXX1)
|
||||
CCOMMON_OPT += -march=armv8.2-a
|
||||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ12) $(ISCLANG)))
|
||||
CCOMMON_OPT += -mtune=cortex-x1
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-x1
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -mtune=cortex-a72
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXX2)
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||
endif
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
|
||||
CCOMMON_OPT += -mtune=cortex-x2
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -mtune=cortex-x2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
#ifeq (1, $(filter 1,$(ISCLANG)))
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXA510)
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
|
||||
ifeq ($(CORE), CORTEXA710)
|
||||
CCOMMON_OPT += -march=armv8.4-a+sve
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=armv8.4-a+sve
|
||||
endif
|
||||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
|
||||
CCOMMON_OPT += -mtune=cortex-a710
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -mtune=cortex-a710
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
endif
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
ifeq ($(CORE), CK860FV)
|
||||
CCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
|
||||
FCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float -static
|
||||
endif
|
|
@ -1 +0,0 @@
|
|||
COPT = -Wall -O2 # -DGEMMTEST
|
109
Makefile.install
109
Makefile.install
|
@ -2,21 +2,6 @@ TOPDIR = .
|
|||
export GOTOBLAS_MAKEFILE = 1
|
||||
-include $(TOPDIR)/Makefile.conf_last
|
||||
include ./Makefile.system
|
||||
LNCMD = ln -fs
|
||||
|
||||
ifdef THELIBNAME
|
||||
LIBNAME=$(THELIBNAME)
|
||||
LIBSONAME=$(THELIBSONAME)
|
||||
endif
|
||||
ifeq ($(FIXED_LIBNAME), 1)
|
||||
LNCMD = true
|
||||
endif
|
||||
ifeq ($(INTERFACE64),1)
|
||||
USE_64BITINT=1
|
||||
endif
|
||||
ifeq ($(USE_OPENMP),1)
|
||||
FOMP_OPT:= -fopenmp
|
||||
endif
|
||||
|
||||
PREFIX ?= /opt/OpenBLAS
|
||||
|
||||
|
@ -24,23 +9,10 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
|
|||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
|
||||
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
|
||||
OPENBLAS_BUILD_DIR := $(CURDIR)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/$(LIBSONAMEBASE)
|
||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
|
||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
|
||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
|
||||
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
|
||||
PKG_EXTRALIB := $(EXTRALIB)
|
||||
ifeq ($(INTERFACE64),1)
|
||||
SUFFIX64=64
|
||||
endif
|
||||
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(C_COMPILER), PGI)
|
||||
PKG_EXTRALIB += -lomp
|
||||
else
|
||||
PKG_EXTRALIB += -lgomp
|
||||
endif
|
||||
endif
|
||||
|
||||
.PHONY : install
|
||||
.NOTPARALLEL : install
|
||||
|
@ -73,62 +45,47 @@ install : lib.grd
|
|||
|
||||
ifndef NO_CBLAS
|
||||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@cp cblas.h cblas.tmp
|
||||
ifdef SYMBOLPREFIX
|
||||
@sed 's/cblas[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/openblas[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
|
||||
#change back any openblas_complex_float and double that got hit
|
||||
@sed 's/$(SYMBOLPREFIX)openblas_complex_/openblas_complex_/g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/goto[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
|
||||
endif
|
||||
ifdef SYMBOLSUFFIX
|
||||
@sed 's/cblas[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/openblas[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
|
||||
#change back any openblas_complex_float and double that got hit
|
||||
@sed 's/\(openblas_complex_\)\([^ ]*\)$(SYMBOLSUFFIX)/\1\2 /g' cblas.tmp > cblas.tmp2
|
||||
@sed 's/goto[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
|
||||
endif
|
||||
@sed 's/common/openblas_config/g' cblas.tmp > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
||||
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
|
||||
endif
|
||||
|
||||
ifneq ($(OSNAME), AIX)
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
|
||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
|
||||
endif
|
||||
|
||||
#for install static library
|
||||
ifneq ($(NO_STATIC),1)
|
||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@install -m644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifneq ($(NO_SHARED),1)
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
|
||||
@install -m755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
|
||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so
|
||||
endif
|
||||
ifeq ($(OSNAME), Darwin)
|
||||
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@-install_name_tool -id "$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).$(MAJOR_VERSION).dylib" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
|
||||
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
|
||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
|
||||
|
@ -142,7 +99,7 @@ endif
|
|||
|
||||
else
|
||||
#install on AIX has different options syntax
|
||||
ifneq ($(NO_LAPACKE), 1)
|
||||
ifndef NO_LAPACKE
|
||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
|
||||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
|
||||
|
@ -156,36 +113,27 @@ ifneq ($(NO_STATIC),1)
|
|||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
|
||||
endif
|
||||
#for install shared library
|
||||
ifneq ($(NO_SHARED),1)
|
||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
|
||||
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
|
||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
|
||||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \
|
||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
#Generating openblas.pc
|
||||
ifeq ($(INTERFACE64),1)
|
||||
SUFFIX64=64
|
||||
endif
|
||||
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"
|
||||
|
||||
@echo Generating $(LIBSONAMEBASE)$(SUFFIX64).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(PKGFILE)"
|
||||
@echo 'libprefix='$(LIBNAMEPREFIX) >> "$(PKGFILE)"
|
||||
@echo 'libnamesuffix='$(LIBNAMESUFFIX) >> "$(PKGFILE)"
|
||||
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)"
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)"
|
||||
@echo 'omp_opt='$(FOMP_OPT) >> "$(PKGFILE)"
|
||||
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(TARGET) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)"
|
||||
@echo 'version='$(VERSION) >> "$(PKGFILE)"
|
||||
@echo 'extralib='$(PKG_EXTRALIB) >> "$(PKGFILE)"
|
||||
@cat openblas.pc.in >> "$(PKGFILE)"
|
||||
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
|
||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@echo 'extralib='$(EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
|
||||
|
||||
|
||||
#Generating OpenBLASConfig.cmake
|
||||
|
@ -196,7 +144,7 @@ endif
|
|||
ifneq ($(NO_SHARED),1)
|
||||
#ifeq logical or
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
endif
|
||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
|
||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
|
||||
|
@ -220,3 +168,4 @@ endif
|
|||
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
|
||||
@echo Install OK!
|
||||
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
ifdef BINARY64
|
||||
else
|
||||
endif
|
|
@ -1,4 +1,3 @@
|
|||
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
|
||||
ifdef BINARY64
|
||||
else
|
||||
endif
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
|
||||
ifdef BINARY64
|
||||
else
|
||||
endif
|
||||
|
|
116
Makefile.power
116
Makefile.power
|
@ -9,110 +9,26 @@ else
|
|||
USE_OPENMP = 1
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER10)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
ifeq ($(GCCVERSIONGTEQ10), 1)
|
||||
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
||||
else ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
|
||||
else
|
||||
$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
|
||||
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
|
||||
endif
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER9)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -Ofast -mvsx -fno-fast-math
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
CCOMMON_OPT += -mcpu=power8 -mtune=power8
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
else
|
||||
CCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
else
|
||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
||||
endif
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -fno-fast-math -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
ifneq ($(GCCVERSIONGT4), 1)
|
||||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
|
||||
FCOMMON_OPT += -mcpu=power8 -mtune=power8
|
||||
else
|
||||
FCOMMON_OPT += -mcpu=power9 -mtune=power9
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -O2 -Mrecursive
|
||||
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), POWER8)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
|
||||
else
|
||||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
|
||||
endif
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
ifeq ($(OSNAME), AIX)
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||
endif
|
||||
else
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
|
||||
else
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
else
|
||||
FCOMMON_OPT += -O2 -Mrecursive
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifneq ($(C_COMPILER), PGI)
|
||||
CCOMMON_OPT += -DUSE_OPENMP -fopenmp
|
||||
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
|
||||
else
|
||||
CCOMMON_OPT += -DUSE_OPENMP -mp
|
||||
endif
|
||||
ifeq ($(F_COMPILER), IBM)
|
||||
FCOMMON_OPT += -DUSE_OPENMP
|
||||
else
|
||||
ifneq ($(F_COMPILER), PGI)
|
||||
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
|
||||
else
|
||||
FCOMMON_OPT += -DUSE_OPENMP -mp
|
||||
endif
|
||||
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
|
||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -fno-integrated-as
|
||||
endif
|
||||
# workaround for C->FORTRAN ABI violation in LAPACKE
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -fno-optimize-sibling-calls
|
||||
|
@ -147,25 +63,11 @@ endif
|
|||
ifdef BINARY64
|
||||
|
||||
|
||||
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX)
|
||||
$(error Using GCC and XLF on AIX is not a supported combination.)
|
||||
endif
|
||||
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX)
|
||||
$(error Using Clang and gFortran on AIX is not a supported combination.)
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), AIX)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -mpowerpc64 -maix64
|
||||
else
|
||||
CCOMMON_OPT += -m64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), g77)
|
||||
FCOMMON_OPT += -mpowerpc64 -maix64
|
||||
endif
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
FCOMMON_OPT += -mpowerpc64 -maix64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), xlf)
|
||||
FCOMMON_OPT += -q64
|
||||
endif
|
||||
|
|
|
@ -3,10 +3,6 @@
|
|||
export BINARY
|
||||
export USE_OPENMP
|
||||
|
||||
ifdef DYNAMIC_ARCH
|
||||
override HOST_CFLAGS += -DDYNAMIC_ARCH
|
||||
endif
|
||||
|
||||
ifdef TARGET_CORE
|
||||
TARGET_MAKE = Makefile_kernel.conf
|
||||
TARGET_CONF = config_kernel.h
|
||||
|
@ -15,23 +11,13 @@ TARGET_MAKE = Makefile.conf
|
|||
TARGET_CONF = config.h
|
||||
endif
|
||||
|
||||
ifdef USE_PERL
|
||||
SCRIPTSUFFIX = .pl
|
||||
else
|
||||
SCRIPTSUFFIX =
|
||||
endif
|
||||
|
||||
# CPUIDEMU = ../../cpuid/table.o
|
||||
|
||||
ifdef CPUIDEMU
|
||||
EXFLAGS = -DCPUIDEMU -DVENDOR=99
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), MIPS24K)
|
||||
TARGET_FLAGS = -mips32r2
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), MIPS1004K)
|
||||
ifeq ($(TARGET), 1004K)
|
||||
TARGET_FLAGS = -mips32r2
|
||||
endif
|
||||
|
||||
|
@ -51,38 +37,14 @@ ifeq ($(TARGET), I6500)
|
|||
TARGET_FLAGS = -mips64r6
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), C910V)
|
||||
TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), CK860FV)
|
||||
TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), x280)
|
||||
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), RISCV64_ZVL256B)
|
||||
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), RISCV64_ZVL128B)
|
||||
TARGET_FLAGS = -march=rv64imafdcv -mabi=lp64d
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET), RISCV64_GENERIC)
|
||||
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
|
||||
endif
|
||||
|
||||
all: getarch_2nd
|
||||
./getarch_2nd 0 >> $(TARGET_MAKE)
|
||||
./getarch_2nd 1 >> $(TARGET_CONF)
|
||||
|
||||
$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch
|
||||
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" $(TARGET_FLAGS) $(CFLAGS)
|
||||
config.h : c_check f_check getarch
|
||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) $(CFLAGS)
|
||||
ifneq ($(ONLY_CBLAS), 1)
|
||||
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" $(TARGET_FLAGS)
|
||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS)
|
||||
else
|
||||
#When we only build CBLAS, we set NOFORTRAN=2
|
||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
|
||||
|
@ -97,11 +59,9 @@ endif
|
|||
|
||||
|
||||
getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
|
||||
avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
|
||||
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \
|
||||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) -o $(@F) getarch.c cpuid.S $(CPUIDEMU)
|
||||
|
||||
getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy
|
||||
getarch_2nd : getarch_2nd.c config.h dummy
|
||||
ifndef TARGET_CORE
|
||||
$(HOSTCC) -I. $(HOST_CFLAGS) -o $(@F) getarch_2nd.c
|
||||
else
|
||||
|
@ -109,5 +69,3 @@ else
|
|||
endif
|
||||
|
||||
dummy:
|
||||
|
||||
.PHONY: dummy
|
||||
|
|
|
@ -1,20 +0,0 @@
|
|||
ifeq ($(CORE), C910V)
|
||||
CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920
|
||||
FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static
|
||||
endif
|
||||
ifeq ($(CORE), x280)
|
||||
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d -ffast-math
|
||||
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
|
||||
endif
|
||||
ifeq ($(CORE), RISCV64_ZVL256B)
|
||||
CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d
|
||||
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
|
||||
endif
|
||||
ifeq ($(CORE), RISCV64_ZVL128B)
|
||||
CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
|
||||
FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d
|
||||
endif
|
||||
ifeq ($(CORE), RISCV64_GENERIC)
|
||||
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
|
||||
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
|
||||
endif
|
|
@ -3,12 +3,7 @@
|
|||
#
|
||||
|
||||
# This library's version
|
||||
VERSION = 0.3.28.dev
|
||||
|
||||
# If you set this prefix, the library name will be lib$(LIBNAMEPREFIX)openblas.a
|
||||
# and lib$(LIBNAMEPREFIX)openblas.so, with a matching soname in the shared library
|
||||
#
|
||||
# LIBNAMEPREFIX = scipy
|
||||
VERSION = 0.3.9.dev
|
||||
|
||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
|
||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
|
||||
|
@ -134,17 +129,8 @@ VERSION = 0.3.28.dev
|
|||
# Build LAPACK Deprecated functions since LAPACK 3.6.0
|
||||
BUILD_LAPACK_DEPRECATED = 1
|
||||
|
||||
# The variable type assumed for the length of character arguments when passing
|
||||
# data between Fortran LAPACK and C BLAS (defaults to "size_t", but older GCC
|
||||
# versions used "int"). Mismatches will not cause runtime failures but may result
|
||||
# in build warnings or errors when building with link-time optimization (LTO)
|
||||
# LAPACK_STRLEN=int
|
||||
|
||||
# Build RecursiveLAPACK on top of LAPACK
|
||||
# BUILD_RELAPACK = 1
|
||||
# Have RecursiveLAPACK actually replace standard LAPACK routines instead of
|
||||
# just adding its equivalents with a RELAPACK_ prefix
|
||||
# RELAPACK_REPLACE = 1
|
||||
|
||||
# If you want to use the legacy threaded Level 3 implementation.
|
||||
# USE_SIMPLE_THREADED_LEVEL3 = 1
|
||||
|
@ -179,10 +165,6 @@ NO_AFFINITY = 1
|
|||
# If you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
|
||||
# BIGNUMA = 1
|
||||
|
||||
# If you are compiling for an embedded system ("bare metal") like Cortex M series
|
||||
# Note that you will have to provide implementations of malloc() and free() in this case
|
||||
# EMBEDDED = 1
|
||||
|
||||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
|
||||
# and OS. However, the performance is low.
|
||||
# NO_AVX = 1
|
||||
|
@ -225,17 +207,7 @@ NO_AFFINITY = 1
|
|||
# to the user space. If bigphysarea is enabled, it will use it.
|
||||
# DEVICEDRIVER_ALLOCATION = 1
|
||||
|
||||
# Use large page allocation (called hugepage support in Linux context)
|
||||
# for the thread buffers (with access by shared memory operations)
|
||||
# HUGETLB_ALLOCATION = 1
|
||||
|
||||
# Use large page allocation (called hugepages in Linux) based on mmap accessing
|
||||
# a memory-backed pseudofile (requires hugetlbfs to be mounted in the system,
|
||||
# the example below has it mounted on /hugepages. OpenBLAS will create the backing
|
||||
# file as gotoblas.processid in that path)
|
||||
# HUGETLBFILE_ALLOCATION = /hugepages
|
||||
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only).
|
||||
# If you need to synchronize FP CSR between threads (for x86/x86_64 only).
|
||||
# CONSISTENT_FPCSR = 1
|
||||
|
||||
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
|
||||
|
@ -300,36 +272,7 @@ COMMON_PROF = -pg
|
|||
# work at all.
|
||||
#
|
||||
# CPP_THREAD_SAFETY_TEST = 1
|
||||
#
|
||||
# use this to run only the less memory-hungry GEMV test
|
||||
# CPP_THREAD_SAFETY_GEMV = 1
|
||||
|
||||
|
||||
# If you want to enable the experimental BFLOAT16 support
|
||||
# BUILD_BFLOAT16 = 1
|
||||
|
||||
|
||||
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
|
||||
# will be allocated on the heap rather than the stack. (This array alone requires
|
||||
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu
|
||||
# counts, but obviously it is not the only item that ends up on the stack.)
|
||||
# The default value of 32 ensures that the overall requirement is compatible
|
||||
# with the default 1MB stacksize imposed by having the Java VM loaded without use
|
||||
# of its -Xss parameter.
|
||||
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
|
||||
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the java
|
||||
# VM e.g. in Octave or with the java-based libhdfs in numpy or scipy code
|
||||
# BLAS3_MEM_ALLOC_THRESHOLD = 160
|
||||
|
||||
|
||||
|
||||
# By default the library contains BLAS functions (and LAPACK if selected) for all input types.
|
||||
# To build a smaller library supporting e.g. only single precision real (SGEMM etc.) or only
|
||||
# the functions for complex numbers, uncomment the desired type(s) below
|
||||
# BUILD_SINGLE = 1
|
||||
# BUILD_DOUBLE = 1
|
||||
# BUILD_COMPLEX = 1
|
||||
# BUILD_COMPLEX16 = 1
|
||||
#
|
||||
# End of user configuration
|
||||
#
|
||||
|
|
|
@ -3,29 +3,21 @@ RANLIB = ranlib
|
|||
|
||||
ifdef BINARY64
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -mcpu=v9 -m64
|
||||
else
|
||||
CCOMMON_OPT += -m64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), g77)
|
||||
FCOMMON_OPT += -mcpu=v9 -m64
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), f95)
|
||||
FCOMMON_OPT += -m64
|
||||
ifeq ($(COMPILER_F77), f90)
|
||||
FCOMMON_OPT += -xarch=v9
|
||||
endif
|
||||
else
|
||||
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -mcpu=v9
|
||||
else
|
||||
CCOMMON_OPT += -xarch=v9
|
||||
endif
|
||||
|
||||
ifeq ($(COMPILER_F77), g77)
|
||||
FCOMMON_OPT += -mcpu=v9
|
||||
endif
|
||||
ifeq ($(COMPILER_F77), f95)
|
||||
ifeq ($(COMPILER_F77), f90)
|
||||
FCOMMON_OPT += -xarch=v8plusb
|
||||
endif
|
||||
|
||||
|
|
621
Makefile.system
621
Makefile.system
File diff suppressed because it is too large
Load Diff
|
@ -1,18 +1,16 @@
|
|||
SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
SBEXTOBJS_P = $(SBEXTOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
|
||||
|
||||
BLASOBJS = $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
|
||||
BLASOBJS_P = $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
|
||||
BLASOBJS = $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS)
|
||||
BLASOBJS_P = $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P)
|
||||
|
||||
ifdef EXPRECISION
|
||||
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
||||
|
@ -24,23 +22,19 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
|
|||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
|
||||
endif
|
||||
|
||||
$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
|
||||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
|
||||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
|
||||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
|
||||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
|
||||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
|
||||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
|
||||
$(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
|
||||
|
||||
$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
$(SBEXTOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
|
||||
|
||||
libs :: $(BLASOBJS) $(COMMONOBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
|
33
Makefile.x86
33
Makefile.x86
|
@ -1,21 +1,5 @@
|
|||
# COMPILER_PREFIX = mingw32-
|
||||
|
||||
ifneq ($(DYNAMIC_ARCH),1)
|
||||
ADD_CPUFLAGS = 1
|
||||
else
|
||||
ifdef TARGET_CORE
|
||||
ADD_CPUFLAGS = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef ADD_CPUFLAGS
|
||||
ifdef HAVE_SSE
|
||||
CCOMMON_OPT += -msse
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -msse
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
ARFLAGS = -m x86
|
||||
|
@ -70,20 +54,3 @@ LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
|
|||
else
|
||||
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm
|
||||
endif
|
||||
ifdef HAVE_SSE2
|
||||
CCOMMON_OPT += -msse2
|
||||
FCOMMON_OPT += -msse2
|
||||
endif
|
||||
ifdef HAVE_SSE3
|
||||
CCOMMON_OPT += -msse3
|
||||
FCOMMON_OPT += -msse3
|
||||
ifdef HAVE_SSSE3
|
||||
CCOMMON_OPT += -mssse3
|
||||
FCOMMON_OPT += -mssse3
|
||||
endif
|
||||
ifdef HAVE_SSE4_1
|
||||
CCOMMON_OPT += -msse4.1
|
||||
FCOMMON_OPT += -msse4.1
|
||||
endif
|
||||
endif
|
||||
|
||||
|
|
182
Makefile.x86_64
182
Makefile.x86_64
|
@ -8,166 +8,11 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
ifeq ($(findstring icx,$(CC)),icx)
|
||||
CCOMMON_OPT += -fp-model=consistent
|
||||
endif
|
||||
endif
|
||||
|
||||
ifneq ($(DYNAMIC_ARCH),1)
|
||||
ADD_CPUFLAGS = 1
|
||||
else
|
||||
ifdef TARGET_CORE
|
||||
ADD_CPUFLAGS = 1
|
||||
endif
|
||||
endif
|
||||
|
||||
ifdef ADD_CPUFLAGS
|
||||
ifdef HAVE_SSE3
|
||||
CCOMMON_OPT += -msse3
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -msse3
|
||||
endif
|
||||
endif
|
||||
ifdef HAVE_SSSE3
|
||||
CCOMMON_OPT += -mssse3
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -mssse3
|
||||
endif
|
||||
endif
|
||||
ifdef HAVE_SSE4_1
|
||||
CCOMMON_OPT += -msse4.1
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -msse4.1
|
||||
endif
|
||||
endif
|
||||
ifndef OLDGCC
|
||||
ifdef HAVE_AVX
|
||||
CCOMMON_OPT += -mavx
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -mavx
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifndef NO_AVX2
|
||||
ifdef HAVE_AVX2
|
||||
CCOMMON_OPT += -mavx2
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), SKYLAKEX)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), COOPERLAKE)
|
||||
ifndef NO_AVX512
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# cooperlake support was added in 10.1
|
||||
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
|
||||
CCOMMON_OPT += -march=cooperlake
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=cooperlake
|
||||
endif
|
||||
else # gcc not support, fallback to avx512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
else ifeq ($(C_COMPILER), CLANG)
|
||||
# cooperlake support was added in clang 9
|
||||
ifeq ($(CLANGVERSIONGTEQ9), 1)
|
||||
CCOMMON_OPT += -march=cooperlake
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=cooperlake
|
||||
endif
|
||||
else # not supported in clang, fallback to avx512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), SAPPHIRERAPIDS)
|
||||
ifndef NO_AVX512
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# sapphire rapids support was added in 11
|
||||
ifeq ($(GCCVERSIONGTEQ11), 1)
|
||||
CCOMMON_OPT += -march=sapphirerapids
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=sapphirerapids
|
||||
endif
|
||||
else # gcc not support, fallback to avx512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
else ifeq ($(C_COMPILER), CLANG)
|
||||
# sapphire rapids support was added in clang 12
|
||||
ifeq ($(CLANGVERSIONGTEQ12), 1)
|
||||
CCOMMON_OPT += -march=sapphirerapids
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=sapphirerapids
|
||||
endif
|
||||
else # not supported in clang, fallback to avx512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(CORE), ZEN)
|
||||
ifdef HAVE_AVX512VL
|
||||
ifndef NO_AVX512
|
||||
CCOMMON_OPT += -march=skylake-avx512
|
||||
ifneq ($(F_COMPILER), NAG)
|
||||
FCOMMON_OPT += -march=skylake-avx512
|
||||
endif
|
||||
ifeq ($(OSNAME), CYGWIN_NT)
|
||||
CCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
FCOMMON_OPT += -fno-asynchronous-unwind-tables
|
||||
|
@ -182,40 +27,19 @@ endif
|
|||
endif
|
||||
endif
|
||||
|
||||
|
||||
ifdef HAVE_AVX2
|
||||
ifeq ($(CORE), HASWELL)
|
||||
ifndef DYNAMIC_ARCH
|
||||
ifndef NO_AVX2
|
||||
ifeq ($(C_COMPILER), GCC)
|
||||
# AVX2 support was added in 4.7.0
|
||||
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||
CCOMMON_OPT += -mavx2
|
||||
endif
|
||||
else
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
|
||||
ifeq ($(F_COMPILER), GFORTRAN)
|
||||
# AVX2 support was added in 4.7.0
|
||||
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
|
||||
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
|
||||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
|
||||
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
|
||||
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
else
|
||||
ifeq ($(F_COMPILER), FLANG)
|
||||
FCOMMON_OPT += -mavx2
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
endif
|
||||
|
||||
|
||||
ifeq ($(OSNAME), Interix)
|
||||
|
|
|
@ -5,12 +5,6 @@ FCOMMON_OPT += -march=z13 -mzvector
|
|||
endif
|
||||
|
||||
ifeq ($(CORE), Z14)
|
||||
CCOMMON_OPT += -march=z14 -mzvector -O3
|
||||
CCOMMON_OPT += -march=z14 -mzvector
|
||||
FCOMMON_OPT += -march=z14 -mzvector
|
||||
endif
|
||||
|
||||
# Enable floating-point expression contraction for clang, since it is the
|
||||
# default for gcc
|
||||
ifeq ($(C_COMPILER), CLANG)
|
||||
CCOMMON_OPT += -ffp-contract=on
|
||||
endif
|
||||
|
|
161
README.md
161
README.md
|
@ -2,24 +2,20 @@
|
|||
|
||||
[](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
||||
|
||||
Cirrus CI: [](https://cirrus-ci.com/github/xianyi/OpenBLAS)
|
||||
Travis CI: [](https://travis-ci.org/xianyi/OpenBLAS)
|
||||
|
||||
AppVeyor: [](https://ci.appveyor.com/project/xianyi/openblas/branch/develop)
|
||||
|
||||
Drone CI: [](https://cloud.drone.io/xianyi/OpenBLAS/)
|
||||
|
||||
[](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop)
|
||||
|
||||
OSUOSL POWERCI [](http://powerci.osuosl.org/job/OpenBLAS_gh/job/develop/)
|
||||
|
||||
OSUOSL IBMZ-CI [](http://ibmz-ci.osuosl.org/job/OpenBLAS-Z/job/develop/)
|
||||
## Introduction
|
||||
|
||||
OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version.
|
||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version.
|
||||
|
||||
Please read the documentation in the OpenBLAS folder: <https://github.com/OpenMathLib/OpenBLAS/docs>.
|
||||
|
||||
For a general introduction to the BLAS routines, please refer to the extensive documentation of their reference implementation hosted at netlib:
|
||||
<https://www.netlib.org/blas>. On that site you will likewise find documentation for the reference implementation of the higher-level library LAPACK - the **L**inear **A**lgebra **Pack**age that comes included with OpenBLAS. If you are looking for a general primer or refresher on Linear Algebra, the set of six
|
||||
20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare <https://ocw.mit.edu/resources/res-18-010-a-2020-vision-of-linear-algebra-spring-2020/> or Youtube <https://www.youtube.com/playlist?list=PLUl4u3cNGP61iQEFiWLE21EJCxwmWvvek> may be helpful.
|
||||
Please read the documentation on the OpenBLAS wiki pages: <https://github.com/xianyi/OpenBLAS/wiki>.
|
||||
|
||||
## Binary Packages
|
||||
|
||||
|
@ -27,13 +23,12 @@ We provide official binary packages for the following platform:
|
|||
|
||||
* Windows x86/x86_64
|
||||
|
||||
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the Releases section of the github project page, [https://github.com/OpenMathLib/OpenBLAS/releases](https://github.com/OpenMathLib/OpenBLAS/releases).
|
||||
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/).
|
||||
|
||||
## Installation from Source
|
||||
|
||||
Download from project homepage, https://github.com/OpenMathLib/OpenBLAS/, or check out the code
|
||||
using Git from https://github.com/OpenMathLib/OpenBLAS.git. (If you want the most up to date version, be
|
||||
sure to use the develop branch - master is several years out of date due to a change of maintainership.)
|
||||
Download from project homepage, https://xianyi.github.com/OpenBLAS/, or check out the code
|
||||
using Git from https://github.com/xianyi/OpenBLAS.git.
|
||||
Buildtime parameters can be chosen in Makefile.rule, see there for a short description of each option.
|
||||
Most can also be given directly on the make or cmake command line.
|
||||
|
||||
|
@ -41,45 +36,33 @@ Most can also be given directly on the make or cmake command line.
|
|||
|
||||
Building OpenBLAS requires the following to be installed:
|
||||
|
||||
* GNU Make or CMake
|
||||
* GNU Make
|
||||
* A C compiler, e.g. GCC or Clang
|
||||
* A Fortran compiler (optional, for LAPACK)
|
||||
|
||||
* IBM MASS (optional, see below)
|
||||
|
||||
### Normal compile
|
||||
|
||||
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
|
||||
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
|
||||
The full target list is in the file `TargetList.txt`; other build options are documented in Makefile.rule and
|
||||
can either be set there (typically by removing the comment character from the respective line), or used on the
|
||||
`make` command line.
|
||||
Note that when you run `make install` after building, you need to repeat all command line options you provided to `make`
|
||||
in the build step, as some settings like the supported maximum number of threads are automatically derived from the
|
||||
build host by default, which might not be what you want.
|
||||
For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel
|
||||
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any
|
||||
build options you plan to set.
|
||||
The full target list is in the file `TargetList.txt`.
|
||||
|
||||
### Cross compile
|
||||
|
||||
Set `CC` and `FC` to point to the cross toolchains, and if you use `make`, also set `HOSTCC` to your host C compiler.
|
||||
Set `CC` and `FC` to point to the cross toolchains, and set `HOSTCC` to your host C compiler.
|
||||
The target must be specified explicitly when cross compiling.
|
||||
|
||||
Examples:
|
||||
|
||||
* On a Linux system, cross-compiling to an older MIPS64 router board:
|
||||
* On an x86 box, compile this library for a loongson3a CPU:
|
||||
```sh
|
||||
make BINARY=64 CC=mipsisa64r6el-linux-gnuabi64-gcc FC=mipsisa64r6el-linux-gnuabi64-gfortran HOSTCC=gcc TARGET=P6600
|
||||
```
|
||||
* or to a Windows x64 host:
|
||||
```sh
|
||||
make CC="i686-w64-mingw32-gcc -Bstatic" FC="i686-w64-mingw32-gfortran -static-libgfortran" TARGET=HASWELL BINARY=32 CROSS=1 NUM_THREADS=20 CONSISTENT_FPCSR=1 HOSTCC=gcc
|
||||
make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-gfortran HOSTCC=gcc TARGET=LOONGSON3A
|
||||
```
|
||||
|
||||
You can find instructions for other cases both in the "Supported Systems" section below and in the docs folder. The .yml scripts included with the sources (which contain the
|
||||
build scripts for the "continuous integration" (CI) build tests automatically run on every proposed change to the sources) may also provide additional hints.
|
||||
|
||||
When compiling for a more modern CPU TARGET of the same architecture, e.g. TARGET=SKYLAKEX on a HASWELL host, option "CROSS=1" can be used to suppress the automatic invocation of the tests at the end of the build.
|
||||
* On an x86 box, compile this library for a loongson3a CPU with loongcc (based on Open64) compiler:
|
||||
```sh
|
||||
make CC=loongcc FC=loongf95 HOSTCC=gcc TARGET=LOONGSON3A CROSS=1 CROSS_SUFFIX=mips64el-st-linux-gnu- NO_LAPACKE=1 NO_SHARED=1 BINARY=32
|
||||
```
|
||||
|
||||
### Debug version
|
||||
|
||||
|
@ -118,7 +101,7 @@ Use `PREFIX=` when invoking `make`, for example
|
|||
```sh
|
||||
make install PREFIX=your_installation_directory
|
||||
```
|
||||
(along with all options you added on the `make` command line in the preceding build step)
|
||||
|
||||
The default installation directory is `/opt/OpenBLAS`.
|
||||
|
||||
## Supported CPUs and Operating Systems
|
||||
|
@ -133,17 +116,11 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
|
|||
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
|
||||
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
|
||||
- **Intel Skylake-X**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64.
|
||||
- **Intel Cooper Lake**: as Skylake-X with improved BFLOAT16 support.
|
||||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
|
||||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
|
||||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
|
||||
- **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen4)
|
||||
|
||||
#### MIPS32
|
||||
|
||||
- **MIPS 1004K**: uses P5600 codes
|
||||
- **MIPS 24K**: uses P5600 codes
|
||||
- **AMD ZEN**: Uses Haswell codes with some optimizations.
|
||||
|
||||
#### MIPS64
|
||||
|
||||
|
@ -159,105 +136,40 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
|
|||
|
||||
- **ARMv8**: Basic ARMV8 with small caches, optimized Level-3 and Level-2 BLAS
|
||||
- **Cortex-A53**: same as ARMV8 (different cpu specifications)
|
||||
- **Cortex-A55**: same as ARMV8 (different cpu specifications)
|
||||
- **Cortex A57**: Optimized Level-3 and Level-2 functions
|
||||
- **Cortex A72**: same as A57 (different cpu specifications)
|
||||
- **Cortex A73**: same as A57 (different cpu specifications)
|
||||
- **Cortex A76**: same as A57 (different cpu specifications)
|
||||
- **Falkor**: same as A57 (different cpu specifications)
|
||||
- **ThunderX**: Optimized some Level-1 functions
|
||||
- **ThunderX2T99**: Optimized Level-3 BLAS and parts of Levels 1 and 2
|
||||
- **ThunderX3T110**
|
||||
- **TSV110**: Optimized some Level-3 helper functions
|
||||
- **EMAG 8180**: preliminary support based on A57
|
||||
- **Neoverse N1**: (AWS Graviton2) preliminary support
|
||||
- **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS
|
||||
- **Apple Vortex**: preliminary support based on ThunderX2/3
|
||||
- **A64FX**: preliminary support, optimized Level-3 BLAS
|
||||
- **ARMV8SVE**: any ARMV8 cpu with SVE extensions
|
||||
|
||||
#### PPC/PPC64
|
||||
|
||||
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
|
||||
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
|
||||
- **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2.
|
||||
|
||||
- **AIX**: Dynamic architecture with OpenXL and OpenMP.
|
||||
```sh
|
||||
make CC=ibm-clang_r FC=xlf_r TARGET=POWER7 BINARY=64 USE_OPENMP=1 INTERFACE64=1 DYNAMIC_ARCH=1 USE_THREAD=1
|
||||
```
|
||||
|
||||
#### IBM zEnterprise System
|
||||
|
||||
- **Z13**: Optimized Level-3 BLAS and Level-1,2
|
||||
- **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2
|
||||
|
||||
#### RISC-V
|
||||
|
||||
- **C910V**: Optimized Level-3 BLAS (real) and Level-1,2 by RISC-V Vector extension 0.7.1.
|
||||
```sh
|
||||
make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran
|
||||
```
|
||||
(also known to work on C906 as long as you use only single-precision functions - its instruction set support appears to be incomplete in double precision)
|
||||
|
||||
- **x280**: Level-3 BLAS and Level-1,2 are optimized by RISC-V Vector extension 1.0.
|
||||
```sh
|
||||
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran
|
||||
```
|
||||
|
||||
- **ZVL???B**: Level-3 BLAS and Level-1,2 including vectorised kernels targeting generic RISCV cores with vector support with registers of at least the corresponding width; ZVL128B and ZVL256B are available.
|
||||
e.g.:
|
||||
```sh
|
||||
make TARGET=RISCV64_ZVL256B CFLAGS="-DTARGET=RISCV64_ZVL256B" \
|
||||
BINARY=64 ARCH=riscv64 CC='clang -target riscv64-unknown-linux-gnu' \
|
||||
AR=riscv64-unknown-linux-gnu-ar AS=riscv64-unknown-linux-gnu-gcc \
|
||||
LD=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran \
|
||||
HOSTCC=gcc HOSTFC=gfortran -j
|
||||
```
|
||||
|
||||
#### LOONGARCH64
|
||||
|
||||
- **LA64_GENERIC**: Optimized Level-3, Level-2 and Level-1 BLAS with scalar instruction
|
||||
```sh
|
||||
make HOSTCC=gcc TARGET=LA64_GENERIC CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1
|
||||
```
|
||||
The old-style TARGET=LOONGSONGENERIC is still supported
|
||||
|
||||
- **LA264**: Optimized Level-3, Level-2 and Level-1 BLAS with LSX instruction
|
||||
```sh
|
||||
make HOSTCC=gcc TARGET=LA264 CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1
|
||||
```
|
||||
The old-style TARGET=LOONGSON2K1000 is still supported
|
||||
|
||||
- **LA464**: Optimized Level-3, Level-2 and Level-1 BLAS with LASX instruction
|
||||
```sh
|
||||
make HOSTCC=gcc TARGET=LA464 CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1
|
||||
```
|
||||
The old-style TARGET=LOONGSON3R5 is still supported
|
||||
|
||||
### Support for multiple targets in a single library
|
||||
|
||||
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.
|
||||
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying DYNAMIC_ARCH=1 in Makefile.rule, on the gmake command line or as -DDYNAMIC_ARCH=TRUE in cmake.
|
||||
|
||||
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX, Cooper Lake, Sapphire Rapids. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows you to specify an individual list of targets to include instead of the default.
|
||||
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX. For cpu generations not included in this list, the corresponding older model is used. If you also specify DYNAMIC_OLDER=1, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option DYNAMIC_LIST that allows you to specify an individual list of targets to include instead of the default.
|
||||
|
||||
`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
|
||||
DYNAMIC_ARCH is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
|
||||
Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano.
|
||||
|
||||
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus. If compiler support for SVE is available at build time, support for NeoverseN2, NeoverseV1 as well as generic ArmV8SVE targets is also enabled.
|
||||
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus.
|
||||
|
||||
For **POWER**, the list encompasses POWER6, POWER8 and POWER9. POWER10 is additionally available if a sufficiently recent compiler is used for the build.
|
||||
|
||||
On **ZARCH**, it comprises Z13 and Z14 as well as generic zarch support.
|
||||
|
||||
On **riscv64**, DYNAMIC_ARCH enables support for riscv64_zvl128b and riscv64_zvl256b in addition to generic riscv64 support. A compiler that supports RVV 1.0 is required to build OpenBLAS for riscv64 when DYNAMIC_ARCH is enabled.
|
||||
|
||||
On **LoongArch64**, it comprises LA264 and LA464 as well as generic LoongArch64 support.
|
||||
|
||||
The `TARGET` option can - and usually **should** - be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the common code in the library; usually you will want to set this to the oldest model you expect to encounter.
|
||||
Failure to specify this may lead to advanced instructions being used by the compiler, just because the build host happens to support them. This is most likely to happen when aggressive optimization options are in effect, and the resulting library may then crash with an
|
||||
illegal instruction error on weaker hardware, before it even reaches the BLAS routines specifically included for that cpu.
|
||||
For **POWER**, the list encompasses POWER6, POWER8 and POWER9, on **ZARCH** it comprises Z13 and Z14.
|
||||
|
||||
The TARGET option can be used in conjunction with DYNAMIC_ARCH=1 to specify which cpu model should be assumed for all the
|
||||
common code in the library, usually you will want to set this to the oldest model you expect to encounter.
|
||||
Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library.
|
||||
|
||||
### Supported OS
|
||||
|
@ -270,10 +182,9 @@ Please note that it is not possible to combine support for different architectur
|
|||
- **NetBSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **DragonFly BSD**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
|
||||
- **AIX**: Supported on PPC up to POWER10
|
||||
- **AIX**: Supported on PPC up to POWER8
|
||||
- **Haiku**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **SunOS**: Supported by the community. We don't actively test the library on this OS.
|
||||
- **Cortex-M**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-on-Cortex-M>.
|
||||
- **SunOS**: Supported by the community. We don't actively test the library on this OS.
|
||||
|
||||
## Usage
|
||||
|
||||
|
@ -305,27 +216,25 @@ We provide the following functions to control the number of threads at runtime:
|
|||
void goto_set_num_threads(int num_threads);
|
||||
void openblas_set_num_threads(int num_threads);
|
||||
```
|
||||
Note that these are only used once at library initialization, and are not available for
|
||||
fine-tuning thread numbers in individual BLAS calls.
|
||||
|
||||
If you compile this library with `USE_OPENMP=1`, you should use the above functions too.
|
||||
|
||||
## Reporting bugs
|
||||
|
||||
Please submit an issue in https://github.com/OpenMathLib/OpenBLAS/issues.
|
||||
Please submit an issue in https://github.com/xianyi/OpenBLAS/issues.
|
||||
|
||||
## Contact
|
||||
|
||||
+ Use github discussions: https://github.com/OpenMathLib/OpenBLAS/discussions
|
||||
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users
|
||||
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev
|
||||
|
||||
## Change log
|
||||
|
||||
Please see Changelog.txt.
|
||||
Please see Changelog.txt to view the differences between OpenBLAS and GotoBLAS2 1.13 BSD version.
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
* Please read the [FAQ](https://github.com/OpenMathLib/OpenBLAS/blob/develop/docs/faq.md) in the docs folder first.
|
||||
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
|
||||
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
|
||||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
|
||||
Clang 3.0 will generate the wrong AVX binary code.
|
||||
|
@ -342,9 +251,9 @@ Please see Changelog.txt.
|
|||
|
||||
## Contributing
|
||||
|
||||
1. [Check for open issues](https://github.com/OpenMathLib/OpenBLAS/issues) or open a fresh issue
|
||||
1. [Check for open issues](https://github.com/xianyi/OpenBLAS/issues) or open a fresh issue
|
||||
to start a discussion around a feature idea or a bug.
|
||||
2. Fork the [OpenBLAS](https://github.com/OpenMathLib/OpenBLAS) repository to start making your changes.
|
||||
2. Fork the [OpenBLAS](https://github.com/xianyi/OpenBLAS) repository to start making your changes.
|
||||
3. Write a test which shows that the bug was fixed or that the feature works as expected.
|
||||
4. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.
|
||||
|
||||
|
|
20
SECURITY.md
20
SECURITY.md
|
@ -1,20 +0,0 @@
|
|||
# Security Policy
|
||||
|
||||
## Supported Versions
|
||||
|
||||
It is generally recommended to use the latest release as this project
|
||||
does not maintain multiple stable branches and providing packages e.g.
|
||||
for Linux distributions is outside our scope. In particular, versions
|
||||
before 0.3.18 can be assumed to carry the out-of-bounds-read error in
|
||||
the LAPACK ?LARRV family of functions that was the subject of
|
||||
CVE-2021-4048
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
If you suspect that you have found a vulnerability - a defect that could
|
||||
be abused to compromise the security of a user's code or systems - please
|
||||
do not use the normal github issue tracker (except perhaps to post a general
|
||||
warning if you deem that necessary). Instead, please contact the project
|
||||
maintainers through the email addresses given in their github user profiles.
|
||||
Defects found in the "lapack-netlib" subtree should ideally be reported to
|
||||
the maintainers of the reference implementation of LAPACK, lapack@icl.utk.edu.
|
|
@ -22,8 +22,6 @@ SANDYBRIDGE
|
|||
HASWELL
|
||||
SKYLAKEX
|
||||
ATOM
|
||||
COOPERLAKE
|
||||
SAPPHIRERAPIDS
|
||||
|
||||
b)AMD CPU:
|
||||
ATHLON
|
||||
|
@ -51,7 +49,6 @@ POWER6
|
|||
POWER7
|
||||
POWER8
|
||||
POWER9
|
||||
POWER10
|
||||
PPCG4
|
||||
PPC970
|
||||
PPC970MP
|
||||
|
@ -61,11 +58,9 @@ CELL
|
|||
|
||||
3.MIPS CPU:
|
||||
P5600
|
||||
MIPS1004K
|
||||
MIPS24K
|
||||
1004K
|
||||
|
||||
4.MIPS64 CPU:
|
||||
MIPS64_GENERIC
|
||||
SICORTEX
|
||||
LOONGSON3A
|
||||
LOONGSON3B
|
||||
|
@ -93,59 +88,14 @@ CORTEXA53
|
|||
CORTEXA57
|
||||
CORTEXA72
|
||||
CORTEXA73
|
||||
CORTEXA76
|
||||
CORTEXA510
|
||||
CORTEXA710
|
||||
CORTEXX1
|
||||
CORTEXX2
|
||||
NEOVERSEN1
|
||||
NEOVERSEV1
|
||||
NEOVERSEN2
|
||||
CORTEXA55
|
||||
EMAG8180
|
||||
FALKOR
|
||||
THUNDERX
|
||||
THUNDERX2T99
|
||||
TSV110
|
||||
THUNDERX3T110
|
||||
VORTEX
|
||||
A64FX
|
||||
ARMV8SVE
|
||||
FT2000
|
||||
|
||||
9.System Z:
|
||||
ZARCH_GENERIC
|
||||
Z13
|
||||
Z14
|
||||
|
||||
10.RISC-V 64:
|
||||
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
|
||||
RISCV64_ZVL128B
|
||||
C910V
|
||||
x280
|
||||
RISCV64_ZVL256B
|
||||
|
||||
11.LOONGARCH64:
|
||||
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 are legacy names,
|
||||
// and it is recommended to use the more standardized naming conventions
|
||||
// LA64_GENERIC/LA264/LA464. You can still specify TARGET as
|
||||
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 during compilation or runtime,
|
||||
// and they will be internally relocated to LA64_GENERIC/LA264/LA464.
|
||||
LOONGSONGENERIC
|
||||
LOONGSON2K1000
|
||||
LOONGSON3R5
|
||||
LA64_GENERIC
|
||||
LA264
|
||||
LA464
|
||||
|
||||
12. Elbrus E2000:
|
||||
E2K
|
||||
|
||||
13. Alpha
|
||||
EV4
|
||||
EV5
|
||||
EV6
|
||||
|
||||
14.CSKY
|
||||
CSKY
|
||||
CK860FV
|
||||
|
|
36
appveyor.yml
36
appveyor.yml
|
@ -29,15 +29,15 @@ environment:
|
|||
global:
|
||||
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
|
||||
matrix:
|
||||
# - COMPILER: clang-cl
|
||||
# WITH_FORTRAN: ON
|
||||
# - COMPILER: clang-cl
|
||||
# DYNAMIC_ARCH: ON
|
||||
# WITH_FORTRAN: OFF
|
||||
# - COMPILER: cl
|
||||
# - COMPILER: MinGW64-gcc-7.2.0-mingw
|
||||
# DYNAMIC_ARCH: OFF
|
||||
# WITH_FORTRAN: ignore
|
||||
- COMPILER: clang-cl
|
||||
WITH_FORTRAN: yes
|
||||
- COMPILER: clang-cl
|
||||
DYNAMIC_ARCH: ON
|
||||
WITH_FORTRAN: no
|
||||
- COMPILER: cl
|
||||
- COMPILER: MinGW64-gcc-7.2.0-mingw
|
||||
DYNAMIC_ARCH: OFF
|
||||
WITH_FORTRAN: ignore
|
||||
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
|
||||
COMPILER: MinGW-gcc-6.3.0-32
|
||||
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
|
||||
|
@ -46,10 +46,13 @@ environment:
|
|||
|
||||
install:
|
||||
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
|
||||
- if [%COMPILER%]==[clang-cl] conda update --yes -n base conda
|
||||
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
|
||||
- if [%COMPILER%]==[clang-cl] conda config --set auto_update_conda false
|
||||
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake ninja flang=11.0.1
|
||||
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake
|
||||
|
||||
- if [%WITH_FORTRAN%]==[no] conda install --yes --quiet ninja
|
||||
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet -c isuruf kitware-ninja
|
||||
- if [%WITH_FORTRAN%]==[yes] conda install --yes --quiet flang
|
||||
|
||||
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
|
||||
- if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
|
||||
- if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
|
||||
|
@ -65,14 +68,15 @@ before_build:
|
|||
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
|
||||
- if [%WITH_FORTRAN%]==[OFF] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DMSVC_STATIC_CRT=ON ..
|
||||
- if [%WITH_FORTRAN%]==[ON] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
|
||||
- if [%USE_OPENMP%]==[ON] cmake -DUSE_OPENMP=ON ..
|
||||
- if [%WITH_FORTRAN%]==[no] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DMSVC_STATIC_CRT=ON ..
|
||||
- if [%WITH_FORTRAN%]==[yes] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
|
||||
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..
|
||||
|
||||
build_script:
|
||||
- cmake --build .
|
||||
|
||||
test_script:
|
||||
- ctest -j2
|
||||
- echo Running Test
|
||||
- cd utest
|
||||
- openblas_utest
|
||||
|
||||
|
|
|
@ -4,14 +4,6 @@ trigger:
|
|||
branches:
|
||||
include:
|
||||
- develop
|
||||
resources:
|
||||
containers:
|
||||
- container: oneapi-hpckit
|
||||
image: intel/oneapi-hpckit:latest
|
||||
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
|
||||
- container: oneapi-basekit
|
||||
image: intel/oneapi-basekit:latest
|
||||
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
|
||||
|
||||
jobs:
|
||||
# manylinux1 is useful to test because the
|
||||
|
@ -19,7 +11,7 @@ jobs:
|
|||
# of gcc / glibc
|
||||
- job: manylinux1_gcc
|
||||
pool:
|
||||
vmImage: 'ubuntu-latest'
|
||||
vmImage: 'ubuntu-16.04'
|
||||
steps:
|
||||
- script: |
|
||||
echo "FROM quay.io/pypa/manylinux1_x86_64
|
||||
|
@ -35,7 +27,7 @@ jobs:
|
|||
displayName: Run manylinux1 docker build
|
||||
- job: Intel_SDE_skx
|
||||
pool:
|
||||
vmImage: 'ubuntu-latest'
|
||||
vmImage: 'ubuntu-16.04'
|
||||
steps:
|
||||
- script: |
|
||||
# at the time of writing the available Azure Ubuntu vm image
|
||||
|
@ -57,248 +49,3 @@ jobs:
|
|||
# we need a privileged docker run for sde process attachment
|
||||
docker run --privileged intel_sde
|
||||
displayName: 'Run AVX512 SkylakeX docker build / test'
|
||||
|
||||
- job: Windows_cl
|
||||
pool:
|
||||
vmImage: 'windows-latest'
|
||||
steps:
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
workingDirectory: 'build' # Optional
|
||||
cmakeArgs: '-G "Visual Studio 17 2022" ..'
|
||||
- task: CMake@1
|
||||
inputs:
|
||||
cmakeArgs: '--build . --config Release'
|
||||
workingDirectory: 'build'
|
||||
- script: |
|
||||
cd build
|
||||
cd utest
|
||||
dir
|
||||
openblas_utest.exe
|
||||
|
||||
- job: Windows_mingw_gmake
|
||||
pool:
|
||||
vmImage: 'windows-latest'
|
||||
steps:
|
||||
- script: |
|
||||
mingw32-make CC=gcc FC=gfortran DYNAMIC_ARCH=1 DYNAMIC_LIST="SANDYBRIDGE"
|
||||
|
||||
- job: Windows_clang_cmake
|
||||
pool:
|
||||
vmImage: 'windows-latest'
|
||||
steps:
|
||||
- script: |
|
||||
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
|
||||
set "LIB=C:\Miniconda\Library\lib;%LIB%"
|
||||
set "CPATH=C:\Miniconda\Library\include;%CPATH%
|
||||
conda config --add channels conda-forge --force
|
||||
conda config --set auto_update_conda false
|
||||
conda install --yes ninja
|
||||
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DNOFORTRAN=1 -DMSVC_STATIC_CRT=ON ..
|
||||
cmake --build . --config Release
|
||||
ctest
|
||||
|
||||
- job: Windows_flang_clang
|
||||
pool:
|
||||
vmImage: 'windows-2022'
|
||||
steps:
|
||||
- script: |
|
||||
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
|
||||
set "LIB=C:\Miniconda\Library\lib;%LIB%"
|
||||
set "CPATH=C:\Miniconda\Library\include;%CPATH%"
|
||||
conda config --add channels conda-forge --force
|
||||
conda config --set auto_update_conda false
|
||||
conda install --yes --quiet ninja flang
|
||||
mkdir build
|
||||
cd build
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
||||
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER="flang -I C:\Miniconda\Library\include\flang" -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
|
||||
cmake --build . --config Release
|
||||
ctest
|
||||
|
||||
- job: Windows_cl_flang
|
||||
pool:
|
||||
vmImage: 'windows-2022'
|
||||
steps:
|
||||
- script: |
|
||||
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
|
||||
set "LIB=C:\Miniconda\Library\lib;%LIB%"
|
||||
set "CPATH=C:\Miniconda\Library\include;%CPATH%"
|
||||
conda config --add channels conda-forge --force
|
||||
conda config --set auto_update_conda false
|
||||
conda install --yes --quiet ninja flang
|
||||
mkdir build
|
||||
cd build
|
||||
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
||||
cmake -G "Ninja" -DCMAKE_C_COMPILER=cl -DCMAKE_Fortran_COMPILER=flang-new -DC_LAPACK=1 -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
|
||||
cmake --build . --config Release
|
||||
ctest
|
||||
ctest --rerun-failed --output-on-failure
|
||||
|
||||
|
||||
- job: OSX_OpenMP
|
||||
pool:
|
||||
vmImage: 'macOS-12'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-13 FC=gfortran-13
|
||||
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-13 FC=gfortran-13 PREFIX=../blasinst install
|
||||
ls -lR ../blasinst
|
||||
|
||||
- job: OSX_GCC_Nothreads
|
||||
pool:
|
||||
vmImage: 'macOS-12'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
make USE_THREADS=0 CC=gcc-13 FC=gfortran-13
|
||||
|
||||
- job: OSX_GCC12
|
||||
pool:
|
||||
vmImage: 'macOS-latest'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
make CC=gcc-12 FC=gfortran-12
|
||||
|
||||
- job: OSX_OpenMP_Clang
|
||||
pool:
|
||||
vmImage: 'macOS-latest'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
brew install llvm libomp
|
||||
make TARGET=CORE2 USE_OPENMP=1 DYNAMIC_ARCH=1 CC=/usr/local/opt/llvm/bin/clang NOFORTRAN=1
|
||||
|
||||
- job: OSX_OpenMP_Clang_cmake
|
||||
pool:
|
||||
vmImage: 'macOS-latest'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
brew install llvm libomp
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DTARGET=CORE2 -DUSE_OPENMP=1 -DINTERFACE64=1 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DNOFORTRAN=1 -DNO_AVX512=1 ..
|
||||
make
|
||||
ctest
|
||||
|
||||
- job: OSX_dynarch_cmake
|
||||
pool:
|
||||
vmImage: 'macOS-12'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
steps:
|
||||
- script: |
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DDYNAMIC_LIST='NEHALEM HASWELL SKYLAKEX' -DCMAKE_C_COMPILER=gcc-13 -DCMAKE_Fortran_COMPILER=gfortran-13 -DBUILD_SHARED_LIBS=ON ..
|
||||
cmake --build .
|
||||
ctest
|
||||
|
||||
- job: OSX_Ifort_Clang
|
||||
pool:
|
||||
vmImage: 'macOS-latest'
|
||||
variables:
|
||||
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/edb4dc2f-266f-47f2-8d56-21bc7764e119/m_HPCKit_p_2023.2.0.49443.dmg
|
||||
LIBRARY_PATH: /usr/local/opt/llvm/lib
|
||||
MACOS_FORTRAN_COMPONENTS: intel.oneapi.mac.ifort-compiler
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
brew install llvm libomp
|
||||
sudo mkdir -p /opt/intel
|
||||
sudo chown $USER /opt/intel
|
||||
displayName: prepare for cache restore
|
||||
- task: Cache@2
|
||||
inputs:
|
||||
path: /opt/intel/oneapi
|
||||
key: '"install" | "$(MACOS_HPCKIT_URL)" | "$(MACOS_FORTRAN_COMPONENTS)"'
|
||||
cacheHitVar: CACHE_RESTORED
|
||||
- script: |
|
||||
curl --output webimage.dmg --url $(MACOS_HPCKIT_URL) --retry 5 --retry-delay 5
|
||||
hdiutil attach webimage.dmg
|
||||
sudo /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)"/bootstrapper.app/Contents/MacOS/bootstrapper -s --action install --components="$(MACOS_FORTRAN_COMPONENTS)" --eula=accept --continue-with-optional-error=yes --log-dir=.
|
||||
installer_exit_code=$?
|
||||
hdiutil detach /Volumes/"$(basename "$URL" .dmg)" -quiet
|
||||
exit $installer_exit_code
|
||||
displayName: install
|
||||
condition: ne(variables.CACHE_RESTORED, 'true')
|
||||
- script: |
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
make CC=/usr/local/opt/llvm/bin/clang FC=ifort
|
||||
|
||||
- job: OSX_NDK_ARMV7
|
||||
pool:
|
||||
vmImage: 'macOS-12'
|
||||
steps:
|
||||
- script: |
|
||||
brew update
|
||||
brew install --cask android-ndk
|
||||
export ANDROID_NDK_HOME=/usr/local/share/android-ndk
|
||||
make TARGET=ARMV7 ONLY_CBLAS=1 CC=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi21-clang AR=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/darwin-x86_64/bin/llvm-ar HOSTCC=gcc ARM_SOFTFP_ABI=1 -j4
|
||||
|
||||
- job: OSX_IOS_ARMV8
|
||||
pool:
|
||||
vmImage: 'macOS-12'
|
||||
variables:
|
||||
CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.2.sdk -arch arm64 -miphoneos-version-min=10.0
|
||||
steps:
|
||||
- script: |
|
||||
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
|
||||
|
||||
- job: OSX_IOS_ARMV7
|
||||
pool:
|
||||
vmImage: 'macOS-12'
|
||||
variables:
|
||||
CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
CFLAGS: -O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.2.sdk -arch armv7 -miphoneos-version-min=5.1
|
||||
steps:
|
||||
- script: |
|
||||
make TARGET=ARMV7 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
|
||||
|
||||
- job: OSX_xbuild_DYNAMIC_ARM64
|
||||
pool:
|
||||
vmImage: 'macOS-12'
|
||||
variables:
|
||||
CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
|
||||
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX13.1.sdk -arch arm64
|
||||
steps:
|
||||
- script: |
|
||||
ls /Applications/Xcode_14.2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
|
||||
/Applications/Xcode_12.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -arch arm64 --print-supported-cpus
|
||||
/Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang --version
|
||||
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
|
||||
|
||||
- job: ALPINE_MUSL
|
||||
pool:
|
||||
vmImage: 'ubuntu-latest'
|
||||
steps:
|
||||
- script: |
|
||||
wget https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.14.0/alpine-chroot-install \
|
||||
&& echo 'ccbf65f85cdc351851f8ad025bb3e65bae4d5b06 alpine-chroot-install' | sha1sum -c \
|
||||
|| exit 1
|
||||
alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
|
||||
sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers sudo'
|
||||
alpine make DYNAMIC_ARCH=1 BINARY=64
|
||||
alpine make DYNAMIC_ARCH=1 BINARY=64 PREFIX=mytestdir install
|
||||
alpine ls -l mytestdir/include
|
||||
alpine echo "// tests that inclusion of openblas_config.h works with musl" >test_install.c
|
||||
alpine echo "#include <openblas_config.h>" >>test_install.c
|
||||
alpine echo "int main(){" >> test_install.c
|
||||
alpine echo "cpu_set_t* cpu_set = NULL;}" >>test_install.c
|
||||
alpine gcc -Imytestdir/include test_install.c -Lmytestdir/lib -lopenblas -lpthread -lgfortran -o test_install
|
||||
|
||||
|
|
|
@ -37,12 +37,6 @@ ESSL=/opt/ibm/lib
|
|||
#LIBESSL = -lesslsmp $(ESSL)/libxlomp_ser.so.1 $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
|
||||
LIBESSL = -lesslsmp $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a
|
||||
|
||||
# x280 temporary workaround for gfortran
|
||||
ifeq ($(TARGET), x280)
|
||||
CCOMMON_OPT:=$(filter-out -mllvm --riscv-v-vector-bits-min=512,$(CCOMMON_OPT))
|
||||
endif
|
||||
|
||||
|
||||
ifneq ($(NO_LAPACK), 1)
|
||||
GOTO_LAPACK_TARGETS=slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \
|
||||
|
@ -55,12 +49,6 @@ else
|
|||
GOTO_LAPACK_TARGETS=
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
GOTO_HALF_TARGETS=sbgemm.goto
|
||||
else
|
||||
GOTO_HALF_TARGETS=
|
||||
endif
|
||||
|
||||
ifeq ($(OSNAME), WINNT)
|
||||
|
||||
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
||||
|
@ -103,8 +91,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \
|
|||
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \
|
||||
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \
|
||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto \
|
||||
somatcopy.goto domatcopy.goto comatcopy.goto zomatcopy.goto \
|
||||
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto $(GOTO_HALF_TARGETS)
|
||||
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto
|
||||
|
||||
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
||||
|
@ -277,8 +264,7 @@ goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \
|
|||
samin.goto damin.goto camin.goto zamin.goto \
|
||||
smin.goto dmin.goto \
|
||||
saxpby.goto daxpby.goto caxpby.goto zaxpby.goto \
|
||||
somatcopy.goto domatcopy.goto comatcopy.goto zomatcopy.goto \
|
||||
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS) $(GOTO_HALF_TARGETS)
|
||||
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS)
|
||||
|
||||
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \
|
||||
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \
|
||||
|
@ -628,11 +614,6 @@ zcholesky.essl : zcholesky.$(SUFFIX)
|
|||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
##################################### Sgemm ####################################################
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
sbgemm.goto : sbgemm.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
endif
|
||||
|
||||
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
|
@ -1833,7 +1814,7 @@ zsymv.veclib : zsymv.$(SUFFIX)
|
|||
|
||||
##################################### Sgeev ####################################################
|
||||
sgeev.goto : sgeev.$(SUFFIX) ../$(LIBNAME)
|
||||
$(FC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
sgeev.acml : sgeev.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
@ -1849,7 +1830,7 @@ sgeev.veclib : sgeev.$(SUFFIX)
|
|||
|
||||
##################################### Dgeev ####################################################
|
||||
dgeev.goto : dgeev.$(SUFFIX) ../$(LIBNAME)
|
||||
$(FC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
dgeev.acml : dgeev.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
@ -1866,7 +1847,7 @@ dgeev.veclib : dgeev.$(SUFFIX)
|
|||
##################################### Cgeev ####################################################
|
||||
|
||||
cgeev.goto : cgeev.$(SUFFIX) ../$(LIBNAME)
|
||||
$(FC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
cgeev.acml : cgeev.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
@ -1883,7 +1864,7 @@ cgeev.veclib : cgeev.$(SUFFIX)
|
|||
##################################### Zgeev ####################################################
|
||||
|
||||
zgeev.goto : zgeev.$(SUFFIX) ../$(LIBNAME)
|
||||
$(FC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
zgeev.acml : zgeev.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
@ -1899,7 +1880,7 @@ zgeev.veclib : zgeev.$(SUFFIX)
|
|||
|
||||
##################################### Sgetri ####################################################
|
||||
sgetri.goto : sgetri.$(SUFFIX) ../$(LIBNAME)
|
||||
$(FC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
sgetri.acml : sgetri.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
@ -1915,7 +1896,7 @@ sgetri.veclib : sgetri.$(SUFFIX)
|
|||
|
||||
##################################### Dgetri ####################################################
|
||||
dgetri.goto : dgetri.$(SUFFIX) ../$(LIBNAME)
|
||||
$(FC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
dgetri.acml : dgetri.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
@ -1932,7 +1913,7 @@ dgetri.veclib : dgetri.$(SUFFIX)
|
|||
##################################### Cgetri ####################################################
|
||||
|
||||
cgetri.goto : cgetri.$(SUFFIX) ../$(LIBNAME)
|
||||
$(FC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
cgetri.acml : cgetri.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
@ -1949,7 +1930,7 @@ cgetri.veclib : cgetri.$(SUFFIX)
|
|||
##################################### Zgetri ####################################################
|
||||
|
||||
zgetri.goto : zgetri.$(SUFFIX) ../$(LIBNAME)
|
||||
$(FC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
zgetri.acml : zgetri.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
@ -2908,29 +2889,6 @@ dznrm2.goto : dznrm2.$(SUFFIX) ../$(LIBNAME)
|
|||
dznrm2.atlas : dznrm2.$(SUFFIX)
|
||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB)
|
||||
|
||||
###################################################################################################
|
||||
|
||||
############################################ SOMATCOPY ############################################
|
||||
somatcopy.goto : somatcopy.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
###################################################################################################
|
||||
|
||||
############################################ DOMATCOPY ############################################
|
||||
domatcopy.goto : domatcopy.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
###################################################################################################
|
||||
|
||||
############################################ COMATCOPY ############################################
|
||||
comatcopy.goto : comatcopy.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
###################################################################################################
|
||||
|
||||
############################################ ZOMATCOPY ############################################
|
||||
zomatcopy.goto : zomatcopy.$(SUFFIX) ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
|
||||
|
||||
###################################################################################################
|
||||
|
||||
|
@ -2958,11 +2916,6 @@ ccholesky.$(SUFFIX) : cholesky.c
|
|||
zcholesky.$(SUFFIX) : cholesky.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
sbgemm.$(SUFFIX) : gemm.c
|
||||
$(CC) $(CFLAGS) -c -DHALF -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
endif
|
||||
|
||||
sgemm.$(SUFFIX) : gemm.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
|
@ -3460,18 +3413,6 @@ scnrm2.$(SUFFIX) : nrm2.c
|
|||
dznrm2.$(SUFFIX) : nrm2.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
somatcopy.$(SUFFIX) : omatcopy.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
domatcopy.$(SUFFIX) : omatcopy.c
|
||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
comatcopy.$(SUFFIX) : omatcopy.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
|
||||
|
||||
zomatcopy.$(SUFFIX) : omatcopy.c
|
||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
|
||||
|
||||
|
||||
smallscaling: smallscaling.c ../$(LIBNAME)
|
||||
$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm -lpthread
|
||||
|
|
130
benchmark/amax.c
130
benchmark/amax.c
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef AMAX
|
||||
|
||||
|
@ -43,8 +49,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
|
@ -53,48 +123,30 @@ int main(int argc, char *argv[])
|
|||
int l;
|
||||
char *p;
|
||||
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0)
|
||||
{
|
||||
from = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
to = MAX(atol(*argv), from);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
step = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||
loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX")))
|
||||
inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||
{
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -104,20 +156,25 @@ int main(int argc, char *argv[])
|
|||
{
|
||||
|
||||
timeg=0;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||
{
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
AMAX (&m, x, &inc_x);
|
||||
end();
|
||||
timeg += getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
@ -125,6 +182,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
127
benchmark/amin.c
127
benchmark/amin.c
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef AMIN
|
||||
|
||||
|
@ -43,8 +49,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
blasint m, i;
|
||||
|
@ -57,44 +127,25 @@ int main(int argc, char *argv[])
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0)
|
||||
{
|
||||
from = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
to = MAX(atol(*argv), from);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
step = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||
loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX")))
|
||||
inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||
{
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -107,21 +158,24 @@ int main(int argc, char *argv[])
|
|||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||
{
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
AMIN (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
timeg += getsec();
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
@ -129,6 +183,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
135
benchmark/asum.c
135
benchmark/asum.c
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef ASUM
|
||||
|
||||
|
@ -43,8 +49,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
FLOAT result;
|
||||
|
@ -57,44 +127,27 @@ int main(int argc, char *argv[])
|
|||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;
|
||||
argv++;
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0)
|
||||
{
|
||||
from = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
to = MAX(atol(*argv), from);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0)
|
||||
{
|
||||
step = atol(*argv);
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++;}
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS")))
|
||||
loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX")))
|
||||
inc_x = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
|
||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
|
||||
|
||||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
|
||||
{
|
||||
fprintf(stderr, "Out of Memory!!\n");
|
||||
exit(1);
|
||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -107,19 +160,26 @@ int main(int argc, char *argv[])
|
|||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
|
||||
{
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = ASUM (&m, x, &inc_x);
|
||||
end();
|
||||
timeg += getsec();
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
if (loops > 1)
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
|
@ -127,6 +187,7 @@ int main(int argc, char *argv[])
|
|||
#else
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef AXPBY
|
||||
|
||||
|
@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
|
@ -58,6 +129,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -80,7 +152,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -104,10 +176,16 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
|
||||
end();
|
||||
timeg += getsec();
|
||||
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef AXPY
|
||||
|
||||
|
@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The size handed to shmget() is (size + HUGE_PAGESIZE) masked with
 * ~(HUGE_PAGESIZE - 1). The segment is attached with shmat() and then
 * marked with IPC_RMID straight away. If shmget() or shmat() fails, a
 * message is printed and the process exits with status 1; otherwise the
 * attached address is returned.
 */
static void *huge_malloc(BLASLONG size){
  void *mapping;
  int segment;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
|
@ -56,6 +127,8 @@ int main(int argc, char *argv[]){
|
|||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timespec start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -78,7 +151,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -102,13 +175,13 @@ int main(int argc, char *argv[]){
|
|||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
clock_gettime( CLOCK_REALTIME, &start);
|
||||
|
||||
AXPY (&m, alpha, x, &inc_x, y, &inc_y );
|
||||
|
||||
end();
|
||||
clock_gettime( CLOCK_REALTIME, &stop);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -1,134 +0,0 @@
|
|||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <time.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <mach/mach_time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The size handed to shmget() is (size + HUGE_PAGESIZE) masked with
 * ~(HUGE_PAGESIZE - 1). The segment is attached with shmat() and then
 * marked with IPC_RMID straight away. If shmget() or shmat() fails, a
 * message is printed and the process exits with status 1; otherwise the
 * attached address is returned.
 */
static void *huge_malloc(BLASLONG size){
  void *mapping;
  int segment;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
/* Benchmarks should allocate with cacheline (often 64 bytes) alignment
|
||||
to avoid unreliable results. This technique, storing the allocated
|
||||
pointer value just before the aligned memory, doesn't require
|
||||
C11's aligned_alloc for compatibility with older compilers. */
|
||||
static void *aligned_alloc_cacheline(size_t n)
|
||||
{
|
||||
void *p = malloc((size_t)(void *) + n + L1_DATA_LINESIZE - 1);
|
||||
if (p) {
|
||||
void **newp = (void **)
|
||||
(((uintptr_t)p + L1_DATA_LINESIZE) & (uintptr_t)-L1_DATA_LINESIZE);
|
||||
newp[-1] = p;
|
||||
p = newp;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
#define malloc aligned_alloc_cacheline
|
||||
#define free(p) free((p) ? ((void **)(p))[-1] : (p))
|
||||
|
||||
/* Timestamps shared by begin(), end() and getsec(). Their type follows
   whichever clock API the matching platform branch in those functions
   uses. */
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start;
struct timeval stop;
#elif defined(__APPLE__)
mach_timebase_info_data_t info;
uint64_t start = 0;
uint64_t stop = 0;
#else
struct timespec start = { 0, 0 };
struct timespec stop = { 0, 0 };
#endif
|
||||
|
||||
double getsec()
|
||||
{
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
#elif defined(__APPLE__)
|
||||
mach_timebase_info(&info);
|
||||
return (double)(((stop - start) * info.numer)/info.denom) * 1.e-9;
|
||||
#else
|
||||
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
|
||||
#endif
|
||||
}
|
||||
|
||||
void begin() {
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
#elif defined(__APPLE__)
|
||||
start = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
|
||||
#else
|
||||
clock_gettime(CLOCK_REALTIME, &start);
|
||||
#endif
|
||||
}
|
||||
|
||||
void end() {
|
||||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
#elif defined(__APPLE__)
|
||||
stop = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
|
||||
#else
|
||||
clock_gettime(CLOCK_REALTIME, &stop);
|
||||
#endif
|
||||
}
|
|
@ -36,7 +36,12 @@
|
|||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
|
@ -66,6 +71,41 @@ double fabs(double);
|
|||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
static __inline double getmflops(int ratio, int m, double secs){
|
||||
|
||||
double mm = (double)m;
|
||||
|
@ -105,6 +145,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
FLOAT maxerr;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -179,19 +220,20 @@ int main(int argc, char *argv[]){
|
|||
|
||||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRF(uplo[uplos], &m, b, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
maxerr = 0.;
|
||||
|
||||
if (!(uplos & 1)) {
|
||||
for (j = 0; j < m; j++) {
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef COPY
|
||||
|
||||
|
@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The size handed to shmget() is (size + HUGE_PAGESIZE) masked with
 * ~(HUGE_PAGESIZE - 1). The segment is attached with shmat() and then
 * marked with IPC_RMID straight away. If shmget() or shmat() fails, a
 * message is printed and the process exits with status 1; otherwise the
 * attached address is returned.
 */
static void *huge_malloc(BLASLONG size){
  void *mapping;
  int segment;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
|
@ -57,9 +128,11 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1 = 0.0, timeg = 0.0;
|
||||
long nanos = 0;
|
||||
time_t seconds = 0;
|
||||
struct timespec time_start = { 0, 0 }, time_end = { 0, 0 };
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
|
@ -81,7 +154,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -103,10 +176,15 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
begin();
|
||||
clock_gettime(CLOCK_REALTIME, &time_start);
|
||||
COPY (&m, x, &inc_x, y, &inc_y );
|
||||
end();
|
||||
timeg += getsec();
|
||||
clock_gettime(CLOCK_REALTIME, &time_end);
|
||||
|
||||
nanos = time_end.tv_nsec - time_start.tv_nsec;
|
||||
seconds = time_end.tv_sec - time_start.tv_sec;
|
||||
|
||||
time1 = seconds + nanos / 1.e9;
|
||||
timeg += time1;
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
|
|
@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef DOT
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define DOT BLASFUNC(ddot)
|
||||
#else
|
||||
#define DOT BLASFUNC(sdot)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The size handed to shmget() is (size + HUGE_PAGESIZE) masked with
 * ~(HUGE_PAGESIZE - 1). The segment is attached with shmat() and then
 * marked with IPC_RMID straight away. If shmget() or shmat() fails, a
 * message is printed and the process exits with status 1; otherwise the
 * attached address is returned.
 */
static void *huge_malloc(BLASLONG size){
  void *mapping;
  int segment;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
|
@ -49,6 +122,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -71,7 +145,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -95,12 +169,15 @@ int main(int argc, char *argv[]){
|
|||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
result = DOT (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
end();
|
||||
timeg += getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -36,7 +36,13 @@
|
|||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEEV
|
||||
|
||||
|
@ -68,6 +74,71 @@ extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
|
|||
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The size handed to shmget() is (size + HUGE_PAGESIZE) masked with
 * ~(HUGE_PAGESIZE - 1). The segment is attached with shmat() and then
 * marked with IPC_RMID straight away. If shmget() or shmat() fails, a
 * message is printed and the process exits with status 1; otherwise the
 * attached address is returned.
 */
static void *huge_malloc(BLASLONG size){
  void *mapping;
  int segment;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
|
||||
|
@ -83,6 +154,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -142,7 +214,7 @@ int main(int argc, char *argv[]){
|
|||
}
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -151,7 +223,7 @@ int main(int argc, char *argv[]){
|
|||
for(m = from; m <= to; m += step){
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
lwork = -1;
|
||||
#ifndef COMPLEX
|
||||
|
@ -167,14 +239,14 @@ int main(int argc, char *argv[]){
|
|||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
|
||||
#endif
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMM
|
||||
|
||||
|
@ -33,8 +39,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#ifdef DOUBLE
|
||||
#define GEMM BLASFUNC(dgemm)
|
||||
#elif defined(HALF)
|
||||
#define GEMM BLASFUNC(sbgemm)
|
||||
#else
|
||||
#define GEMM BLASFUNC(sgemm)
|
||||
#endif
|
||||
|
@ -49,10 +53,74 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The size handed to shmget() is (size + HUGE_PAGESIZE) masked with
 * ~(HUGE_PAGESIZE - 1). The segment is attached with shmat() and then
 * marked with IPC_RMID straight away. If shmget() or shmat() fails, a
 * message is printed and the process exits with status 1; otherwise the
 * attached address is returned.
 */
static void *huge_malloc(BLASLONG size){
  void *mapping;
  int segment;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
IFLOAT *a, *b;
|
||||
FLOAT *c;
|
||||
FLOAT *a, *b, *c;
|
||||
FLOAT alpha[] = {1.0, 0.0};
|
||||
FLOAT beta [] = {0.0, 0.0};
|
||||
char transa = 'N';
|
||||
|
@ -68,6 +136,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1, timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -115,25 +184,25 @@ int main(int argc, char *argv[]){
|
|||
k = to;
|
||||
}
|
||||
|
||||
if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * m * k * COMPSIZE)) == NULL) {
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * m * k * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( b = (IFLOAT *)malloc(sizeof(IFLOAT) * k * n * COMPSIZE)) == NULL) {
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * k * n * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
for (i = 0; i < m * k * COMPSIZE; i++) {
|
||||
a[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
|
||||
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < k * n * COMPSIZE; i++) {
|
||||
b[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
|
||||
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < m * n * COMPSIZE; i++) {
|
||||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
|
@ -156,14 +225,14 @@ int main(int argc, char *argv[]){
|
|||
ldc = m;
|
||||
|
||||
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
for (j=0; j<loops; j++) {
|
||||
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
|
||||
}
|
||||
|
||||
end();
|
||||
time1 = getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg = time1/loops;
|
||||
fprintf(stderr,
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMM
|
||||
|
||||
|
@ -47,6 +53,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The size handed to shmget() is (size + HUGE_PAGESIZE) masked with
 * ~(HUGE_PAGESIZE - 1). The segment is attached with shmat() and then
 * marked with IPC_RMID straight away. If shmget() or shmat() fails, a
 * message is printed and the process exits with status 1; otherwise the
 * attached address is returned.
 */
static void *huge_malloc(BLASLONG size){
  void *mapping;
  int segment;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
|
@ -62,6 +133,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -91,7 +163,7 @@ int main(int argc, char *argv[]){
|
|||
loops = atoi(p);
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -115,12 +187,16 @@ int main(int argc, char *argv[]){
|
|||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
timeg += getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
|
|
@ -25,7 +25,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GEMV
|
||||
|
@ -47,6 +52,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate memory from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * The size handed to shmget() is (size + HUGE_PAGESIZE) masked with
 * ~(HUGE_PAGESIZE - 1). The segment is attached with shmat() and then
 * marked with IPC_RMID straight away. If shmget() or shmat() fails, a
 * message is printed and the process exits with status 1; otherwise the
 * attached address is returned.
 */
static void *huge_malloc(BLASLONG size){
  void *mapping;
  int segment;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT | 0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapping = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mapping == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(segment, IPC_RMID, 0);

  return mapping;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
|
@ -66,6 +137,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -109,7 +181,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -125,7 +197,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)i * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -139,10 +211,10 @@ int main(int argc, char *argv[]){
|
|||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
end();
|
||||
time1 = getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
@ -162,7 +234,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < n * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
a[(long)j + (long)i * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -176,10 +248,10 @@ int main(int argc, char *argv[]){
|
|||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
end();
|
||||
time1 = getsec();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef GER
|
||||
|
||||
|
@ -43,6 +49,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a private SysV shared-memory segment that is
 * requested with SHM_HUGETLB, and return the attached address.
 *
 * The requested length is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  If shmget() or shmat() fails, a message is
 * printed and the process exits with status 1, so the function never
 * returns a failure value.
 */
static void *huge_malloc(BLASLONG size){

/* Fallback value for headers that do not provide the flag. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value. */
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping is still returned. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
|
@ -59,6 +131,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -92,7 +165,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -125,13 +198,16 @@ int main(int argc, char *argv[]){
|
|||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
timeg += getsec();
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
|
|
@ -36,7 +36,12 @@
|
|||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
|
@ -61,6 +66,71 @@ double fabs(double);
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a private SysV shared-memory segment that is
 * requested with SHM_HUGETLB, and return the attached address.
 *
 * The requested length is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  If shmget() or shmat() fails, a message is
 * printed and the process exits with status 1, so the function never
 * returns a failure value.
 */
static void *huge_malloc(BLASLONG size){

/* Fallback value for headers that do not provide the flag. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value. */
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping is still returned. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
|
@ -72,6 +142,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -94,7 +165,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -123,18 +194,22 @@ int main(int argc, char *argv[]){
|
|||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GESV (&m, &m, a, &m, ipiv, b, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
|
||||
time1 = getsec();
|
||||
|
||||
fprintf(stderr,
|
||||
"%10.2f MFlops %10.6f s\n",
|
||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -36,7 +36,12 @@
|
|||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef GETRF
|
||||
#undef GETRI
|
||||
|
@ -67,22 +72,84 @@
|
|||
|
||||
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a private SysV shared-memory segment that is
 * requested with SHM_HUGETLB, and return the attached address.
 *
 * The requested length is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  If shmget() or shmat() fails, a message is
 * printed and the process exits with status 1, so the function never
 * returns a failure value.
 */
static void *huge_malloc(BLASLONG size){

/* Fallback value for headers that do not provide the flag. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value. */
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping is still returned. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*work;
|
||||
FLOAT wkopt[4];
|
||||
blasint *ipiv;
|
||||
blasint m, i, j, l, info,lwork;
|
||||
blasint m, i, j, info,lwork;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
int loops = 1;
|
||||
|
||||
double time1,timeg;
|
||||
|
||||
char *p;
|
||||
char btest = 'I';
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
|
@ -90,9 +157,6 @@ int main(int argc, char *argv[]){
|
|||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++;}
|
||||
|
||||
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);
|
||||
|
||||
|
@ -124,48 +188,39 @@ int main(int argc, char *argv[]){
|
|||
}
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE FLops Time Lwork\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
timeg = 0.;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
if (btest == 'F') begin();
|
||||
GETRF (&m, &m, a, &m, ipiv, &info);
|
||||
if (btest == 'F') {
|
||||
end();
|
||||
timeg += getsec();
|
||||
}
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (btest == 'I') begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
lwork = -1;
|
||||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);
|
||||
|
||||
lwork = (blasint)wkopt[0];
|
||||
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
|
||||
if (btest == 'I') end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if (btest == 'I')
|
||||
timeg += getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
} // loops
|
||||
time1 = timeg/(double)loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops : %10.2f Sec : %d\n",
|
||||
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork);
|
||||
|
|
|
@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HBMV
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HBMV BLASFUNC(zhbmv)
|
||||
#else
|
||||
#define HBMV BLASFUNC(chbmv)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz) {
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a private SysV shared-memory segment that is
 * requested with SHM_HUGETLB, and return the attached address.
 *
 * The requested length is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  If shmget() or shmat() fails, a message is
 * printed and the process exits with status 1, so the function never
 * returns a failure value.
 */
static void *huge_malloc(BLASLONG size) {

/* Fallback value for headers that do not provide the flag. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value. */
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping is still returned. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
|
@ -52,6 +125,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -84,7 +158,7 @@ int main(int argc, char *argv[]){
|
|||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -112,13 +186,15 @@ int main(int argc, char *argv[]){
|
|||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
timeg += getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HEMM
|
||||
|
||||
|
@ -35,6 +41,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define HEMM BLASFUNC(chemm)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a private SysV shared-memory segment that is
 * requested with SHM_HUGETLB, and return the attached address.
 *
 * The requested length is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  If shmget() or shmat() fails, a message is
 * printed and the process exits with status 1, so the function never
 * returns a failure value.
 */
static void *huge_malloc(BLASLONG size){

/* Fallback value for headers that do not provide the flag. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value. */
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping is still returned. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
|
@ -54,6 +126,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -78,7 +151,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -97,13 +170,13 @@ int main(int argc, char *argv[]){
|
|||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
|
|
|
@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HEMV
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HEMV BLASFUNC(zhemv)
|
||||
#else
|
||||
#define HEMV BLASFUNC(chemv)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a private SysV shared-memory segment that is
 * requested with SHM_HUGETLB, and return the attached address.
 *
 * The requested length is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  If shmget() or shmat() fails, a message is
 * printed and the process exits with status 1, so the function never
 * returns a failure value.
 */
static void *huge_malloc(BLASLONG size){

/* Fallback value for headers that do not provide the flag. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value. */
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping is still returned. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
|
@ -51,6 +124,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -78,7 +152,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -108,13 +182,13 @@ int main(int argc, char *argv[]){
|
|||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HER
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HER BLASFUNC(zher)
|
||||
#else
|
||||
#define HER BLASFUNC(cher)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a private SysV shared-memory segment that is
 * requested with SHM_HUGETLB, and return the attached address.
 *
 * The requested length is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  If shmget() or shmat() fails, a message is
 * printed and the process exits with status 1, so the function never
 * returns a failure value.
 */
static void *huge_malloc(BLASLONG size){

/* Fallback value for headers that do not provide the flag. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value. */
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping is still returned. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x;
|
||||
|
@ -53,6 +126,8 @@ int main(int argc, char *argv[]){
|
|||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -74,7 +149,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -91,13 +166,15 @@ int main(int argc, char *argv[]){
|
|||
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HER (&uplo, &m, alpha, x, &incx, a, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
|
|
|
@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HER2
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HER2 BLASFUNC(zher2)
|
||||
#else
|
||||
#define HER2 BLASFUNC(cher2)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a private SysV shared-memory segment that is
 * requested with SHM_HUGETLB, and return the attached address.
 *
 * The requested length is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  If shmget() or shmat() fails, a message is
 * printed and the process exits with status 1, so the function never
 * returns a failure value.
 */
static void *huge_malloc(BLASLONG size){

/* Fallback value for headers that do not provide the flag. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value. */
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping is still returned. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
|
@ -54,6 +127,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -77,7 +151,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -95,13 +169,16 @@ int main(int argc, char *argv[]){
|
|||
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
|
||||
HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HER2K
|
||||
#ifdef DOUBLE
|
||||
|
@ -34,6 +40,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define HER2K BLASFUNC(cher2k)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Obtain `size` bytes from a private SysV shared-memory segment that is
 * requested with SHM_HUGETLB, and return the attached address.
 *
 * The requested length is (size + HUGE_PAGESIZE) masked down to a
 * multiple of HUGE_PAGESIZE.  If shmget() or shmat() fails, a message is
 * printed and the process exits with status 1, so the function never
 * returns a failure value.
 */
static void *huge_malloc(BLASLONG size){

/* Fallback value for headers that do not provide the flag. */
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  int segment;
  void *mapped;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);

  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mapped = shmat(segment, NULL, SHM_RND);

  /* shmat() signals failure with an all-ones pointer value. */
  if ((BLASLONG)mapped == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the mapping is still returned. */
  shmctl(segment, IPC_RMID, 0);

  return mapped;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
|
@ -53,6 +125,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -77,7 +150,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -96,13 +169,13 @@ int main(int argc, char *argv[]){
|
|||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
|
|
|
@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HERK
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HERK BLASFUNC(zherk)
|
||||
#else
|
||||
#define HERK BLASFUNC(cherk)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *c;
|
||||
|
@ -54,6 +127,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -75,7 +149,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -93,17 +167,18 @@ int main(int argc, char *argv[]){
|
|||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
|
||||
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
|
|
@ -25,16 +25,89 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef HPMV
|
||||
|
||||
|
||||
#ifdef DOUBLE
|
||||
#define HPMV BLASFUNC(zhpmv)
|
||||
#else
|
||||
#define HPMV BLASFUNC(chpmv)
|
||||
#endif
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz) {
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size) {
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
|
@ -51,6 +124,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -81,7 +155,7 @@ int main(int argc, char *argv[]){
|
|||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -109,13 +183,13 @@ int main(int argc, char *argv[]){
|
|||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef IAMAX
|
||||
|
||||
|
@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
|
@ -56,6 +127,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -73,7 +145,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -94,13 +166,13 @@ int main(int argc, char *argv[]){
|
|||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
IAMAX (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef IAMIN
|
||||
|
||||
|
@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
|
@ -56,6 +127,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -73,7 +145,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -94,13 +166,13 @@ int main(int argc, char *argv[]){
|
|||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
IAMIN (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef IMAX
|
||||
|
||||
|
@ -37,6 +43,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
|
@ -50,6 +121,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -67,7 +139,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -88,13 +160,13 @@ int main(int argc, char *argv[]){
|
|||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
IMAX (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef IMIN
|
||||
|
||||
|
@ -37,6 +43,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
|
@ -50,6 +121,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -67,7 +139,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -88,13 +160,13 @@ int main(int argc, char *argv[]){
|
|||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
IMIN (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -36,7 +36,12 @@
|
|||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
|
@ -67,25 +72,87 @@ double fabs(double);
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b;
|
||||
blasint *ipiv;
|
||||
|
||||
blasint m, i, j, l, info;
|
||||
blasint m, i, j, info;
|
||||
blasint unit = 1;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
int loops = 1;
|
||||
|
||||
FLOAT maxerr;
|
||||
|
||||
double time1, time2, timeg1,timeg2;
|
||||
|
||||
char *p;
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
|
||||
struct timeval start, stop;
|
||||
double time1, time2;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
|
@ -107,16 +174,16 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
fprintf(stderr, " SIZE Residual Decompose Solve Total\n");
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
timeg1 = timeg2 = 0.;
|
||||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
for(j = 0; j < m; j++){
|
||||
for(i = 0; i < m * COMPSIZE; i++){
|
||||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
|
@ -131,34 +198,32 @@ int main(int argc, char *argv[]){
|
|||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GETRF (&m, &m, a, &m, ipiv, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
timeg1 += getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info) {
|
||||
fprintf(stderr, "Matrix is not singular .. %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
timeg2 += getsec();
|
||||
} //loops
|
||||
time1=timeg1/(double)loops;
|
||||
time2=timeg2/(double)loops;
|
||||
time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
maxerr = 0.;
|
||||
|
||||
for(i = 0; i < m; i++){
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef NAMAX
|
||||
|
||||
|
@ -37,6 +43,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
|
@ -50,6 +121,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -67,7 +139,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -88,13 +160,13 @@ int main(int argc, char *argv[]){
|
|||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
NAMAX (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef NAMIN
|
||||
|
||||
|
@ -37,6 +43,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
|
@ -50,6 +121,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -67,7 +139,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -88,13 +160,13 @@ int main(int argc, char *argv[]){
|
|||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
NAMIN (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef NRM2
|
||||
|
||||
|
@ -43,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
|
@ -56,6 +127,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -73,7 +145,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -94,13 +166,13 @@ int main(int argc, char *argv[]){
|
|||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
NRM2 (&m, x, &inc_x);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -1,122 +0,0 @@
|
|||
/***************************************************************************
|
||||
Copyright (c) 2024, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
|
||||
/*
 * Bind OMATCOPY to the routine matching the build-time configuration:
 *   COMPLEX + DOUBLE -> zomatcopy,  COMPLEX only -> comatcopy,
 *   DOUBLE only      -> domatcopy,  neither      -> somatcopy.
 */
#undef OMATCOPY

#if defined(COMPLEX) && defined(DOUBLE)
#define OMATCOPY BLASFUNC(zomatcopy)
#elif defined(COMPLEX)
#define OMATCOPY BLASFUNC(comatcopy)
#elif defined(DOUBLE)
#define OMATCOPY BLASFUNC(domatcopy)
#else
#define OMATCOPY BLASFUNC(somatcopy)
#endif
|
||||
int main(int argc, char *argv[]){
|
||||
FLOAT *a, *b;
|
||||
FLOAT alpha[] = {1.0, 0.0};
|
||||
char trans = 'N';
|
||||
char order = 'C';
|
||||
blasint crows, ccols, clda, cldb;
|
||||
int loops = 1;
|
||||
char *p;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
int i, j;
|
||||
|
||||
double time1, timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
if (argc > 0) { from = atol(*argv); argc--; argv++; }
|
||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; }
|
||||
if (argc > 0) { step = atol(*argv); argc--; argv++; }
|
||||
|
||||
if ((p = getenv("OPENBLAS_TRANS"))) {
|
||||
trans=*p;
|
||||
}
|
||||
if ((p = getenv("OPENBLAS_ORDER"))) {
|
||||
order=*p;
|
||||
}
|
||||
TOUPPER(trans);
|
||||
TOUPPER(order);
|
||||
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c : Order=%c\n", from, to, step, trans, order);
|
||||
p = getenv("OPENBLAS_LOOPS");
|
||||
if ( p != NULL ) {
|
||||
loops = atoi(p);
|
||||
}
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
for (i = 0; i < to * to * COMPSIZE; i++) {
|
||||
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
for (i = 0; i < to * to * COMPSIZE; i++) {
|
||||
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
fprintf(stderr, " SIZE Flops Time\n");
|
||||
for (i = from; i <= to; i += step) {
|
||||
cldb = clda = crows = ccols = i;
|
||||
fprintf(stderr, " ROWS=%4d, COLS=%4d : ", (int)crows, (int)ccols);
|
||||
begin();
|
||||
|
||||
for (j=0; j<loops; j++) {
|
||||
OMATCOPY (&order, &trans, &crows, &ccols, alpha, a, &clda, b, &cldb);
|
||||
}
|
||||
|
||||
end();
|
||||
time1 = getsec();
|
||||
|
||||
timeg = time1/loops;
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops %10.6f sec\n",
|
||||
COMPSIZE * COMPSIZE * (double)ccols * (double)crows / timeg * 1.e-6, time1);
|
||||
}
|
||||
|
||||
free(a);
|
||||
free(b);
|
||||
|
||||
return 0;
|
||||
}
|
|
@ -36,7 +36,12 @@
|
|||
/* or implied, of The University of Texas at Austin. */
|
||||
/*********************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
double fabs(double);
|
||||
|
||||
|
@ -81,7 +86,37 @@ double fabs(double);
|
|||
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
|
||||
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
|
@ -99,15 +134,15 @@ int main(int argc, char *argv[]){
|
|||
char *p;
|
||||
char btest = 'F';
|
||||
|
||||
blasint m, i, j, l, info, uplos=0;
|
||||
double flops = 0.;
|
||||
blasint m, i, j, info, uplos=0;
|
||||
double flops;
|
||||
|
||||
int from = 1;
|
||||
int to = 200;
|
||||
int step = 1;
|
||||
int loops = 1;
|
||||
|
||||
double time1, timeg;
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
||||
|
@ -120,8 +155,6 @@ int main(int argc, char *argv[]){
|
|||
|
||||
if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
|
||||
|
||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
|
||||
|
||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c\n", from, to, step,*uplo[uplos]);
|
||||
|
||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
|
||||
|
@ -132,10 +165,8 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
|
||||
for(m = from; m <= to; m += step){
|
||||
timeg=0.;
|
||||
for (l = 0; l < loops; l++) {
|
||||
|
||||
#ifndef COMPLEX
|
||||
if (uplos & 1) {
|
||||
for (j = 0; j < m; j++) {
|
||||
|
@ -186,19 +217,19 @@ int main(int argc, char *argv[]){
|
|||
|
||||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRF(uplo[uplos], &m, b, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potrf info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ( btest == 'F')
|
||||
timeg += getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
|
||||
if ( btest == 'S' )
|
||||
{
|
||||
|
@ -209,43 +240,39 @@ int main(int argc, char *argv[]){
|
|||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potrs info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
timeg += getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
|
||||
|
||||
}
|
||||
|
||||
if ( btest == 'I' )
|
||||
{
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
POTRI(uplo[uplos], &m, b, &m, &info);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
if (info != 0) {
|
||||
fprintf(stderr, "Potri info = %d\n", info);
|
||||
exit(1);
|
||||
}
|
||||
timeg += getsec();
|
||||
}
|
||||
} // loops
|
||||
|
||||
time1 = timeg/(double)loops;
|
||||
if ( btest == 'F')
|
||||
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
if ( btest == 'S')
|
||||
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;
|
||||
if ( btest == 'I')
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
|
||||
}
|
||||
|
||||
fprintf(stderr, "%8d : %10.2f MFlops : %10.3f Sec : Test=%c\n",m,flops ,time1,btest);
|
||||
|
||||
|
||||
|
|
|
@ -1,49 +0,0 @@
|
|||
# Continuous benchmarking of OpenBLAS performance
|
||||
|
||||
We run a set of benchmarks covering a subset of OpenBLAS functionality.
|
||||
|
||||
## Benchmark runner
|
||||
|
||||
[CodSpeed dashboard](https://codspeed.io/OpenMathLib/OpenBLAS/)
|
||||
|
||||
Click on [benchmarks](https://codspeed.io/OpenMathLib/OpenBLAS/benchmarks) to see the performance of a particular benchmark over time;
|
||||
Click on [branches](https://codspeed.io/OpenMathLib/OpenBLAS/branches/) and then on the last PR link to see the flamegraphs.
|
||||
|
||||
## What are the benchmarks
|
||||
|
||||
We run raw BLAS/LAPACK subroutines, via f2py-generated python wrappers. The wrappers themselves are equivalent to [those from SciPy](https://docs.scipy.org/doc/scipy/reference/linalg.lapack.html).
|
||||
In fact, the wrappers _are_ from SciPy, we take a small subset simply to avoid having to build the whole SciPy for each CI run.
|
||||
|
||||
|
||||
## Adding a new benchmark
|
||||
|
||||
`.github/workflows/codspeed-bench.yml` does all the orchestration on CI.
|
||||
|
||||
Benchmarks live in the `benchmark/pybench` directory. It is organized as follows:
|
||||
|
||||
- benchmarks themselves live in the `benchmarks` folder. Note that the LAPACK routines are imported from the `openblas_wrap` package.
|
||||
- the `openblas_wrap` package is a simple trampoline: it contains an f2py extension, `_flapack`, which talks to OpenBLAS, and exports the python names in its `__init__.py`.
|
||||
This way, the `openblas_wrap` package shields the benchmarks from the details of where a particular LAPACK function comes from. If desired, you may for instance swap the `_flapack` extension for
|
||||
`scipy.linalg.blas` and `scipy.linalg.lapack`.
|
||||
|
||||
To change parameters of an existing benchmark, edit python files in the `benchmark/pybench/benchmarks` directory.
|
||||
|
||||
To add a benchmark for a new BLAS or LAPACK function, you need to:
|
||||
|
||||
- add an f2py wrapper for the bare LAPACK function. You can simply copy a wrapper from SciPy (look for `*.pyf.src` files in https://github.com/scipy/scipy/tree/main/scipy/linalg)
|
||||
- add an import to `benchmark/pybench/openblas_wrap/__init__.py`
|
||||
|
||||
|
||||
## Running benchmarks locally
|
||||
|
||||
This benchmarking layer is orchestrated from python, therefore you'll need to
|
||||
have everything it takes to build OpenBLAS from source, plus `python` and the packages installed by the following command:
|
||||
|
||||
```
|
||||
$ python -mpip install numpy meson ninja pytest pytest-benchmark
|
||||
```
|
||||
|
||||
The benchmark syntax is consistent with that of the `pytest-benchmark` framework. The incantation to run the suite locally is `$ pytest benchmark/pybench/benchmarks/test_blas.py`.
|
||||
|
||||
An ASV compatible benchmark suite is planned but currently not implemented.
|
||||
|
|
@ -1,274 +0,0 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
import openblas_wrap as ow
|
||||
|
||||
# Routine prefix ("variant") -> NumPy dtype used to build the operands.
dtype_map = dict(
    s=np.float32,
    d=np.float64,
    c=np.complex64,
    z=np.complex128,
    dz=np.complex128,
)
|
||||
|
||||
|
||||
# ### BLAS level 1 ###
|
||||
|
||||
# dnrm2
|
||||
|
||||
dnrm2_sizes = [100, 1000]


def run_dnrm2(n, x, incx, func):
    """Call ``func(x, n, incx=incx)`` and return its result."""
    return func(x, n, incx=incx)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('variant', ['d', 'dz'])
@pytest.mark.parametrize('n', dnrm2_sizes)
def test_nrm2(benchmark, n, variant):
    """Benchmark the ``nrm2`` routine of ``variant`` on a random length-``n`` vector."""
    rng = np.random.RandomState(1234)
    dtype = dtype_map[variant]

    vec = np.array(rng.uniform(size=(n,)), dtype=dtype)
    kernel = ow.get_func('nrm2', variant)
    benchmark(run_dnrm2, n, vec, 1, kernel)
|
||||
|
||||
|
||||
# ddot
|
||||
|
||||
ddot_sizes = [100, 1000]


def run_ddot(x, y, func):
    """Call ``func(x, y)`` and return its result."""
    return func(x, y)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('n', ddot_sizes)
def test_dot(benchmark, n):
    """Benchmark ``ddot`` on two random float vectors of length ``n``."""
    rng = np.random.RandomState(1234)

    lhs = np.array(rng.uniform(size=(n,)), dtype=float)
    rhs = np.array(rng.uniform(size=(n,)), dtype=float)
    kernel = ow.get_func('dot', 'd')
    benchmark(run_ddot, lhs, rhs, kernel)
|
||||
|
||||
|
||||
# daxpy
|
||||
|
||||
daxpy_sizes = [100, 1000]


def run_daxpy(x, y, func):
    """Call ``func(x, y, a=2.0)`` and return its result."""
    return func(x, y, a=2.0)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', daxpy_sizes)
def test_daxpy(benchmark, n, variant):
    """Benchmark the ``axpy`` routine of ``variant`` on two random length-``n`` vectors."""
    rng = np.random.RandomState(1234)
    dtype = dtype_map[variant]

    xs = np.array(rng.uniform(size=(n,)), dtype=dtype)
    ys = np.array(rng.uniform(size=(n,)), dtype=dtype)
    kernel = ow.get_func('axpy', variant)
    benchmark(run_daxpy, xs, ys, kernel)
|
||||
|
||||
|
||||
# ### BLAS level 2 ###
|
||||
|
||||
gemv_sizes = [100, 1000]


def run_gemv(a, x, y, func):
    """Call ``func`` with alpha fixed at 1.0, passing ``y`` with ``overwrite_y=True``."""
    options = {'y': y, 'overwrite_y': True}
    return func(1.0, a, x, **options)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', gemv_sizes)
def test_dgemv(benchmark, n, variant):
    """Benchmark the ``gemv`` routine of ``variant`` on a random ``n x n`` matrix.

    The operands are created exactly once; ``y`` starts as zeros and is
    expected to be returned by the wrapper as the same object, because the
    runner passes ``overwrite_y=True``.
    """
    rndm = np.random.RandomState(1234)
    dtyp = dtype_map[variant]

    a = np.array(rndm.uniform(size=(n,n)), dtype=dtyp)
    x = np.array(rndm.uniform(size=(n,)), dtype=dtyp)
    y = np.zeros(n, dtype=dtyp)

    gemv = ow.get_func('gemv', variant)
    result = benchmark(run_gemv, a, x, y, gemv)

    assert result is y
|
||||
|
||||
|
||||
# dgbmv
|
||||
|
||||
dgbmv_sizes = [100, 1000]


def run_gbmv(m, n, kl, ku, a, x, y, func):
    """Call ``func`` with alpha fixed at 1.0, passing ``y`` with ``overwrite_y=True``."""
    options = {'y': y, 'overwrite_y': True}
    return func(m, n, kl, ku, 1.0, a, x, **options)
|
||||
|
||||
|
||||
|
||||
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', dgbmv_sizes)
@pytest.mark.parametrize('kl', [1])
def test_dgbmv(benchmark, n, kl, variant):
    """Benchmark the ``gbmv`` routine of ``variant`` with ``kl`` sub- and super-diagonals.

    The band storage array has ``2*kl + 1`` rows and ``n`` columns and is
    Fortran-ordered; the result must be the ``y`` object that was passed in.
    """
    rng = np.random.RandomState(1234)
    dtype = dtype_map[variant]

    vec_in = np.array(rng.uniform(size=(n,)), dtype=dtype)
    vec_out = np.empty(n, dtype=dtype)

    rows = n

    band = rng.uniform(size=(2*kl + 1, n))
    band = np.array(band, dtype=dtype, order='F')

    kernel = ow.get_func('gbmv', variant)
    returned = benchmark(run_gbmv, rows, n, kl, kl, band, vec_in, vec_out, kernel)
    assert returned is vec_out
|
||||
|
||||
|
||||
# ### BLAS level 3 ###
|
||||
|
||||
# dgemm
|
||||
|
||||
gemm_sizes = [100, 1000]


def run_gemm(a, b, c, func):
    """Call ``func`` with alpha fixed at 1.0, passing ``c`` with ``overwrite_c=True``."""
    return func(1.0, a, b, c=c, overwrite_c=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', gemm_sizes)
def test_gemm(benchmark, n, variant):
    """Benchmark the ``gemm`` routine of ``variant`` on Fortran-ordered ``n x n`` operands.

    The result must be the ``c`` object that was passed in.
    """
    rng = np.random.RandomState(1234)
    dtype = dtype_map[variant]

    lhs = np.array(rng.uniform(size=(n, n)), dtype=dtype, order='F')
    rhs = np.array(rng.uniform(size=(n, n)), dtype=dtype, order='F')
    out = np.empty((n, n), dtype=dtype, order='F')

    kernel = ow.get_func('gemm', variant)
    returned = benchmark(run_gemm, lhs, rhs, out, kernel)
    assert returned is out
|
||||
|
||||
|
||||
# dsyrk
|
||||
|
||||
syrk_sizes = [100, 1000]


def run_syrk(a, c, func):
    """Call ``func`` with alpha fixed at 1.0, passing ``c`` with ``overwrite_c=True``."""
    return func(1.0, a, c=c, overwrite_c=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', syrk_sizes)
def test_syrk(benchmark, n, variant):
    """Benchmark the ``syrk`` routine of ``variant`` on a Fortran-ordered ``n x n`` matrix.

    The result must be the ``c`` object that was passed in.
    """
    rng = np.random.RandomState(1234)
    dtype = dtype_map[variant]

    mat = np.array(rng.uniform(size=(n, n)), dtype=dtype, order='F')
    out = np.empty((n, n), dtype=dtype, order='F')

    kernel = ow.get_func('syrk', variant)
    returned = benchmark(run_syrk, mat, out, kernel)
    assert returned is out
|
||||
|
||||
|
||||
# ### LAPACK ###
|
||||
|
||||
# linalg.solve
|
||||
|
||||
gesv_sizes = [100, 1000]


def run_gesv(a, b, func):
    """Call ``func(a, b)`` with both ``overwrite_a`` and ``overwrite_b`` enabled."""
    return func(a, b, overwrite_a=True, overwrite_b=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('variant', ['s', 'd', 'c', 'z'])
@pytest.mark.parametrize('n', gesv_sizes)
def test_gesv(benchmark, n, variant):
    """Benchmark the ``gesv`` routine of ``variant`` on a random system.

    The matrix is a random ``n x n`` array plus the identity, the right-hand
    side a random ``n x 1`` column; both are Fortran-ordered.  The returned
    factorization and solution must be the input objects themselves, and
    ``info`` must be zero.
    """
    rng = np.random.RandomState(1234)
    dtype = dtype_map[variant]

    random_part = np.array(rng.uniform(size=(n, n)), dtype=dtype, order='F')
    mat = random_part + np.eye(n, dtype=dtype, order='F')
    rhs = np.array(rng.uniform(size=(n, 1)), dtype=dtype, order='F')

    kernel = ow.get_func('gesv', variant)
    lu, piv, sol, info = benchmark(run_gesv, mat, rhs, kernel)

    assert lu is mat
    assert sol is rhs
    assert info == 0
|
||||
|
||||
|
||||
# linalg.svd
|
||||
|
||||
gesdd_sizes = [(100, 5), (1000, 222)]


def run_gesdd(a, lwork, func):
    """Call ``func(a)`` with the given ``lwork``, ``full_matrices=False`` and ``overwrite_a=False``."""
    return func(a, lwork=lwork, full_matrices=False, overwrite_a=False)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('variant', ['s', 'd'])
@pytest.mark.parametrize('mn', gesdd_sizes)
def test_gesdd(benchmark, mn, variant):
    """Benchmark the ``gesdd`` routine of ``variant`` on a random ``m x n`` matrix.

    The workspace size comes from the matching ``gesdd_lwork`` routine.
    After the run, ``u @ diag(s) @ vt`` must reproduce the input within a
    per-precision absolute tolerance.
    """
    rows, cols = mn
    rng = np.random.RandomState(1234)
    dtype = dtype_map[variant]

    mat = np.array(rng.uniform(size=(rows, cols)), dtype=dtype, order='F')

    # Workspace query.
    query = ow.get_func('gesdd_lwork', variant)
    lwork, info = query(rows, cols)
    lwork = int(lwork)
    assert info == 0

    kernel = ow.get_func('gesdd', variant)
    u, s, vt, info = benchmark(run_gesdd, mat, lwork, kernel)

    assert info == 0

    tolerances = {'s': 1e-5, 'd': 1e-13}
    np.testing.assert_allclose(u @ np.diag(s) @ vt, mat, atol=tolerances[variant])
|
||||
|
||||
|
||||
# linalg.eigh
|
||||
|
||||
syev_sizes = [50, 200]


def run_syev(a, lwork, func):
    """Call ``func(a)`` with the given ``lwork`` and ``overwrite_a=True``."""
    return func(a, lwork=lwork, overwrite_a=True)
|
||||
|
||||
|
||||
@pytest.mark.parametrize('variant', ['s', 'd'])
@pytest.mark.parametrize('n', syev_sizes)
def test_syev(benchmark, n, variant):
    """Benchmark the ``syev`` routine of ``variant`` on a random symmetric matrix.

    The input is ``r + r.T`` for a random ``n x n`` array ``r``, stored in
    Fortran order.  The workspace size comes from the matching
    ``syev_lwork`` routine.  The returned eigenvector array must be the
    input object itself, since the runner passes ``overwrite_a=True``.
    """
    rndm = np.random.RandomState(1234)
    dtyp = dtype_map[variant]

    a = rndm.uniform(size=(n, n))
    a = np.asarray(a + a.T, dtype=dtyp, order='F')

    # Workspace query.
    syev_lwork = ow.get_func('syev_lwork', variant)
    lwork, info = syev_lwork(n)
    lwork = int(lwork)
    assert info == 0

    syev = ow.get_func('syev', variant)
    w, v, info = benchmark(run_syev, a, lwork, syev)

    assert info == 0
    assert a is v  # overwrite_a=True
|
||||
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
#
|
||||
# Taken from SciPy (of course)
|
||||
#
|
||||
# Project definition: C and Fortran sources, Meson 1.1.0 or newer.
project(
  'openblas-wrap',
  'c', 'fortran',
  version: '0.1',
  license: 'BSD-3',
  meson_version: '>= 1.1.0',
  default_options: [
    'buildtype=debugoptimized',
    'b_ndebug=if-release',
    'c_std=c17',
    'fortran_std=legacy',
  ],
)

# Python installation the extension module is built against.
py3 = import('python').find_installation(pure: false)
py3_dep = py3.dependency()

cc = meson.get_compiler('c')

# Silence selected warnings, but only where the C compiler accepts the flag.
_global_c_args = cc.get_supported_arguments(
  '-Wno-unused-but-set-variable',
  '-Wno-unused-function',
  '-Wno-conversion',
  '-Wno-misleading-indentation',
)
add_project_arguments(_global_c_args, language: 'c')

# Link C code against libm when the library exists (the lookup is optional).
# For Fortran code, Meson already adds `-lm`.
m_dep = cc.find_library('m', required: false)
if m_dep.found()
  add_project_link_arguments('-lm', language: 'c')
endif

# Helper script used to generate the f2py module sources.
generate_f2pymod = find_program('openblas_wrap/generate_f2pymod.py')

# OpenBLAS is located through pkg-config and is mandatory.
openblas = dependency('openblas', method: 'pkg-config', required: true)
openblas_dep = declare_dependency(
  dependencies: openblas,
  compile_args: []
)

subdir('openblas_wrap')
|
|
@ -1,17 +0,0 @@
|
|||
"""
|
||||
Trampoline to hide the LAPACK details (scipy.linalg.lapack or scipy_openblas32 or...)
|
||||
from benchmarking.
|
||||
"""
|
||||
|
||||
__version__ = "0.1"
|
||||
|
||||
|
||||
from . import _flapack
|
||||
|
||||
PREFIX = ''
|
||||
|
||||
|
||||
def get_func(name, variant):
    """Return the wrapped routine ``PREFIX + variant + name`` from ``_flapack``.

    Example: get_func('gesv', 'c') -> cgesv etc.
    """
    symbol = PREFIX + variant + name
    return getattr(_flapack, symbol)
|
||||
|
|
@ -1,417 +0,0 @@
|
|||
!
|
||||
! Taken from scipy/linalg
|
||||
!
|
||||
! Shorthand notations
|
||||
!
|
||||
! <tchar=s,d,cs,zd>
|
||||
! <tchar2c=cs,zd>
|
||||
!
|
||||
! <prefix2=s,d>
|
||||
! <prefix2c=c,z>
|
||||
! <prefix3=s,sc>
|
||||
! <prefix4=d,dz>
|
||||
! <prefix6=s,d,c,z,c,z>
|
||||
!
|
||||
! <ftype2=real,double precision>
|
||||
! <ftype2c=complex,double complex>
|
||||
! <ftype3=real,complex>
|
||||
! <ftype4=double precision,double complex>
|
||||
! <ftypereal3=real,real>
|
||||
! <ftypereal4=double precision,double precision>
|
||||
! <ftype6=real,double precision,complex,double complex,\2,\3>
|
||||
! <ftype6creal=real,double precision,complex,double complex,\0,\1>
|
||||
!
|
||||
! <ctype2=float,double>
|
||||
! <ctype2c=complex_float,complex_double>
|
||||
! <ctype3=float,complex_float>
|
||||
! <ctype4=double,complex_double>
|
||||
! <ctypereal3=float,float>
|
||||
! <ctypereal4=double,double>
|
||||
! <ctype6=float,double,complex_float,complex_double,\2,\3>
|
||||
! <ctype6creal=float,double,complex_float,complex_double,\0,\1>
|
||||
!
|
||||
!
|
||||
! Level 1 BLAS
|
||||
!
|
||||
|
||||
|
||||
python module _flapack
|
||||
usercode '''
|
||||
#define F_INT int
|
||||
'''
|
||||
|
||||
interface
|
||||
|
||||
|
||||
subroutine <prefix>axpy(n,a,x,offx,incx,y,offy,incy)
   ! Calculate z = a*x+y, where a is scalar.
   !
   ! f2py signature for the <prefix>axpy routines.  The wrapped routine is
   ! called with x and y advanced by offx and offy elements; y is updated
   ! and also returned under the name z.

   callstatement (*f2py_func)(&n,&a,x+offx,&incx,y+offy,&incy)
   callprotoargument F_INT*,<ctype>*,<ctype>*,F_INT*,<ctype>*,F_INT*

   <ftype> dimension(*), intent(in) :: x
   <ftype> dimension(*), intent(in,out,out=z) :: y
   ! a is optional; its default is one (real or complex, per type).
   <ftype> optional, intent(in):: a=<1.0,\0,(1.0\,0.0),\2>
   ! Strides default to 1 and must be non-zero.
   integer optional, intent(in),check(incx>0||incx<0) :: incx = 1
   integer optional, intent(in),check(incy>0||incy<0) :: incy = 1
   ! Offsets default to 0 and must index an existing element.
   integer optional, intent(in),depend(x) :: offx=0
   integer optional, intent(in),depend(y) :: offy=0
   check(offx>=0 && offx<len(x)) :: offx
   check(offy>=0 && offy<len(y)) :: offy
   ! n defaults to the number of strided elements available in x after offx;
   ! the checks reject an n that would run past the end of x or y.
   integer optional, intent(in),depend(x,incx,offx,y,incy,offy) :: &
        n = (len(x)-offx)/abs(incx)
   check(len(x)-offx>(n-1)*abs(incx)) :: n
   check(len(y)-offy>(n-1)*abs(incy)) :: n

end subroutine <prefix>axpy
|
||||
|
||||
function ddot(n,x,offx,incx,y,offy,incy) result (xy)
   ! Computes a vector-vector dot product.
   !
   ! f2py signature for the double precision routine only.  The wrapped
   ! function is called with x and y advanced by offx and offy elements and
   ! its return value becomes the result xy.

   callstatement ddot_return_value = (*f2py_func)(&n,x+offx,&incx,y+offy,&incy)
   callprotoargument F_INT*,double*,F_INT*,double*,F_INT*
   intent(c) ddot
   fortranname F_FUNC(ddot,DDOT)

   double precision dimension(*), intent(in) :: x
   double precision dimension(*), intent(in) :: y
   double precision ddot,xy
   ! Strides default to 1 and must be non-zero.
   integer optional, intent(in),check(incx>0||incx<0) :: incx = 1
   integer optional, intent(in),check(incy>0||incy<0) :: incy = 1
   ! Offsets default to 0 and must index an existing element.
   integer optional, intent(in),depend(x) :: offx=0
   integer optional, intent(in),depend(y) :: offy=0
   check(offx>=0 && offx<len(x)) :: offx
   check(offy>=0 && offy<len(y)) :: offy
   ! n defaults to the number of strided elements available in x after offx;
   ! the checks reject an n that would run past the end of x or y.
   integer optional, intent(in),depend(x,incx,offx,y,incy,offy) :: &
        n = (len(x)-offx)/abs(incx)
   check(len(x)-offx>(n-1)*abs(incx)) :: n
   check(len(y)-offy>(n-1)*abs(incy)) :: n

end function ddot
|
||||
|
||||
|
||||
function <prefix4>nrm2(n,x,offx,incx) result(n2)
|
||||
|
||||
<ftypereal4> <prefix4>nrm2, n2
|
||||
|
||||
callstatement <prefix4>nrm2_return_value = (*f2py_func)(&n,x+offx,&incx)
|
||||
callprotoargument F_INT*,<ctype4>*,F_INT*
|
||||
intent(c) <prefix4>nrm2
|
||||
fortranname F_FUNC(<prefix4>nrm2,<D,DZ>NRM2)
|
||||
|
||||
<ftype4> dimension(*),intent(in) :: x
|
||||
|
||||
integer optional, intent(in),check(incx>0) :: incx = 1
|
||||
|
||||
integer optional,intent(in),depend(x) :: offx=0
|
||||
check(offx>=0 && offx<len(x)) :: offx
|
||||
|
||||
integer optional,intent(in),depend(x,incx,offx) :: n = (len(x)-offx)/abs(incx)
|
||||
check(len(x)-offx>(n-1)*abs(incx)) :: n
|
||||
|
||||
end function <prefix4>nrm2
|
||||
|
||||
|
||||
!
|
||||
! Level 2 BLAS
|
||||
!
|
||||
|
||||
|
||||
subroutine <prefix>gemv(m,n,alpha,a,x,beta,y,offx,incx,offy,incy,trans,rows,cols,ly)
|
||||
! Computes a matrix-vector product using a general matrix
|
||||
!
|
||||
! y = gemv(alpha,a,x,beta=0,y=0,offx=0,incx=1,offy=0,incy=1,trans=0)
|
||||
! Calculate y <- alpha * op(A) * x + beta * y
|
||||
|
||||
callstatement (*f2py_func)((trans?(trans==2?"C":"T"):"N"),&m,&n,&alpha,a,&m, &
|
||||
x+offx,&incx,&beta,y+offy,&incy)
|
||||
callprotoargument char*,F_INT*,F_INT*,<ctype>*,<ctype>*,F_INT*,<ctype>*,F_INT*,<ctype>*, &
|
||||
<ctype>*,F_INT*
|
||||
|
||||
integer optional, intent(in), check(trans>=0 && trans <=2) :: trans = 0
|
||||
integer optional, intent(in), check(incx>0||incx<0) :: incx = 1
|
||||
integer optional, intent(in), check(incy>0||incy<0) :: incy = 1
|
||||
<ftype> intent(in) :: alpha
|
||||
<ftype> intent(in), optional :: beta = <0.0,\0,(0.0\,0.0),\2>
|
||||
|
||||
<ftype> dimension(*), intent(in) :: x
|
||||
<ftype> dimension(ly), intent(in,copy,out), depend(ly),optional :: y
|
||||
integer intent(hide), depend(incy,rows,offy) :: ly = &
|
||||
(y_capi==Py_None?1+offy+(rows-1)*abs(incy):-1)
|
||||
<ftype> dimension(m,n), intent(in) :: a
|
||||
integer depend(a), intent(hide):: m = shape(a,0)
|
||||
integer depend(a), intent(hide):: n = shape(a,1)
|
||||
|
||||
integer optional, intent(in) :: offx=0
|
||||
integer optional, intent(in) :: offy=0
|
||||
check(offx>=0 && offx<len(x)) :: x
|
||||
check(len(x)>offx+(cols-1)*abs(incx)) :: x
|
||||
depend(offx,cols,incx) :: x
|
||||
|
||||
check(offy>=0 && offy<len(y)) :: y
|
||||
check(len(y)>offy+(rows-1)*abs(incy)) :: y
|
||||
depend(offy,rows,incy) :: y
|
||||
|
||||
integer depend(m,n,trans), intent(hide) :: rows = (trans?n:m)
|
||||
integer depend(m,n,trans), intent(hide) :: cols = (trans?m:n)
|
||||
|
||||
end subroutine <prefix>gemv
|
||||
|
||||
|
||||
subroutine <prefix>gbmv(m,n,kl,ku,alpha,a,lda,x,incx,offx,beta,y,incy,offy,trans,ly)
|
||||
! Performs one of the matrix-vector operations
|
||||
!
|
||||
! y := alpha*A*x + beta*y, or y := alpha*A**T*x + beta*y,
|
||||
! or y := alpha*A**H*x + beta*y,
|
||||
!
|
||||
! where alpha and beta are scalars, x and y are vectors and A is an
|
||||
! m by n band matrix, with kl sub-diagonals and ku super-diagonals.
|
||||
|
||||
callstatement (*f2py_func)((trans?(trans==2?"C":"T"):"N"),&m,&n,&kl,&ku,&alpha,a,&lda,x+offx,&incx,&beta,y+offy,&incy)
|
||||
callprotoargument char*,F_INT*,F_INT*,F_INT*,F_INT*,<ctype>*,<ctype>*,F_INT*,<ctype>*,F_INT*,<ctype>*,<ctype>*,F_INT*
|
||||
|
||||
integer optional,intent(in),check(trans>=0 && trans <=2) :: trans = 0
|
||||
integer intent(in), depend(ku,kl),check(m>=ku+kl+1) :: m
|
||||
integer intent(in),check(n>=0&&n==shape(a,1)),depend(a) :: n
|
||||
integer intent(in),check(kl>=0) :: kl
|
||||
integer intent(in),check(ku>=0) :: ku
|
||||
integer intent(hide),depend(a) :: lda = MAX(shape(a,0),1)
|
||||
integer optional, intent(in),check(incx>0||incx<0) :: incx = 1
|
||||
integer optional, intent(in),check(incy>0||incy<0) :: incy = 1
|
||||
integer intent(hide),depend(m,n,incy,offy,trans) :: ly = &
|
||||
(y_capi==Py_None?1+offy+(trans==0?m-1:n-1)*abs(incy):-1)
|
||||
integer optional, intent(in) :: offx=0
|
||||
integer optional, intent(in) :: offy=0
|
||||
|
||||
<ftype> intent(in) :: alpha
|
||||
<ftype> intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2>
|
||||
|
||||
<ftype> dimension(lda,n),intent(in) :: a
|
||||
|
||||
<ftype> dimension(ly), intent(in,out,copy,out=yout),depend(ly),optional :: y
|
||||
check(offy>=0 && offy<len(y)) :: y
|
||||
check(len(y)>offy+(trans==0?m-1:n-1)*abs(incy)) :: y
|
||||
depend(offy,n,incy) :: y
|
||||
|
||||
<ftype> dimension(*), intent(in) :: x
|
||||
check(offx>=0 && offx<len(x)) :: x
|
||||
check(len(x)>offx+(trans==0?n-1:m-1)*abs(incx)) :: x
|
||||
depend(offx,n,incx) :: x
|
||||
|
||||
end subroutine <prefix>gbmv
|
||||
|
||||
|
||||
|
||||
!
|
||||
! Level 3 BLAS
|
||||
!
|
||||
|
||||
|
||||
subroutine <prefix>gemm(m,n,k,alpha,a,b,beta,c,trans_a,trans_b,lda,ka,ldb,kb)
|
||||
! Computes a scalar-matrix-matrix product and adds the result to a
|
||||
! scalar-matrix product.
|
||||
!
|
||||
! c = gemm(alpha,a,b,beta=0,c=0,trans_a=0,trans_b=0,overwrite_c=0)
|
||||
! Calculate C <- alpha * op(A) * op(B) + beta * C
|
||||
|
||||
callstatement (*f2py_func)((trans_a?(trans_a==2?"C":"T"):"N"), &
|
||||
(trans_b?(trans_b==2?"C":"T"):"N"),&m,&n,&k,&alpha,a,&lda,b,&ldb,&beta,c,&m)
|
||||
callprotoargument char*,char*,F_INT*,F_INT*,F_INT*,<ctype>*,<ctype>*,F_INT*,<ctype>*, &
|
||||
F_INT*,<ctype>*,<ctype>*,F_INT*
|
||||
|
||||
integer optional,intent(in),check(trans_a>=0 && trans_a <=2) :: trans_a = 0
|
||||
integer optional,intent(in),check(trans_b>=0 && trans_b <=2) :: trans_b = 0
|
||||
<ftype> intent(in) :: alpha
|
||||
<ftype> intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2>
|
||||
|
||||
<ftype> dimension(lda,ka),intent(in) :: a
|
||||
<ftype> dimension(ldb,kb),intent(in) :: b
|
||||
<ftype> dimension(m,n),intent(in,out,copy),depend(m,n),optional :: c
|
||||
check(shape(c,0)==m && shape(c,1)==n) :: c
|
||||
|
||||
integer depend(a),intent(hide) :: lda = shape(a,0)
|
||||
integer depend(a),intent(hide) :: ka = shape(a,1)
|
||||
integer depend(b),intent(hide) :: ldb = shape(b,0)
|
||||
integer depend(b),intent(hide) :: kb = shape(b,1)
|
||||
|
||||
integer depend(a,trans_a,ka,lda),intent(hide):: m = (trans_a?ka:lda)
|
||||
integer depend(a,trans_a,ka,lda),intent(hide):: k = (trans_a?lda:ka)
|
||||
integer depend(b,trans_b,kb,ldb,k),intent(hide),check(trans_b?kb==k:ldb==k) :: &
|
||||
n = (trans_b?ldb:kb)
|
||||
|
||||
end subroutine <prefix>gemm
|
||||
|
||||
|
||||
subroutine <prefix6><sy,\0,\0,\0,he,he>rk(n,k,alpha,a,beta,c,trans,lower,lda,ka)
|
||||
! performs one of the symmetric rank k operations
|
||||
! C := alpha*A*A**T + beta*C, or C := alpha*A**T*A + beta*C,
|
||||
!
|
||||
! c = syrk(alpha,a,beta=0,c=0,trans=0,lower=0,overwrite_c=0)
|
||||
!
|
||||
callstatement (*f2py_func)((lower?"L":"U"), &
|
||||
(trans?(trans==2?"C":"T"):"N"), &n,&k,&alpha,a,&lda,&beta,c,&n)
|
||||
callprotoargument char*,char*,F_INT*,F_INT*,<ctype6>*,<ctype6>*,F_INT*,<ctype6>*, &
|
||||
<ctype6>*,F_INT*
|
||||
|
||||
integer optional, intent(in),check(lower==0||lower==1) :: lower = 0
|
||||
integer optional,intent(in),check(trans>=0 && trans <=2) :: trans = 0
|
||||
|
||||
<ftype6> intent(in) :: alpha
|
||||
<ftype6> intent(in),optional :: beta = <0.0,\0,(0.0\,0.0),\2,\2,\2>
|
||||
|
||||
<ftype6> dimension(lda,ka),intent(in) :: a
|
||||
<ftype6> dimension(n,n),intent(in,out,copy),depend(n),optional :: c
|
||||
check(shape(c,0)==n && shape(c,1)==n) :: c
|
||||
|
||||
integer depend(a),intent(hide) :: lda = shape(a,0)
|
||||
integer depend(a),intent(hide) :: ka = shape(a,1)
|
||||
|
||||
integer depend(a, trans, ka, lda), intent(hide) :: n = (trans ? ka : lda)
|
||||
integer depend(a, trans, ka, lda), intent(hide) :: k = (trans ? lda : ka)
|
||||
|
||||
end subroutine <prefix6><sy,\0,\0,\0,he,he>rk
|
||||
|
||||
|
||||
!
|
||||
! LAPACK
|
||||
!
|
||||
|
||||
subroutine <prefix>gesv(n,nrhs,a,piv,b,info)
|
||||
! lu,piv,x,info = gesv(a,b,overwrite_a=0,overwrite_b=0)
|
||||
! Solve A * X = B.
|
||||
! A = P * L * U
|
||||
! U is upper diagonal triangular, L is unit lower triangular,
|
||||
! piv pivots columns.
|
||||
|
||||
callstatement {F_INT i;(*f2py_func)(&n,&nrhs,a,&n,piv,b,&n,&info);for(i=0;i\<n;--piv[i++]);}
|
||||
callprotoargument F_INT*,F_INT*,<ctype>*,F_INT*,F_INT*,<ctype>*,F_INT*,F_INT*
|
||||
|
||||
integer depend(a),intent(hide):: n = shape(a,0)
|
||||
integer depend(b),intent(hide):: nrhs = shape(b,1)
|
||||
<ftype> dimension(n,n),check(shape(a,0)==shape(a,1)) :: a
|
||||
integer dimension(n),depend(n),intent(out) :: piv
|
||||
<ftype> dimension(n,nrhs),check(shape(a,0)==shape(b,0)),depend(n) :: b
|
||||
integer intent(out)::info
|
||||
intent(in,out,copy,out=x) b
|
||||
intent(in,out,copy,out=lu) a
|
||||
end subroutine <prefix>gesv
|
||||
|
||||
|
||||
subroutine <prefix2>gesdd(m,n,minmn,u0,u1,vt0,vt1,a,compute_uv,full_matrices,u,s,vt,work,lwork,iwork,info)
|
||||
! u,s,vt,info = gesdd(a,compute_uv=1,lwork=..,overwrite_a=0)
|
||||
! Compute the singular value decomposition (SVD) using divide and conquer:
|
||||
! A = U * SIGMA * transpose(V)
|
||||
! A - M x N matrix
|
||||
! U - M x M matrix or M x min(M,N) if full_matrices=False
|
||||
! SIGMA - M x N zero matrix with a main diagonal filled with min(M,N)
|
||||
! singular values
|
||||
! transpose(V) - N x N matrix or min(M,N) x N if full_matrices=False
|
||||
|
||||
callstatement (*f2py_func)((compute_uv?(full_matrices?"A":"S"):"N"),&m,&n,a,&m,s,u,&u0,vt,&vt0,work,&lwork,iwork,&info)
|
||||
callprotoargument char*,F_INT*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,F_INT*,F_INT*
|
||||
|
||||
integer intent(in),optional,check(compute_uv==0||compute_uv==1):: compute_uv = 1
|
||||
integer intent(in),optional,check(full_matrices==0||full_matrices==1):: full_matrices = 1
|
||||
integer intent(hide),depend(a):: m = shape(a,0)
|
||||
integer intent(hide),depend(a):: n = shape(a,1)
|
||||
integer intent(hide),depend(m,n):: minmn = MIN(m,n)
|
||||
integer intent(hide),depend(compute_uv,minmn) :: u0 = (compute_uv?m:1)
|
||||
integer intent(hide),depend(compute_uv,minmn, full_matrices) :: u1 = (compute_uv?(full_matrices?m:minmn):1)
|
||||
integer intent(hide),depend(compute_uv,minmn, full_matrices) :: vt0 = (compute_uv?(full_matrices?n:minmn):1)
|
||||
integer intent(hide),depend(compute_uv,minmn) :: vt1 = (compute_uv?n:1)
|
||||
<ftype2> dimension(m,n),intent(in,copy,aligned8) :: a
|
||||
<ftype2> dimension(minmn),intent(out),depend(minmn) :: s
|
||||
<ftype2> dimension(u0,u1),intent(out),depend(u0, u1) :: u
|
||||
<ftype2> dimension(vt0,vt1),intent(out),depend(vt0, vt1) :: vt
|
||||
<ftype2> dimension(lwork),intent(hide,cache),depend(lwork) :: work
|
||||
integer optional,intent(in),depend(minmn,compute_uv) &
|
||||
:: lwork = max((compute_uv?4*minmn*minmn+MAX(m,n)+9*minmn:MAX(14*minmn+4,10*minmn+2+25*(25+8))+MAX(m,n)),1)
|
||||
integer intent(hide,cache),dimension(8*minmn),depend(minmn) :: iwork
|
||||
integer intent(out)::info
|
||||
|
||||
end subroutine <prefix2>gesdd
|
||||
|
||||
subroutine <prefix2>gesdd_lwork(m,n,minmn,u0,vt0,a,compute_uv,full_matrices,u,s,vt,work,lwork,iwork,info)
|
||||
! LWORK computation for (S/D)GESDD
|
||||
|
||||
fortranname <prefix2>gesdd
|
||||
callstatement (*f2py_func)((compute_uv?(full_matrices?"A":"S"):"N"),&m,&n,&a,&m,&s,&u,&u0,&vt,&vt0,&work,&lwork,&iwork,&info)
|
||||
callprotoargument char*,F_INT*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,F_INT*,F_INT*,F_INT*
|
||||
|
||||
integer intent(in),optional,check(compute_uv==0||compute_uv==1):: compute_uv = 1
|
||||
integer intent(in),optional,check(full_matrices==0||full_matrices==1):: full_matrices = 1
|
||||
integer intent(in) :: m
|
||||
integer intent(in) :: n
|
||||
integer intent(hide),depend(m,n):: minmn = MIN(m,n)
|
||||
integer intent(hide),depend(compute_uv,minmn) :: u0 = (compute_uv?m:1)
|
||||
integer intent(hide),depend(compute_uv,minmn, full_matrices) :: vt0 = (compute_uv?(full_matrices?n:minmn):1)
|
||||
<ftype2> intent(hide) :: a
|
||||
<ftype2> intent(hide) :: s
|
||||
<ftype2> intent(hide) :: u
|
||||
<ftype2> intent(hide) :: vt
|
||||
<ftype2> intent(out) :: work
|
||||
integer intent(hide) :: lwork = -1
|
||||
integer intent(hide) :: iwork
|
||||
integer intent(out) :: info
|
||||
|
||||
end subroutine <prefix2>gesdd_lwork
|
||||
|
||||
|
||||
subroutine <prefix2>syev(compute_v,lower,n,w,a,lda,work,lwork,info)
|
||||
! w,v,info = syev(a,compute_v=1,lower=0,lwork=3*n-1,overwrite_a=0)
|
||||
! Compute all eigenvalues and, optionally, eigenvectors of a
|
||||
! real symmetric matrix A.
|
||||
!
|
||||
! Performance tip:
|
||||
! If compute_v=0 then set also overwrite_a=1.
|
||||
|
||||
callstatement (*f2py_func)((compute_v?"V":"N"),(lower?"L":"U"),&n,a,&lda,w,work,&lwork,&info)
|
||||
callprotoargument char*,char*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,F_INT*
|
||||
|
||||
integer optional,intent(in):: compute_v = 1
|
||||
check(compute_v==1||compute_v==0) compute_v
|
||||
integer optional,intent(in),check(lower==0||lower==1) :: lower = 0
|
||||
|
||||
integer intent(hide),depend(a):: n = shape(a,0)
|
||||
integer intent(hide),depend(a):: lda = MAX(1,shape(a,0))
|
||||
<ftype2> dimension(n,n),check(shape(a,0)==shape(a,1)) :: a
|
||||
intent(in,copy,out,out=v) :: a
|
||||
|
||||
<ftype2> dimension(n),intent(out),depend(n) :: w
|
||||
|
||||
integer optional,intent(in),depend(n) :: lwork=max(3*n-1,1)
|
||||
check(lwork>=3*n-1) :: lwork
|
||||
<ftype2> dimension(lwork),intent(hide),depend(lwork) :: work
|
||||
|
||||
integer intent(out) :: info
|
||||
|
||||
end subroutine <prefix2>syev
|
||||
|
||||
|
||||
subroutine <prefix2>syev_lwork(lower,n,w,a,lda,work,lwork,info)
|
||||
! LWORK routines for syev
|
||||
|
||||
fortranname <prefix2>syev
|
||||
|
||||
callstatement (*f2py_func)("N",(lower?"L":"U"),&n,&a,&lda,&w,&work,&lwork,&info)
|
||||
callprotoargument char*,char*,F_INT*,<ctype2>*,F_INT*,<ctype2>*,<ctype2>*,F_INT*,F_INT*
|
||||
|
||||
integer intent(in):: n
|
||||
integer optional,intent(in),check(lower==0||lower==1) :: lower = 0
|
||||
|
||||
integer intent(hide),depend(n):: lda = MAX(1, n)
|
||||
<ftype2> intent(hide):: a
|
||||
<ftype2> intent(hide):: w
|
||||
integer intent(hide):: lwork = -1
|
||||
|
||||
<ftype2> intent(out):: work
|
||||
integer intent(out):: info
|
||||
|
||||
end subroutine <prefix2>syev_lwork
|
||||
|
||||
end interface
|
||||
|
||||
end python module _flapack
|
||||
|
||||
|
||||
|
|
@ -1,299 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Process f2py template files (`filename.pyf.src` -> `filename.pyf`)
|
||||
|
||||
Usage: python generate_pyf.py filename.pyf.src -o output_directory
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import subprocess
|
||||
import argparse
|
||||
|
||||
|
||||
# START OF CODE VENDORED FROM `numpy.distutils.from_template`
|
||||
#############################################################
|
||||
"""
|
||||
process_file(filename)
|
||||
|
||||
takes templated file .xxx.src and produces .xxx file where .xxx
|
||||
is .pyf .f90 or .f using the following template rules:
|
||||
|
||||
'<..>' denotes a template.
|
||||
|
||||
All function and subroutine blocks in a source file with names that
|
||||
contain '<..>' will be replicated according to the rules in '<..>'.
|
||||
|
||||
The number of comma-separated words in '<..>' will determine the number of
|
||||
replicates.
|
||||
|
||||
'<..>' may have two different forms, named and short. For example,
|
||||
|
||||
named:
|
||||
<p=d,s,z,c> where anywhere inside a block '<p>' will be replaced with
|
||||
'd', 's', 'z', and 'c' for each replicate of the block.
|
||||
|
||||
<_c> is already defined: <_c=s,d,c,z>
|
||||
<_t> is already defined: <_t=real,double precision,complex,double complex>
|
||||
|
||||
short:
|
||||
<s,d,c,z>, a short form of the named, useful when no <p> appears inside
|
||||
a block.
|
||||
|
||||
In general, '<..>' contains a comma separated list of arbitrary
|
||||
expressions. If these expression must contain a comma|leftarrow|rightarrow,
|
||||
then prepend the comma|leftarrow|rightarrow with a backslash.
|
||||
|
||||
If an expression matches '\\<index>' then it will be replaced
|
||||
by <index>-th expression.
|
||||
|
||||
Note that all '<..>' forms in a block must have the same number of
|
||||
comma-separated entries.
|
||||
|
||||
Predefined named template rules:
|
||||
<prefix=s,d,c,z>
|
||||
<ftype=real,double precision,complex,double complex>
|
||||
<ftypereal=real,double precision,\\0,\\1>
|
||||
<ctype=float,double,complex_float,complex_double>
|
||||
<ctypereal=float,double,\\0,\\1>
|
||||
"""
|
||||
|
||||
# Fixed-form Fortran marks a continuation line with '$' or '*' in column 6,
# i.e. after exactly five blanks.  The width matters: parse_structure()
# compares a 7-character slice against newline + five blanks + '$'.
routine_start_re = re.compile(
    r'(\n|\A)((     (\$|\*))|)\s*(subroutine|function)\b',
    re.I
)
routine_end_re = re.compile(r'\n\s*end\s*(subroutine|function)\b.*(\n|\Z)', re.I)
function_start_re = re.compile(r'\n     (\$|\*)\s*function\b', re.I)

def parse_structure(astr):
    """ Return a list of ``(start, end)`` index pairs, one per subroutine or
    function found in `astr`.

    ``astr[start:end]`` is the text of one routine that is to be expanded.
    `start` is the index of the first character of the line that opens the
    routine; `end` is the index of the newline that terminates its
    ``end subroutine`` / ``end function`` line, or ``len(astr)`` when no
    closing line is found.
    """
    spanlist = []
    ind = 0
    while True:
        m = routine_start_re.search(astr, ind)
        if m is None:
            break
        start = m.start()
        if function_start_re.match(astr, start, m.end()):
            # 'function' sits on a continuation line: walk back over the
            # preceding '$' continuation lines to the line that starts the
            # statement (e.g. the one carrying the return type).
            while True:
                i = astr.rfind('\n', ind, start)
                if i == -1:
                    break
                start = i
                if astr[i:i+7] != '\n     $':
                    break
        # Step over the newline that the start pattern matched.
        start += 1
        m = routine_end_re.search(astr, m.end())
        # Keep the closing newline out of the span so it stays with the
        # text that follows the routine.
        ind = end = (m.end() - 1) if m else len(astr)
        spanlist.append((start, end))
    return spanlist
|
||||
|
||||
template_re = re.compile(r"<\s*(\w[\w\d]*)\s*>")
|
||||
named_re = re.compile(r"<\s*(\w[\w\d]*)\s*=\s*(.*?)\s*>")
|
||||
list_re = re.compile(r"<\s*((.*?))\s*>")
|
||||
|
||||
def find_repl_patterns(astr):
    """Collect the named template definitions ``<name=v1,v2,...>`` in `astr`.

    Returns a dict mapping each template name to its comma-joined value
    list as normalised by `conv`.  Escaped commas (``\\,``) inside a value
    are stored as the placeholder ``@comma@``.
    """
    found = {}
    for raw_name, raw_values in named_re.findall(astr):
        # Fall back to a generated key if the captured name is blank.
        key = raw_name.strip() or unique_key(found)
        found[key] = conv(raw_values.replace(r'\,', '@comma@'))
    return found
|
||||
|
||||
def find_and_remove_repl_patterns(astr):
    """Return ``(text, definitions)`` for `astr`.

    `definitions` is the dict produced by `find_repl_patterns`; `text` is
    `astr` with every named template definition deleted.
    """
    definitions = find_repl_patterns(astr)
    stripped = named_re.sub('', astr)
    return stripped, definitions
|
||||
|
||||
# Matches an item that is exactly a back-reference such as ``\0`` or ``\12``.
item_re = re.compile(r"\A\\(?P<index>\d+)\Z")

def conv(astr):
    """Normalise a comma-separated template value list.

    Each item is stripped of surrounding whitespace, and an item of the
    form ``\\<index>`` is replaced by the item currently stored at that
    index.  The result is re-joined with bare commas.
    """
    items = [piece.strip() for piece in astr.split(',')]
    for pos, text in enumerate(items):
        ref = item_re.match(text)
        if ref is not None:
            items[pos] = items[int(ref.group('index'))]
    return ','.join(items)
|
||||
|
||||
def unique_key(adict):
    """ Obtain a unique key given a dictionary.

    Returns the first name of the form ``__l<n>`` (n = 1, 2, ...) that is
    not already a key of `adict`.
    """
    taken = list(adict.keys())
    counter = 1
    candidate = '__l%s' % (counter)
    while candidate in taken:
        counter += 1
        candidate = '__l%s' % (counter)
    return candidate
|
||||
|
||||
|
||||
template_name_re = re.compile(r'\A\s*(\w[\w\d]*)\s*\Z')
|
||||
def expand_sub(substr, names):
    """Expand one routine block into its replicates.

    `substr` is the text of a single subroutine/function; `names` maps
    template names to comma-joined value lists.  `names` is updated in
    place with rules first seen in this block (unless the value list
    starts with an underscore).

    Returns the replicates concatenated, each followed by a blank line, or
    `substr` (after list-to-name conversion) when it uses no templates.
    Raises ValueError when a referenced template has no definition.
    """
    # Hide escaped angle brackets from the template regexes.
    substr = substr.replace(r'\>', '@rightarrow@').replace(r'\<', '@leftarrow@')

    local_defs = find_repl_patterns(substr)
    # Reduce in-block definitions <name=...> to plain references <name>.
    substr = named_re.sub(r"<\1>", substr)

    def listrepl(mobj):
        values = conv(mobj.group(1).replace(r'\,', '@comma@'))
        if template_name_re.match(values):
            # Already a plain name reference.
            return "<%s>" % (values)
        known = None
        for key, existing in local_defs.items():
            if existing == values:
                known = key  # no early exit: the last matching key wins
        if known is None:
            # First time this anonymous list is seen: give it a name.
            known = unique_key(local_defs)
            local_defs[known] = values
        return "<%s>" % known

    # Turn every anonymous list <a,b,...> into a named reference.
    substr = list_re.sub(listrepl, substr)

    numsubs = None
    base_rule = None
    rules = {}
    for ref in template_re.findall(substr):
        if ref in rules:
            continue
        values = local_defs.get(ref, names.get(ref, None))
        if values is None:
            raise ValueError('No replicates found for <%s>' % (ref))
        if ref not in names and not values.startswith('_'):
            names[ref] = values
        rule = [item.replace('@comma@', ',') for item in values.split(',')]
        count = len(rule)

        if numsubs is None:
            # The first template seen fixes the number of replicates.
            numsubs = count
            rules[ref] = rule
            base_rule = ref
        elif count == numsubs:
            rules[ref] = rule
        else:
            print("Mismatch in number of replacements (base <{}={}>) "
                  "for <{}={}>. Ignoring."
                  .format(base_rule, ','.join(rules[base_rule]), ref, values))
    if not rules:
        return substr

    def namerepl(mobj):
        # `k` is the replicate index of the enclosing loop below.
        name = mobj.group(1)
        if name in rules:
            return rules[name][k]
        # Templates without a usable rule collapse to their bare name.
        return name

    replicates = []
    for k in range(numsubs):
        replicates.append(template_re.sub(namerepl, substr) + '\n\n')
    newstr = ''.join(replicates)

    newstr = newstr.replace('@rightarrow@', '>')
    newstr = newstr.replace('@leftarrow@', '<')
    return newstr
|
||||
|
||||
def process_str(allstr):
    """Expand every templated routine in `allstr` and return the new text.

    Text between routines is copied through with its named template
    definitions removed; those definitions, together with the predefined
    `_special_names`, are available to the routines that follow.
    """
    known = dict(_special_names)
    chunks = []
    cursor = 0
    for begin, finish in parse_structure(allstr):
        between, defs = find_and_remove_repl_patterns(allstr[cursor:begin])
        chunks.append(between)
        known.update(defs)
        chunks.append(expand_sub(allstr[begin:finish], known))
        cursor = finish
    # Whatever follows the last routine is copied verbatim.
    chunks.append(allstr[cursor:])
    return ''.join(chunks)
|
||||
|
||||
include_src_re = re.compile(
|
||||
r"(\n|\A)\s*include\s*['\"](?P<name>[\w\d./\\]+\.src)['\"]",
|
||||
re.I
|
||||
)
|
||||
|
||||
def resolve_includes(source):
    """Return the lines of `source` with ``include '<file>.src'`` lines
    replaced, recursively, by the lines of the included file.

    Relative include paths are resolved against the directory of `source`.
    An include line whose target is not an existing file is kept as is.
    """
    base_dir = os.path.dirname(source)
    collected = []
    with open(source) as fid:
        for line in fid:
            hit = include_src_re.match(line)
            if hit is None:
                collected.append(line)
                continue
            target = hit.group('name')
            if not os.path.isabs(target):
                target = os.path.join(base_dir, target)
            if os.path.isfile(target):
                collected.extend(resolve_includes(target))
            else:
                collected.append(line)
    return collected
|
||||
|
||||
def process_file(source):
    """Read `source`, inline its ``.src`` includes and return the text with
    all templates expanded."""
    return process_str(''.join(resolve_includes(source)))
|
||||
|
||||
_special_names = find_repl_patterns('''
|
||||
<_c=s,d,c,z>
|
||||
<_t=real,double precision,complex,double complex>
|
||||
<prefix=s,d,c,z>
|
||||
<ftype=real,double precision,complex,double complex>
|
||||
<ctype=float,double,complex_float,complex_double>
|
||||
<ftypereal=real,double precision,\\0,\\1>
|
||||
<ctypereal=float,double,\\0,\\1>
|
||||
''')
|
||||
|
||||
# END OF CODE VENDORED FROM `numpy.distutils.from_template`
|
||||
###########################################################
|
||||
|
||||
|
||||
def main():
    """Command-line entry point.

    ``*.pyf.src`` and ``*.f.src`` inputs are template-expanded and written
    to the output directory under the same name minus the ``.src`` suffix.
    For ``*.pyf.src`` and ``*.pyf`` inputs, ``numpy.f2py`` is then run on
    the signature file with the output directory as its build directory.

    Raises ValueError for an unsupported input extension and RuntimeError
    (carrying f2py's captured output) when f2py exits with a non-zero
    status.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("infile", type=str,
                        help="Path to the input file")
    # Required: the directory is joined into paths below, so a missing
    # value would otherwise surface as an opaque TypeError.
    parser.add_argument("-o", "--outdir", type=str, required=True,
                        help="Path to the output directory")
    args = parser.parse_args()

    if not args.infile.endswith(('.pyf', '.pyf.src', '.f.src')):
        raise ValueError(f"Input file has unknown extension: {args.infile}")

    outdir_abs = os.path.join(os.getcwd(), args.outdir)

    # Write out the .pyf/.f file
    if args.infile.endswith(('.pyf.src', '.f.src')):
        code = process_file(args.infile)
        fname_pyf = os.path.join(args.outdir,
                                 os.path.splitext(os.path.split(args.infile)[1])[0])

        with open(fname_pyf, 'w') as f:
            f.write(code)
    else:
        fname_pyf = args.infile

    # Now invoke f2py to generate the C API module file
    if args.infile.endswith(('.pyf.src', '.pyf')):
        p = subprocess.Popen([sys.executable, '-m', 'numpy.f2py', fname_pyf,
                              '--build-dir', outdir_abs],
                             stdout=subprocess.PIPE, stderr=subprocess.PIPE,
                             cwd=os.getcwd())
        out, err = p.communicate()
        if p.returncode != 0:
            # Decode leniently so the report itself cannot fail on
            # unexpected bytes in the tool's output.
            out_text = out.decode(errors='replace')
            err_text = err.decode(errors='replace')
            raise RuntimeError(f"Processing {fname_pyf} with f2py failed!\n"
                               f"{out_text}\n"
                               f"{err_text}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,50 +0,0 @@
|
|||
# find numpy & f2py includes
|
||||
inc_numpy = run_command(py3,
|
||||
['-c', 'import os; os.chdir(".."); import numpy; print(numpy.get_include())'],
|
||||
check : true
|
||||
).stdout().strip()
|
||||
|
||||
inc_f2py = run_command(py3,
|
||||
['-c', 'import os; os.chdir(".."); import numpy.f2py; print(numpy.f2py.get_include())'],
|
||||
check : true
|
||||
).stdout().strip()
|
||||
|
||||
|
||||
inc_np = include_directories(inc_numpy, inc_f2py)
|
||||
fortranobject_c = inc_f2py / 'fortranobject.c'
|
||||
|
||||
|
||||
fortranobject_lib = static_library('_fortranobject',
|
||||
fortranobject_c,
|
||||
# c_args: numpy_nodepr_api,
|
||||
dependencies: py3_dep,
|
||||
include_directories: [inc_np, inc_f2py],
|
||||
gnu_symbol_visibility: 'hidden',
|
||||
)
|
||||
fortranobject_dep = declare_dependency(
|
||||
link_with: fortranobject_lib,
|
||||
include_directories: [inc_np, inc_f2py],
|
||||
)
|
||||
|
||||
|
||||
# f2py generated wrappers
|
||||
|
||||
flapack_module = custom_target('flapack_module',
|
||||
output: ['_flapackmodule.c'],
|
||||
input: 'blas_lapack.pyf.src',
|
||||
command: [generate_f2pymod, '@INPUT@', '-o', '@OUTDIR@'],
|
||||
)
|
||||
|
||||
py3.extension_module('_flapack',
|
||||
flapack_module,
|
||||
link_args: [], # version_link_args,
|
||||
dependencies: [openblas_dep, fortranobject_dep],
|
||||
install: true,
|
||||
subdir: 'openblas_wrap'
|
||||
)
|
||||
|
||||
|
||||
py3.install_sources(
|
||||
['__init__.py'],
|
||||
subdir: 'openblas_wrap'
|
||||
)
|
|
@ -1,12 +0,0 @@
|
|||
libdir=/home/br/repos/OpenBLAS/
|
||||
includedir=/home/br/repos/OpenBLAS/
|
||||
openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64
|
||||
version=0.3.27
|
||||
extralib=-lm -lpthread -lgfortran -lquadmath -L${libdir} -lopenblas
|
||||
Name: openblas
|
||||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
|
||||
Version: ${version}
|
||||
URL: https://github.com/xianyi/OpenBLAS
|
||||
Libs: -L${libdir} -lopenblas
|
||||
Libs.private: ${extralib}
|
||||
Cflags: -I${includedir}
|
|
@ -25,7 +25,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef ROT
|
||||
|
||||
|
@ -47,6 +52,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate `size` bytes from a private System V shared-memory segment
 * created with the SHM_HUGETLB flag.  The request is padded with
 * (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1).  The segment id is passed
 * to shmctl(IPC_RMID) right after attaching.  On failure a message is
 * printed and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size){
  void *address;
  int shmid;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  shmid = shmget(IPC_PRIVATE,
		 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		 SHM_HUGETLB | IPC_CREAT | 0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
|
@ -63,6 +133,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -85,7 +156,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -108,13 +179,13 @@ int main(int argc, char *argv[]){
|
|||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
ROT (&m, x, &inc_x, y, &inc_y, c, s);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -25,7 +25,12 @@ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
|||
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
#undef ROTM
|
||||
|
||||
|
@ -35,6 +40,72 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define ROTM BLASFUNC(srotm)
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz)
|
||||
{
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv) {
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
/*
 * Allocate `size` bytes from a private System V shared-memory segment
 * created with the SHM_HUGETLB flag.  The request is padded with
 * (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1).  The segment id is passed
 * to shmctl(IPC_RMID) right after attaching.  On failure a message is
 * printed and the process exits with status 1.
 */
static void *huge_malloc(BLASLONG size)
{
	void *address;
	int shmid;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

	shmid = shmget(IPC_PRIVATE,
		       (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
		       SHM_HUGETLB | IPC_CREAT | 0600);
	if (shmid < 0) {
		printf("Memory allocation failed(shmget).\n");
		exit(1);
	}

	address = shmat(shmid, NULL, SHM_RND);
	if ((BLASLONG)address == -1) {
		printf("Memory allocation failed(shmat).\n");
		exit(1);
	}

	shmctl(shmid, IPC_RMID, 0);

	return address;
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
|
@ -51,7 +122,7 @@ int main(int argc, char *argv[])
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1, timeg;
|
||||
|
||||
argc--;
|
||||
|
@ -97,7 +168,7 @@ int main(int argc, char *argv[])
|
|||
exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -117,13 +188,14 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
|
||||
for (l = 0; l < loops; l++) {
|
||||
begin();
|
||||
gettimeofday(&start, (struct timezone *)0);
|
||||
|
||||
ROTM(&m, x, &inc_x, y, &inc_y, param);
|
||||
|
||||
end();
|
||||
gettimeofday(&stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) +
|
||||
(double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
|
106
benchmark/scal.c
106
benchmark/scal.c
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SCAL
|
||||
|
||||
|
@ -43,9 +49,74 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x;
|
||||
FLOAT *x, *y;
|
||||
FLOAT alpha[2] = { 2.0, 2.0 };
|
||||
blasint m, i;
|
||||
blasint inc_x=1,inc_y=1;
|
||||
|
@ -57,6 +128,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -74,7 +146,11 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
|
||||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -87,20 +163,30 @@ int main(int argc, char *argv[]){
|
|||
|
||||
fprintf(stderr, " %6d : ", (int)m);
|
||||
|
||||
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
|
||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
for (l=0; l<loops; l++)
|
||||
{
|
||||
SCAL (&m, alpha, x, &inc_x);
|
||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
end();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
SCAL (&m, alpha, x, &inc_x);
|
||||
|
||||
timeg = time1 / loops;
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
}
|
||||
|
||||
timeg /= loops;
|
||||
|
||||
#ifdef COMPLEX
|
||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg);
|
||||
|
|
|
@ -25,10 +25,17 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SPMV
|
||||
|
||||
|
||||
#ifndef COMPLEX
|
||||
|
||||
#ifdef DOUBLE
|
||||
|
@ -47,6 +54,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *x, *y;
|
||||
|
@ -63,6 +135,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -90,7 +163,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -120,13 +193,13 @@ int main(int argc, char *argv[]){
|
|||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SPR
|
||||
|
||||
|
@ -35,6 +41,73 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#define SPR BLASFUNC(sspr)
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*c;
|
||||
|
@ -56,6 +129,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -75,7 +149,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -99,13 +173,13 @@ int main(int argc, char *argv[]){
|
|||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SPR (&uplo, &m, alpha, c, &inc_x, a);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
|
|
@ -25,7 +25,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SPR2
|
||||
|
@ -37,6 +42,72 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
|
||||
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a,*b,*c;
|
||||
|
@ -58,6 +129,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -81,7 +153,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -110,13 +182,13 @@ int main(int argc, char *argv[]){
|
|||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
}
|
||||
|
|
|
@ -25,7 +25,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SWAP
|
||||
|
@ -44,6 +49,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
#endif
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *x, *y;
|
||||
|
@ -58,6 +128,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1,timeg;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -80,7 +151,7 @@ int main(int argc, char *argv[]){
|
|||
fprintf(stderr,"Out of Memory!!\n");exit(1);
|
||||
}
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -104,13 +175,13 @@ int main(int argc, char *argv[]){
|
|||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
|
||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
|
||||
}
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SWAP (&m, x, &inc_x, y, &inc_y );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
timeg += time1;
|
||||
|
||||
|
|
|
@ -25,7 +25,13 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
|||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "bench.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef __CYGWIN32__
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "common.h"
|
||||
|
||||
|
||||
#undef SYMM
|
||||
|
||||
|
@ -47,6 +53,71 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|||
|
||||
#endif
|
||||
|
||||
#if defined(__WIN32__) || defined(__WIN64__)
|
||||
|
||||
#ifndef DELTA_EPOCH_IN_MICROSECS
|
||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
|
||||
#endif
|
||||
|
||||
int gettimeofday(struct timeval *tv, void *tz){
|
||||
|
||||
FILETIME ft;
|
||||
unsigned __int64 tmpres = 0;
|
||||
static int tzflag;
|
||||
|
||||
if (NULL != tv)
|
||||
{
|
||||
GetSystemTimeAsFileTime(&ft);
|
||||
|
||||
tmpres |= ft.dwHighDateTime;
|
||||
tmpres <<= 32;
|
||||
tmpres |= ft.dwLowDateTime;
|
||||
|
||||
/*converting file time to unix epoch*/
|
||||
tmpres /= 10; /*convert into microseconds*/
|
||||
tmpres -= DELTA_EPOCH_IN_MICROSECS;
|
||||
tv->tv_sec = (long)(tmpres / 1000000UL);
|
||||
tv->tv_usec = (long)(tmpres % 1000000UL);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
|
||||
|
||||
static void *huge_malloc(BLASLONG size){
|
||||
int shmid;
|
||||
void *address;
|
||||
|
||||
#ifndef SHM_HUGETLB
|
||||
#define SHM_HUGETLB 04000
|
||||
#endif
|
||||
|
||||
if ((shmid =shmget(IPC_PRIVATE,
|
||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
|
||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
|
||||
printf( "Memory allocation failed(shmget).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
address = shmat(shmid, NULL, SHM_RND);
|
||||
|
||||
if ((BLASLONG)address == -1){
|
||||
printf( "Memory allocation failed(shmat).\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
shmctl(shmid, IPC_RMID, 0);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
#define malloc huge_malloc
|
||||
|
||||
#endif
|
||||
|
||||
int main(int argc, char *argv[]){
|
||||
|
||||
FLOAT *a, *b, *c;
|
||||
|
@ -66,6 +137,7 @@ int main(int argc, char *argv[]){
|
|||
int to = 200;
|
||||
int step = 1;
|
||||
|
||||
struct timeval start, stop;
|
||||
double time1;
|
||||
|
||||
argc--;argv++;
|
||||
|
@ -90,7 +162,7 @@ int main(int argc, char *argv[]){
|
|||
|
||||
|
||||
|
||||
#ifdef __linux
|
||||
#ifdef linux
|
||||
srandom(getpid());
|
||||
#endif
|
||||
|
||||
|
@ -109,13 +181,13 @@ int main(int argc, char *argv[]){
|
|||
}
|
||||
}
|
||||
|
||||
begin();
|
||||
gettimeofday( &start, (struct timezone *)0);
|
||||
|
||||
SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );
|
||||
|
||||
end();
|
||||
gettimeofday( &stop, (struct timezone *)0);
|
||||
|
||||
time1 = getsec();
|
||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
|
||||
|
||||
fprintf(stderr,
|
||||
" %10.2f MFlops\n",
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue